Merge branch 'nd/worktree-name-sanitization'
authorJunio C Hamano <gitster@pobox.com>
Thu, 13 Jun 2019 20:19:40 +0000 (13:19 -0700)
committerJunio C Hamano <gitster@pobox.com>
Thu, 13 Jun 2019 20:19:40 +0000 (13:19 -0700)
In recent versions of Git, per-worktree refs are exposed in
refs/worktrees/<wtname>/ hierarchy, which means that worktree names
must be a valid refname component. The code now sanitizes the names
given to worktrees, to make sure these refs are well-formed.

* nd/worktree-name-sanitization:
worktree add: sanitize worktree names

builtin/worktree.c
refs.c
refs.h
t/t2400-worktree-add.sh
index d2a7e2f3f18ba411d065a52ac868db612de5beaa..a5bb02b2076a27a78947fda25c1e16dafd637622 100644 (file)
@@ -275,6 +275,7 @@ static int add_worktree(const char *path, const char *refname,
        struct strbuf symref = STRBUF_INIT;
        struct commit *commit = NULL;
        int is_branch = 0;
+       struct strbuf sb_name = STRBUF_INIT;
 
        validate_worktree_add(path, opts);
 
@@ -290,7 +291,13 @@ static int add_worktree(const char *path, const char *refname,
                die(_("invalid reference: %s"), refname);
 
        name = worktree_basename(path, &len);
-       git_path_buf(&sb_repo, "worktrees/%.*s", (int)(path + len - name), name);
+       strbuf_add(&sb, name, path + len - name);
+       sanitize_refname_component(sb.buf, &sb_name);
+       if (!sb_name.len)
+               BUG("How come '%s' becomes empty after sanitization?", sb.buf);
+       strbuf_reset(&sb);
+       name = sb_name.buf;
+       git_path_buf(&sb_repo, "worktrees/%s", name);
        len = sb_repo.len;
        if (safe_create_leading_directories_const(sb_repo.buf))
                die_errno(_("could not create leading directories of '%s'"),
@@ -418,6 +425,7 @@ static int add_worktree(const char *path, const char *refname,
        strbuf_release(&symref);
        strbuf_release(&sb_repo);
        strbuf_release(&sb_git);
+       strbuf_release(&sb_name);
        return ret;
 }
 
diff --git a/refs.c b/refs.c
index 92d1f6dbdd0d313cdf109e02f31a51bb55cfc601..b8a8430c963831d9f761fbd8dad099370286987f 100644 (file)
--- a/refs.c
+++ b/refs.c
@@ -63,7 +63,7 @@ static unsigned char refname_disposition[256] = {
  * not legal.  It is legal if it is something reasonable to have under
  * ".git/refs/"; We do not like it if:
  *
- * - any path component of it begins with ".", or
+ * - it begins with ".", or
  * - it has double dots "..", or
  * - it has ASCII control characters, or
  * - it has ":", "?", "[", "\", "^", "~", SP, or TAB anywhere, or
@@ -71,31 +71,63 @@ static unsigned char refname_disposition[256] = {
  * - it ends with a "/", or
  * - it ends with ".lock", or
  * - it contains a "@{" portion
+ *
+ * When sanitized is not NULL, instead of rejecting the input refname
+ * as an error, try to come up with a usable replacement for the input
+ * refname in it.
  */
-static int check_refname_component(const char *refname, int *flags)
+static int check_refname_component(const char *refname, int *flags,
+                                  struct strbuf *sanitized)
 {
        const char *cp;
        char last = '\0';
+       size_t component_start = 0; /* garbage - not a reasonable initial value */
+
+       if (sanitized)
+               component_start = sanitized->len;
 
        for (cp = refname; ; cp++) {
                int ch = *cp & 255;
                unsigned char disp = refname_disposition[ch];
+
+               if (sanitized && disp != 1)
+                       strbuf_addch(sanitized, ch);
+
                switch (disp) {
                case 1:
                        goto out;
                case 2:
-                       if (last == '.')
-                               return -1; /* Refname contains "..". */
+                       if (last == '.') { /* Refname contains "..". */
+                               if (sanitized)
+                                       /* collapse ".." to single "." */
+                                       strbuf_setlen(sanitized, sanitized->len - 1);
+                               else
+                                       return -1;
+                       }
                        break;
                case 3:
-                       if (last == '@')
-                               return -1; /* Refname contains "@{". */
+                       if (last == '@') { /* Refname contains "@{". */
+                               if (sanitized)
+                                       sanitized->buf[sanitized->len-1] = '-';
+                               else
+                                       return -1;
+                       }
                        break;
                case 4:
-                       return -1;
+                       /* forbidden char */
+                       if (sanitized)
+                               sanitized->buf[sanitized->len-1] = '-';
+                       else
+                               return -1;
+                       break;
                case 5:
-                       if (!(*flags & REFNAME_REFSPEC_PATTERN))
-                               return -1; /* refspec can't be a pattern */
+                       if (!(*flags & REFNAME_REFSPEC_PATTERN)) {
+                               /* refspec can't be a pattern */
+                               if (sanitized)
+                                       sanitized->buf[sanitized->len-1] = '-';
+                               else
+                                       return -1;
+                       }
 
                        /*
                         * Unset the pattern flag so that we only accept
@@ -109,26 +141,48 @@ static int check_refname_component(const char *refname, int *flags)
 out:
        if (cp == refname)
                return 0; /* Component has zero length. */
-       if (refname[0] == '.')
-               return -1; /* Component starts with '.'. */
+
+       if (refname[0] == '.') { /* Component starts with '.'. */
+               if (sanitized)
+                       sanitized->buf[component_start] = '-';
+               else
+                       return -1;
+       }
        if (cp - refname >= LOCK_SUFFIX_LEN &&
-           !memcmp(cp - LOCK_SUFFIX_LEN, LOCK_SUFFIX, LOCK_SUFFIX_LEN))
-               return -1; /* Refname ends with ".lock". */
+           !memcmp(cp - LOCK_SUFFIX_LEN, LOCK_SUFFIX, LOCK_SUFFIX_LEN)) {
+               if (!sanitized)
+                       return -1;
+               /* Refname ends with ".lock". */
+               while (strbuf_strip_suffix(sanitized, LOCK_SUFFIX)) {
+                       /* try again in case we have .lock.lock */
+               }
+       }
        return cp - refname;
 }
 
-int check_refname_format(const char *refname, int flags)
+static int check_or_sanitize_refname(const char *refname, int flags,
+                                    struct strbuf *sanitized)
 {
        int component_len, component_count = 0;
 
-       if (!strcmp(refname, "@"))
+       if (!strcmp(refname, "@")) {
                /* Refname is a single character '@'. */
-               return -1;
+               if (sanitized)
+                       strbuf_addch(sanitized, '-');
+               else
+                       return -1;
+       }
 
        while (1) {
+               if (sanitized && sanitized->len)
+                       strbuf_complete(sanitized, '/');
+
                /* We are at the start of a path component. */
-               component_len = check_refname_component(refname, &flags);
-               if (component_len <= 0)
+               component_len = check_refname_component(refname, &flags,
+                                                       sanitized);
+               if (sanitized && component_len == 0)
+                       ; /* OK, omit empty component */
+               else if (component_len <= 0)
                        return -1;
 
                component_count++;
@@ -138,13 +192,29 @@ int check_refname_format(const char *refname, int flags)
                refname += component_len + 1;
        }
 
-       if (refname[component_len - 1] == '.')
-               return -1; /* Refname ends with '.'. */
+       if (refname[component_len - 1] == '.') {
+               /* Refname ends with '.'. */
+               if (sanitized)
+                       ; /* omit ending dot */
+               else
+                       return -1;
+       }
        if (!(flags & REFNAME_ALLOW_ONELEVEL) && component_count < 2)
                return -1; /* Refname has only one component. */
        return 0;
 }
 
+int check_refname_format(const char *refname, int flags)
+{
+       return check_or_sanitize_refname(refname, flags, NULL);
+}
+
+void sanitize_refname_component(const char *refname, struct strbuf *out)
+{
+       if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
+               BUG("sanitizing refname '%s' check returned error", refname);
+}
+
 int refname_is_safe(const char *refname)
 {
        const char *rest;
diff --git a/refs.h b/refs.h
index 2727405b61c4e26538c3e76b528df37078337376..730d05ad91a6ac8961c58d599942a5721960d7d7 100644 (file)
--- a/refs.h
+++ b/refs.h
@@ -463,6 +463,12 @@ int for_each_reflog(each_ref_fn fn, void *cb_data);
  */
 int check_refname_format(const char *refname, int flags);
 
+/*
+ * Apply the rules from check_refname_format, but mutate the result until it
+ * is acceptable, and place the result in "out".
+ */
+void sanitize_refname_component(const char *refname, struct strbuf *out);
+
 const char *prettify_refname(const char *refname);
 
 char *refs_shorten_unambiguous_ref(struct ref_store *refs,
index 286bba35d8ae06f97cbf59263cd472975036deaa..c989dbe321deb333517c59d95d6692032aaea94b 100755 (executable)
@@ -570,4 +570,9 @@ test_expect_success '"add" an existing locked but missing worktree' '
        git worktree add --force --force --detach gnoo
 '
 
+test_expect_success FUNNYNAMES 'sanitize generated worktree name' '
+       git worktree add --detach ".  weird*..?.lock.lock" &&
+       test -d .git/worktrees/---weird-.-
+'
+
 test_done