Merge branch 'md/list-objects-filter-combo'
authorJunio C Hamano <gitster@pobox.com>
Wed, 18 Sep 2019 18:50:09 +0000 (11:50 -0700)
committerJunio C Hamano <gitster@pobox.com>
Wed, 18 Sep 2019 18:50:09 +0000 (11:50 -0700)
The list-objects-filter API (used to create a sparse/lazy clone)
learned to take a combined filter specification.

* md/list-objects-filter-combo:
list-objects-filter-options: make parser void
list-objects-filter-options: clean up use of ALLOC_GROW
list-objects-filter-options: allow mult. --filter
strbuf: give URL-encoding API a char predicate fn
list-objects-filter-options: make filter_spec a string_list
list-objects-filter-options: move error check up
list-objects-filter: implement composite filters
list-objects-filter-options: always supply *errbuf
list-objects-filter: put omits set in filter struct
list-objects-filter: encapsulate filter components

22 files changed:
Documentation/rev-list-options.txt
builtin/clone.c
builtin/fetch.c
builtin/rev-list.c
cache.h
credential-store.c
fetch-pack.c
http.c
list-objects-filter-options.c
list-objects-filter-options.h
list-objects-filter.c
list-objects-filter.h
list-objects.c
strbuf.c
strbuf.h
t/t5616-partial-clone.sh
t/t6112-rev-list-filters-objects.sh
transport-helper.c
transport.c
upload-pack.c
url.c
url.h
index bb1251c0364dc71880f6e63db7c6116ed859b90f..90ff9e2bea2e2fd0caf4744ed4e5c5e59f4c1243 100644 (file)
@@ -756,6 +756,22 @@ explicitly-given commit or tree.
 Note that the form '--filter=sparse:path=<path>' that wants to read
 from an arbitrary path on the filesystem has been dropped for security
 reasons.
++
+Multiple '--filter=' flags can be specified to combine filters. Only
+objects which are accepted by every filter are included.
++
+The form '--filter=combine:<filter1>+<filter2>+...<filterN>' can also be
+used to combined several filters, but this is harder than just repeating
+the '--filter' flag and is usually not necessary. Filters are joined by
+'{plus}' and individual filters are %-encoded (i.e. URL-encoded).
+Besides the '{plus}' and '%' characters, the following characters are
+reserved and also must be encoded: `~!@#$^&*()[]{}\;",<>?`+&#39;&#96;+
+as well as all characters with ASCII code &lt;= `0x20`, which includes
+space and newline.
++
+Other arbitrary characters can also be encoded. For instance,
+'combine:tree:3+blob:none' and 'combine:tree%3A3+blob%3Anone' are
+equivalent.
 
 --no-filter::
        Turn off any previous `--filter=` argument.
index f665b28ccccfacaf5dfe84b7f94081e1afacdd49..2048b6760aa9f0a426524e5ea1bd7a3a4c4c5536 100644 (file)
@@ -1160,13 +1160,11 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
                transport->server_options = &server_options;
 
        if (filter_options.choice) {
-               struct strbuf expanded_filter_spec = STRBUF_INIT;
-               expand_list_objects_filter_spec(&filter_options,
-                                               &expanded_filter_spec);
+               const char *spec =
+                       expand_list_objects_filter_spec(&filter_options);
                transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
-                                    expanded_filter_spec.buf);
+                                    spec);
                transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
-               strbuf_release(&expanded_filter_spec);
        }
 
        if (transport->smart_options && !deepen && !filter_options.choice)
index 538f0e72073fdc92af1b0d79604856cf80a045f6..9b27ae968199a75e62a617b649b56788e501e076 100644 (file)
@@ -1243,13 +1243,10 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
        if (update_shallow)
                set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
        if (filter_options.choice) {
-               struct strbuf expanded_filter_spec = STRBUF_INIT;
-               expand_list_objects_filter_spec(&filter_options,
-                                               &expanded_filter_spec);
-               set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
-                          expanded_filter_spec.buf);
+               const char *spec =
+                       expand_list_objects_filter_spec(&filter_options);
+               set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, spec);
                set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
-               strbuf_release(&expanded_filter_spec);
        }
        if (negotiation_tip.nr) {
                if (transport->smart_options)
index 301ccb970bb36340bc6b4a136a3029811f252aa2..b8dc2e1fba6ce4c7763a55924f34d86796d982a2 100644 (file)
@@ -473,8 +473,10 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
                                die(_("object filtering requires --objects"));
                        if (filter_options.choice == LOFC_SPARSE_OID &&
                            !filter_options.sparse_oid_value)
-                               die(_("invalid sparse value '%s'"),
-                                   filter_options.filter_spec);
+                               die(
+                                       _("invalid sparse value '%s'"),
+                                       list_objects_filter_spec(
+                                               &filter_options));
                        continue;
                }
                if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) {
diff --git a/cache.h b/cache.h
index 3cbad5b603ec03ce9e4ec26be84de653e049c980..5624e6c02d5e72ab2975bb3219329e4580f96f55 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -636,6 +636,9 @@ int daemonize(void);
  * at least 'nr' entries; the number of entries currently allocated
  * is 'alloc', using the standard growing factor alloc_nr() macro.
  *
+ * Consider using ALLOC_GROW_BY instead of ALLOC_GROW as it has some
+ * added niceties.
+ *
  * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
  */
 #define ALLOC_GROW(x, nr, alloc) \
@@ -649,6 +652,25 @@ int daemonize(void);
                } \
        } while (0)
 
+/*
+ * Similar to ALLOC_GROW but handles updating of the nr value and
+ * zeroing the bytes of the newly-grown array elements.
+ *
+ * DO NOT USE any expression with side-effect for any of the
+ * arguments.
+ */
+#define ALLOC_GROW_BY(x, nr, increase, alloc) \
+       do { \
+               if (increase) { \
+                       size_t new_nr = nr + (increase); \
+                       if (new_nr < nr) \
+                               BUG("negative growth in ALLOC_GROW_BY"); \
+                       ALLOC_GROW(x, new_nr, alloc); \
+                       memset((x) + nr, 0, sizeof(*(x)) * (increase)); \
+                       nr = new_nr; \
+               } \
+       } while (0)
+
 /* Initialize and use the cache information */
 struct lock_file;
 void preload_index(struct index_state *index,
index ac295420dd0d03d1b31922f9b7c16a83e987e6a3..c010497cb21db3c2bc921caf1b34be3e60ff5570 100644 (file)
@@ -72,15 +72,16 @@ static void store_credential_file(const char *fn, struct credential *c)
        struct strbuf buf = STRBUF_INIT;
 
        strbuf_addf(&buf, "%s://", c->protocol);
-       strbuf_addstr_urlencode(&buf, c->username, 1);
+       strbuf_addstr_urlencode(&buf, c->username, is_rfc3986_unreserved);
        strbuf_addch(&buf, ':');
-       strbuf_addstr_urlencode(&buf, c->password, 1);
+       strbuf_addstr_urlencode(&buf, c->password, is_rfc3986_unreserved);
        strbuf_addch(&buf, '@');
        if (c->host)
-               strbuf_addstr_urlencode(&buf, c->host, 1);
+               strbuf_addstr_urlencode(&buf, c->host, is_rfc3986_unreserved);
        if (c->path) {
                strbuf_addch(&buf, '/');
-               strbuf_addstr_urlencode(&buf, c->path, 0);
+               strbuf_addstr_urlencode(&buf, c->path,
+                                       is_rfc3986_reserved_or_unreserved);
        }
 
        rewrite_credential_file(fn, c, &buf);
index d81f47c07b36de586fda0840093ff259eb29650d..6ccc6294ea7b7fe2da9d2306acdcf40031a4383f 100644 (file)
@@ -338,12 +338,9 @@ static int find_common(struct fetch_negotiator *negotiator,
                }
        }
        if (server_supports_filtering && args->filter_options.choice) {
-               struct strbuf expanded_filter_spec = STRBUF_INIT;
-               expand_list_objects_filter_spec(&args->filter_options,
-                                               &expanded_filter_spec);
-               packet_buf_write(&req_buf, "filter %s",
-                                expanded_filter_spec.buf);
-               strbuf_release(&expanded_filter_spec);
+               const char *spec =
+                       expand_list_objects_filter_spec(&args->filter_options);
+               packet_buf_write(&req_buf, "filter %s", spec);
        }
        packet_buf_flush(&req_buf);
        state_len = req_buf.len;
@@ -1112,7 +1109,7 @@ static int add_haves(struct fetch_negotiator *negotiator,
 }
 
 static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
-                             const struct fetch_pack_args *args,
+                             struct fetch_pack_args *args,
                              const struct ref *wants, struct oidset *common,
                              int *haves_to_send, int *in_vain,
                              int sideband_all)
@@ -1153,13 +1150,10 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
        /* Add filter */
        if (server_supports_feature("fetch", "filter", 0) &&
            args->filter_options.choice) {
-               struct strbuf expanded_filter_spec = STRBUF_INIT;
+               const char *spec =
+                       expand_list_objects_filter_spec(&args->filter_options);
                print_verbose(args, _("Server supports filter"));
-               expand_list_objects_filter_spec(&args->filter_options,
-                                               &expanded_filter_spec);
-               packet_buf_write(&req_buf, "filter %s",
-                                expanded_filter_spec.buf);
-               strbuf_release(&expanded_filter_spec);
+               packet_buf_write(&req_buf, "filter %s", spec);
        } else if (args->filter_options.choice) {
                warning("filtering not recognized by server, ignoring");
        }
diff --git a/http.c b/http.c
index 27aa0a3192988cd0c272dab0e9f6cf52d538b6fa..938b9e55af435c46484a4fd54747a9c0bc2db0fe 100644 (file)
--- a/http.c
+++ b/http.c
@@ -513,9 +513,11 @@ static void set_proxyauth_name_password(CURL *result)
 #else
                struct strbuf s = STRBUF_INIT;
 
-               strbuf_addstr_urlencode(&s, proxy_auth.username, 1);
+               strbuf_addstr_urlencode(&s, proxy_auth.username,
+                                       is_rfc3986_unreserved);
                strbuf_addch(&s, ':');
-               strbuf_addstr_urlencode(&s, proxy_auth.password, 1);
+               strbuf_addstr_urlencode(&s, proxy_auth.password,
+                                       is_rfc3986_unreserved);
                curl_proxyuserpwd = strbuf_detach(&s, NULL);
                curl_easy_setopt(result, CURLOPT_PROXYUSERPWD, curl_proxyuserpwd);
 #endif
index 28c571f922e46cd6ff7aff659677d9003cf3c948..4d88bfe64ad230b4055a77b79432ae2db8af87f6 100644 (file)
@@ -7,6 +7,13 @@
 #include "list-objects-filter.h"
 #include "list-objects-filter-options.h"
 #include "promisor-remote.h"
+#include "trace.h"
+#include "url.h"
+
+static int parse_combine_filter(
+       struct list_objects_filter_options *filter_options,
+       const char *arg,
+       struct strbuf *errbuf);
 
 /*
  * Parse value of the argument to the "filter" keyword.
@@ -33,16 +40,8 @@ static int gently_parse_list_objects_filter(
        if (!arg)
                return 0;
 
-       if (filter_options->choice) {
-               if (errbuf) {
-                       strbuf_addstr(
-                               errbuf,
-                               _("multiple filter-specs cannot be combined"));
-               }
-               return 1;
-       }
-
-       filter_options->filter_spec = strdup(arg);
+       if (filter_options->choice)
+               BUG("filter_options already populated");
 
        if (!strcmp(arg, "blob:none")) {
                filter_options->choice = LOFC_BLOB_NONE;
@@ -56,11 +55,7 @@ static int gently_parse_list_objects_filter(
 
        } else if (skip_prefix(arg, "tree:", &v0)) {
                if (!git_parse_ulong(v0, &filter_options->tree_exclude_depth)) {
-                       if (errbuf) {
-                               strbuf_addstr(
-                                       errbuf,
-                                       _("expected 'tree:<depth>'"));
-                       }
+                       strbuf_addstr(errbuf, _("expected 'tree:<depth>'"));
                        return 1;
                }
                filter_options->choice = LOFC_TREE_DEPTH;
@@ -88,67 +83,253 @@ static int gently_parse_list_objects_filter(
                                _("sparse:path filters support has been dropped"));
                }
                return 1;
+
+       } else if (skip_prefix(arg, "combine:", &v0)) {
+               return parse_combine_filter(filter_options, v0, errbuf);
+
        }
        /*
         * Please update _git_fetch() in git-completion.bash when you
         * add new filters
         */
 
-       if (errbuf)
-               strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg);
+       strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg);
 
        memset(filter_options, 0, sizeof(*filter_options));
        return 1;
 }
 
-int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
-                             const char *arg)
+static const char *RESERVED_NON_WS = "~`!@#$^&*()[]{}\\;'\",<>?";
+
+static int has_reserved_character(
+       struct strbuf *sub_spec, struct strbuf *errbuf)
 {
-       struct strbuf buf = STRBUF_INIT;
-       if (gently_parse_list_objects_filter(filter_options, arg, &buf))
-               die("%s", buf.buf);
+       const char *c = sub_spec->buf;
+       while (*c) {
+               if (*c <= ' ' || strchr(RESERVED_NON_WS, *c)) {
+                       strbuf_addf(
+                               errbuf,
+                               _("must escape char in sub-filter-spec: '%c'"),
+                               *c);
+                       return 1;
+               }
+               c++;
+       }
+
        return 0;
 }
 
+static int parse_combine_subfilter(
+       struct list_objects_filter_options *filter_options,
+       struct strbuf *subspec,
+       struct strbuf *errbuf)
+{
+       size_t new_index = filter_options->sub_nr;
+       char *decoded;
+       int result;
+
+       ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
+                     filter_options->sub_alloc);
+
+       decoded = url_percent_decode(subspec->buf);
+
+       result = has_reserved_character(subspec, errbuf) ||
+               gently_parse_list_objects_filter(
+                       &filter_options->sub[new_index], decoded, errbuf);
+
+       free(decoded);
+       return result;
+}
+
+static int parse_combine_filter(
+       struct list_objects_filter_options *filter_options,
+       const char *arg,
+       struct strbuf *errbuf)
+{
+       struct strbuf **subspecs = strbuf_split_str(arg, '+', 0);
+       size_t sub;
+       int result = 0;
+
+       if (!subspecs[0]) {
+               strbuf_addstr(errbuf, _("expected something after combine:"));
+               result = 1;
+               goto cleanup;
+       }
+
+       for (sub = 0; subspecs[sub] && !result; sub++) {
+               if (subspecs[sub + 1]) {
+                       /*
+                        * This is not the last subspec. Remove trailing "+" so
+                        * we can parse it.
+                        */
+                       size_t last = subspecs[sub]->len - 1;
+                       assert(subspecs[sub]->buf[last] == '+');
+                       strbuf_remove(subspecs[sub], last, 1);
+               }
+               result = parse_combine_subfilter(
+                       filter_options, subspecs[sub], errbuf);
+       }
+
+       filter_options->choice = LOFC_COMBINE;
+
+cleanup:
+       strbuf_list_free(subspecs);
+       if (result) {
+               list_objects_filter_release(filter_options);
+               memset(filter_options, 0, sizeof(*filter_options));
+       }
+       return result;
+}
+
+static int allow_unencoded(char ch)
+{
+       if (ch <= ' ' || ch == '%' || ch == '+')
+               return 0;
+       return !strchr(RESERVED_NON_WS, ch);
+}
+
+static void filter_spec_append_urlencode(
+       struct list_objects_filter_options *filter, const char *raw)
+{
+       struct strbuf buf = STRBUF_INIT;
+       strbuf_addstr_urlencode(&buf, raw, allow_unencoded);
+       trace_printf("Add to combine filter-spec: %s\n", buf.buf);
+       string_list_append(&filter->filter_spec, strbuf_detach(&buf, NULL));
+}
+
+/*
+ * Changes filter_options into an equivalent LOFC_COMBINE filter options
+ * instance. Does not do anything if filter_options is already LOFC_COMBINE.
+ */
+static void transform_to_combine_type(
+       struct list_objects_filter_options *filter_options)
+{
+       assert(filter_options->choice);
+       if (filter_options->choice == LOFC_COMBINE)
+               return;
+       {
+               const int initial_sub_alloc = 2;
+               struct list_objects_filter_options *sub_array =
+                       xcalloc(initial_sub_alloc, sizeof(*sub_array));
+               sub_array[0] = *filter_options;
+               memset(filter_options, 0, sizeof(*filter_options));
+               filter_options->sub = sub_array;
+               filter_options->sub_alloc = initial_sub_alloc;
+       }
+       filter_options->sub_nr = 1;
+       filter_options->choice = LOFC_COMBINE;
+       string_list_append(&filter_options->filter_spec, xstrdup("combine:"));
+       filter_spec_append_urlencode(
+               filter_options,
+               list_objects_filter_spec(&filter_options->sub[0]));
+       /*
+        * We don't need the filter_spec strings for subfilter specs, only the
+        * top level.
+        */
+       string_list_clear(&filter_options->sub[0].filter_spec, /*free_util=*/0);
+}
+
+void list_objects_filter_die_if_populated(
+       struct list_objects_filter_options *filter_options)
+{
+       if (filter_options->choice)
+               die(_("multiple filter-specs cannot be combined"));
+}
+
+void parse_list_objects_filter(
+       struct list_objects_filter_options *filter_options,
+       const char *arg)
+{
+       struct strbuf errbuf = STRBUF_INIT;
+       int parse_error;
+
+       if (!filter_options->choice) {
+               string_list_append(&filter_options->filter_spec, xstrdup(arg));
+
+               parse_error = gently_parse_list_objects_filter(
+                       filter_options, arg, &errbuf);
+       } else {
+               /*
+                * Make filter_options an LOFC_COMBINE spec so we can trivially
+                * add subspecs to it.
+                */
+               transform_to_combine_type(filter_options);
+
+               string_list_append(&filter_options->filter_spec, xstrdup("+"));
+               filter_spec_append_urlencode(filter_options, arg);
+               ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
+                             filter_options->sub_alloc);
+
+               parse_error = gently_parse_list_objects_filter(
+                       &filter_options->sub[filter_options->sub_nr - 1], arg,
+                       &errbuf);
+       }
+       if (parse_error)
+               die("%s", errbuf.buf);
+}
+
 int opt_parse_list_objects_filter(const struct option *opt,
                                  const char *arg, int unset)
 {
        struct list_objects_filter_options *filter_options = opt->value;
 
-       if (unset || !arg) {
+       if (unset || !arg)
                list_objects_filter_set_no_filter(filter_options);
-               return 0;
+       else
+               parse_list_objects_filter(filter_options, arg);
+       return 0;
+}
+
+const char *list_objects_filter_spec(struct list_objects_filter_options *filter)
+{
+       if (!filter->filter_spec.nr)
+               BUG("no filter_spec available for this filter");
+       if (filter->filter_spec.nr != 1) {
+               struct strbuf concatted = STRBUF_INIT;
+               strbuf_add_separated_string_list(
+                       &concatted, "", &filter->filter_spec);
+               string_list_clear(&filter->filter_spec, /*free_util=*/0);
+               string_list_append(
+                       &filter->filter_spec, strbuf_detach(&concatted, NULL));
        }
 
-       return parse_list_objects_filter(filter_options, arg);
+       return filter->filter_spec.items[0].string;
 }
 
-void expand_list_objects_filter_spec(
-       const struct list_objects_filter_options *filter,
-       struct strbuf *expanded_spec)
+const char *expand_list_objects_filter_spec(
+       struct list_objects_filter_options *filter)
 {
-       strbuf_init(expanded_spec, strlen(filter->filter_spec));
-       if (filter->choice == LOFC_BLOB_LIMIT)
-               strbuf_addf(expanded_spec, "blob:limit=%lu",
+       if (filter->choice == LOFC_BLOB_LIMIT) {
+               struct strbuf expanded_spec = STRBUF_INIT;
+               strbuf_addf(&expanded_spec, "blob:limit=%lu",
                            filter->blob_limit_value);
-       else if (filter->choice == LOFC_TREE_DEPTH)
-               strbuf_addf(expanded_spec, "tree:%lu",
-                           filter->tree_exclude_depth);
-       else
-               strbuf_addstr(expanded_spec, filter->filter_spec);
+               string_list_clear(&filter->filter_spec, /*free_util=*/0);
+               string_list_append(
+                       &filter->filter_spec,
+                       strbuf_detach(&expanded_spec, NULL));
+       }
+
+       return list_objects_filter_spec(filter);
 }
 
 void list_objects_filter_release(
        struct list_objects_filter_options *filter_options)
 {
-       free(filter_options->filter_spec);
+       size_t sub;
+
+       if (!filter_options)
+               return;
+       string_list_clear(&filter_options->filter_spec, /*free_util=*/0);
        free(filter_options->sparse_oid_value);
+       for (sub = 0; sub < filter_options->sub_nr; sub++)
+               list_objects_filter_release(&filter_options->sub[sub]);
+       free(filter_options->sub);
        memset(filter_options, 0, sizeof(*filter_options));
 }
 
 void partial_clone_register(
        const char *remote,
-       const struct list_objects_filter_options *filter_options)
+       struct list_objects_filter_options *filter_options)
 {
        char *cfg_name;
        char *filter_name;
@@ -168,7 +349,9 @@ void partial_clone_register(
         * the default for subsequent fetches from this remote.
         */
        filter_name = xstrfmt("remote.%s.partialclonefilter", remote);
-       git_config_set(filter_name, filter_options->filter_spec);
+       /* NEEDSWORK: 'expand' result leaking??? */
+       git_config_set(filter_name,
+                      expand_list_objects_filter_spec(filter_options));
        free(filter_name);
 
        /* Make sure the config info are reset */
@@ -180,12 +363,18 @@ void partial_clone_get_default_filter_spec(
        const char *remote)
 {
        struct promisor_remote *promisor = promisor_remote_find(remote);
+       struct strbuf errbuf = STRBUF_INIT;
 
        /*
         * Parse default value, but silently ignore it if it is invalid.
         */
-       if (promisor)
-               gently_parse_list_objects_filter(filter_options,
-                                                promisor->partial_clone_filter,
-                                                NULL);
+       if (!promisor)
+               return;
+
+       string_list_append(&filter_options->filter_spec,
+                          promisor->partial_clone_filter);
+       gently_parse_list_objects_filter(filter_options,
+                                        promisor->partial_clone_filter,
+                                        &errbuf);
+       strbuf_release(&errbuf);
 }
index 8deaa287b57cbbfd290ce4aa040883b8a5ebcd42..b63c5ee1a368aa4d34cda8bbc41fce905692495b 100644 (file)
@@ -2,7 +2,7 @@
 #define LIST_OBJECTS_FILTER_OPTIONS_H
 
 #include "parse-options.h"
-#include "strbuf.h"
+#include "string-list.h"
 
 /*
  * The list of defined filters for list-objects.
@@ -13,6 +13,7 @@ enum list_objects_filter_choice {
        LOFC_BLOB_LIMIT,
        LOFC_TREE_DEPTH,
        LOFC_SPARSE_OID,
+       LOFC_COMBINE,
        LOFC__COUNT /* must be last */
 };
 
@@ -23,8 +24,10 @@ struct list_objects_filter_options {
         * commands that launch filtering sub-processes, or for communication
         * over the network, don't use this value; use the result of
         * expand_list_objects_filter_spec() instead.
+        * To get the raw filter spec given by the user, use the result of
+        * list_objects_filter_spec().
         */
-       char *filter_spec;
+       struct string_list filter_spec;
 
        /*
         * 'choice' is determined by parsing the filter-spec.  This indicates
@@ -38,19 +41,40 @@ struct list_objects_filter_options {
        unsigned int no_filter : 1;
 
        /*
-        * Parsed values (fields) from within the filter-spec.  These are
-        * choice-specific; not all values will be defined for any given
-        * choice.
+        * BEGIN choice-specific parsed values from within the filter-spec. Only
+        * some values will be defined for any given choice.
         */
+
        struct object_id *sparse_oid_value;
        unsigned long blob_limit_value;
        unsigned long tree_exclude_depth;
+
+       /* LOFC_COMBINE values */
+
+       /* This array contains all the subfilters which this filter combines. */
+       size_t sub_nr, sub_alloc;
+       struct list_objects_filter_options *sub;
+
+       /*
+        * END choice-specific parsed values.
+        */
 };
 
 /* Normalized command line arguments */
 #define CL_ARG__FILTER "filter"
 
-int parse_list_objects_filter(
+void list_objects_filter_die_if_populated(
+       struct list_objects_filter_options *filter_options);
+
+/*
+ * Parses the filter spec string given by arg and either (1) simply places the
+ * result in filter_options if it is not yet populated or (2) combines it with
+ * the filter already in filter_options if it is already populated. In the case
+ * of (2), the filter specs are combined as if specified with 'combine:'.
+ *
+ * Dies and prints a user-facing message if an error occurs.
+ */
+void parse_list_objects_filter(
        struct list_objects_filter_options *filter_options,
        const char *arg);
 
@@ -65,13 +89,22 @@ int opt_parse_list_objects_filter(const struct option *opt,
 /*
  * Translates abbreviated numbers in the filter's filter_spec into their
  * fully-expanded forms (e.g., "limit:blob=1k" becomes "limit:blob=1024").
+ * Returns a string owned by the list_objects_filter_options object.
  *
- * This form should be used instead of the raw filter_spec field when
- * communicating with a remote process or subprocess.
+ * This form should be used instead of the raw list_objects_filter_spec()
+ * value when communicating with a remote process or subprocess.
  */
-void expand_list_objects_filter_spec(
-       const struct list_objects_filter_options *filter,
-       struct strbuf *expanded_spec);
+const char *expand_list_objects_filter_spec(
+       struct list_objects_filter_options *filter);
+
+/*
+ * Returns the filter spec string more or less in the form as the user
+ * entered it. This form of the filter_spec can be used in user-facing
+ * messages.  Returns a string owned by the list_objects_filter_options
+ * object.
+ */
+const char *list_objects_filter_spec(
+       struct list_objects_filter_options *filter);
 
 void list_objects_filter_release(
        struct list_objects_filter_options *filter_options);
@@ -85,7 +118,7 @@ static inline void list_objects_filter_set_no_filter(
 
 void partial_clone_register(
        const char *remote,
-       const struct list_objects_filter_options *filter_options);
+       struct list_objects_filter_options *filter_options);
 void partial_clone_get_default_filter_spec(
        struct list_objects_filter_options *filter_options,
        const char *remote);
index 36e1f774bcfc50d0475ad835464cec092314eb79..d664264d65947f140d04d25d25db54af23b4a2d3 100644 (file)
  */
 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
 
-/*
- * A filter for list-objects to omit ALL blobs from the traversal.
- * And to OPTIONALLY collect a list of the omitted OIDs.
- */
-struct filter_blobs_none_data {
+struct subfilter {
+       struct filter *filter;
+       struct oidset seen;
+       struct oidset omits;
+       struct object_id skip_tree;
+       unsigned is_skipping_tree : 1;
+};
+
+struct filter {
+       enum list_objects_filter_result (*filter_object_fn)(
+               struct repository *r,
+               enum list_objects_filter_situation filter_situation,
+               struct object *obj,
+               const char *pathname,
+               const char *filename,
+               struct oidset *omits,
+               void *filter_data);
+
+       /*
+        * Optional. If this function is supplied and the filter needs
+        * to collect omits, then this function is called once before
+        * free_fn is called.
+        *
+        * This is required because the following two conditions hold:
+        *
+        *   a. A tree filter can add and remove objects as an object
+        *      graph is traversed.
+        *   b. A combine filter's omit set is the union of all its
+        *      subfilters, which may include tree: filters.
+        *
+        * As such, the omits sets must be separate sets, and can only
+        * be unioned after the traversal is completed.
+        */
+       void (*finalize_omits_fn)(struct oidset *omits, void *filter_data);
+
+       void (*free_fn)(void *filter_data);
+
+       void *filter_data;
+
+       /* If non-NULL, the filter collects a list of the omitted OIDs here. */
        struct oidset *omits;
 };
 
@@ -40,10 +75,9 @@ static enum list_objects_filter_result filter_blobs_none(
        struct object *obj,
        const char *pathname,
        const char *filename,
+       struct oidset *omits,
        void *filter_data_)
 {
-       struct filter_blobs_none_data *filter_data = filter_data_;
-
        switch (filter_situation) {
        default:
                BUG("unknown filter_situation: %d", filter_situation);
@@ -61,24 +95,18 @@ static enum list_objects_filter_result filter_blobs_none(
                assert(obj->type == OBJ_BLOB);
                assert((obj->flags & SEEN) == 0);
 
-               if (filter_data->omits)
-                       oidset_insert(filter_data->omits, &obj->oid);
+               if (omits)
+                       oidset_insert(omits, &obj->oid);
                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
        }
 }
 
-static void *filter_blobs_none__init(
-       struct oidset *omitted,
+static void filter_blobs_none__init(
        struct list_objects_filter_options *filter_options,
-       filter_object_fn *filter_fn,
-       filter_free_fn *filter_free_fn)
+       struct filter *filter)
 {
-       struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
-       d->omits = omitted;
-
-       *filter_fn = filter_blobs_none;
-       *filter_free_fn = free;
-       return d;
+       filter->filter_object_fn = filter_blobs_none;
+       filter->free_fn = free;
 }
 
 /*
@@ -86,8 +114,6 @@ static void *filter_blobs_none__init(
  * Can OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_trees_depth_data {
-       struct oidset *omits;
-
        /*
         * Maps trees to the minimum depth at which they were seen. It is not
         * necessary to re-traverse a tree at deeper or equal depths than it has
@@ -110,16 +136,16 @@ struct seen_map_entry {
 /* Returns 1 if the oid was in the omits set before it was invoked. */
 static int filter_trees_update_omits(
        struct object *obj,
-       struct filter_trees_depth_data *filter_data,
+       struct oidset *omits,
        int include_it)
 {
-       if (!filter_data->omits)
+       if (!omits)
                return 0;
 
        if (include_it)
-               return oidset_remove(filter_data->omits, &obj->oid);
+               return oidset_remove(omits, &obj->oid);
        else
-               return oidset_insert(filter_data->omits, &obj->oid);
+               return oidset_insert(omits, &obj->oid);
 }
 
 static enum list_objects_filter_result filter_trees_depth(
@@ -128,6 +154,7 @@ static enum list_objects_filter_result filter_trees_depth(
        struct object *obj,
        const char *pathname,
        const char *filename,
+       struct oidset *omits,
        void *filter_data_)
 {
        struct filter_trees_depth_data *filter_data = filter_data_;
@@ -152,7 +179,7 @@ static enum list_objects_filter_result filter_trees_depth(
                return LOFR_ZERO;
 
        case LOFS_BLOB:
-               filter_trees_update_omits(obj, filter_data, include_it);
+               filter_trees_update_omits(obj, omits, include_it);
                return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO;
 
        case LOFS_BEGIN_TREE:
@@ -173,12 +200,12 @@ static enum list_objects_filter_result filter_trees_depth(
                        filter_res = LOFR_SKIP_TREE;
                } else {
                        int been_omitted = filter_trees_update_omits(
-                               obj, filter_data, include_it);
+                               obj, omits, include_it);
                        seen_info->depth = filter_data->current_depth;
 
                        if (include_it)
                                filter_res = LOFR_DO_SHOW;
-                       else if (filter_data->omits && !been_omitted)
+                       else if (omits && !been_omitted)
                                /*
                                 * Must update omit information of children
                                 * recursively; they have not been omitted yet.
@@ -201,21 +228,18 @@ static void filter_trees_free(void *filter_data) {
        free(d);
 }
 
-static void *filter_trees_depth__init(
-       struct oidset *omitted,
+static void filter_trees_depth__init(
        struct list_objects_filter_options *filter_options,
-       filter_object_fn *filter_fn,
-       filter_free_fn *filter_free_fn)
+       struct filter *filter)
 {
        struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));
-       d->omits = omitted;
        oidmap_init(&d->seen_at_depth, 0);
        d->exclude_depth = filter_options->tree_exclude_depth;
        d->current_depth = 0;
 
-       *filter_fn = filter_trees_depth;
-       *filter_free_fn = filter_trees_free;
-       return d;
+       filter->filter_data = d;
+       filter->filter_object_fn = filter_trees_depth;
+       filter->free_fn = filter_trees_free;
 }
 
 /*
@@ -223,7 +247,6 @@ static void *filter_trees_depth__init(
  * And to OPTIONALLY collect a list of the omitted OIDs.
  */
 struct filter_blobs_limit_data {
-       struct oidset *omits;
        unsigned long max_bytes;
 };
 
@@ -233,6 +256,7 @@ static enum list_objects_filter_result filter_blobs_limit(
        struct object *obj,
        const char *pathname,
        const char *filename,
+       struct oidset *omits,
        void *filter_data_)
 {
        struct filter_blobs_limit_data *filter_data = filter_data_;
@@ -270,30 +294,27 @@ static enum list_objects_filter_result filter_blobs_limit(
                if (object_length < filter_data->max_bytes)
                        goto include_it;
 
-               if (filter_data->omits)
-                       oidset_insert(filter_data->omits, &obj->oid);
+               if (omits)
+                       oidset_insert(omits, &obj->oid);
                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
        }
 
 include_it:
-       if (filter_data->omits)
-               oidset_remove(filter_data->omits, &obj->oid);
+       if (omits)
+               oidset_remove(omits, &obj->oid);
        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 }
 
-static void *filter_blobs_limit__init(
-       struct oidset *omitted,
+static void filter_blobs_limit__init(
        struct list_objects_filter_options *filter_options,
-       filter_object_fn *filter_fn,
-       filter_free_fn *filter_free_fn)
+       struct filter *filter)
 {
        struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
-       d->omits = omitted;
        d->max_bytes = filter_options->blob_limit_value;
 
-       *filter_fn = filter_blobs_limit;
-       *filter_free_fn = free;
-       return d;
+       filter->filter_data = d;
+       filter->filter_object_fn = filter_blobs_limit;
+       filter->free_fn = free;
 }
 
 /*
@@ -326,7 +347,6 @@ struct frame {
 };
 
 struct filter_sparse_data {
-       struct oidset *omits;
        struct exclude_list el;
 
        size_t nr, alloc;
@@ -339,6 +359,7 @@ static enum list_objects_filter_result filter_sparse(
        struct object *obj,
        const char *pathname,
        const char *filename,
+       struct oidset *omits,
        void *filter_data_)
 {
        struct filter_sparse_data *filter_data = filter_data_;
@@ -420,8 +441,8 @@ static enum list_objects_filter_result filter_sparse(
                if (val < 0)
                        val = frame->defval;
                if (val > 0) {
-                       if (filter_data->omits)
-                               oidset_remove(filter_data->omits, &obj->oid);
+                       if (omits)
+                               oidset_remove(omits, &obj->oid);
                        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
                }
 
@@ -435,8 +456,8 @@ static enum list_objects_filter_result filter_sparse(
                 * Leave the LOFR_ bits unset so that if the blob appears
                 * again in the traversal, we will be asked again.
                 */
-               if (filter_data->omits)
-                       oidset_insert(filter_data->omits, &obj->oid);
+               if (omits)
+                       oidset_insert(omits, &obj->oid);
 
                /*
                 * Remember that at least 1 blob in this tree was
@@ -456,14 +477,11 @@ static void filter_sparse_free(void *filter_data)
        free(d);
 }
 
-static void *filter_sparse_oid__init(
-       struct oidset *omitted,
+static void filter_sparse_oid__init(
        struct list_objects_filter_options *filter_options,
-       filter_object_fn *filter_fn,
-       filter_free_fn *filter_free_fn)
+       struct filter *filter)
 {
        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
-       d->omits = omitted;
        if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
                                           NULL, 0, &d->el) < 0)
                die("could not load filter specification");
@@ -473,16 +491,147 @@ static void *filter_sparse_oid__init(
        d->array_frame[d->nr].child_prov_omit = 0;
        d->nr++;
 
-       *filter_fn = filter_sparse;
-       *filter_free_fn = filter_sparse_free;
-       return d;
+       filter->filter_data = d;
+       filter->filter_object_fn = filter_sparse;
+       filter->free_fn = filter_sparse_free;
 }
 
-typedef void *(*filter_init_fn)(
-       struct oidset *omitted,
+/* A filter which only shows objects shown by all sub-filters. */
+struct combine_filter_data {
+       struct subfilter *sub;
+       size_t nr;
+};
+
+static enum list_objects_filter_result process_subfilter(
+       struct repository *r,
+       enum list_objects_filter_situation filter_situation,
+       struct object *obj,
+       const char *pathname,
+       const char *filename,
+       struct subfilter *sub)
+{
+       enum list_objects_filter_result result;
+
+       /*
+        * Check and update is_skipping_tree before oidset_contains so
+        * that is_skipping_tree gets unset even when the object is
+        * marked as seen.  As of this writing, no filter uses
+        * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the
+        * ordering is only theoretically important. Be cautious if you
+        * change the order of the below checks and more filters have
+        * been added!
+        */
+       if (sub->is_skipping_tree) {
+               if (filter_situation == LOFS_END_TREE &&
+                   oideq(&obj->oid, &sub->skip_tree))
+                       sub->is_skipping_tree = 0;
+               else
+                       return LOFR_ZERO;
+       }
+       if (oidset_contains(&sub->seen, &obj->oid))
+               return LOFR_ZERO;
+
+       result = list_objects_filter__filter_object(
+               r, filter_situation, obj, pathname, filename, sub->filter);
+
+       if (result & LOFR_MARK_SEEN)
+               oidset_insert(&sub->seen, &obj->oid);
+
+       if (result & LOFR_SKIP_TREE) {
+               sub->is_skipping_tree = 1;
+               sub->skip_tree = obj->oid;
+       }
+
+       return result;
+}
+
+static enum list_objects_filter_result filter_combine(
+       struct repository *r,
+       enum list_objects_filter_situation filter_situation,
+       struct object *obj,
+       const char *pathname,
+       const char *filename,
+       struct oidset *omits,
+       void *filter_data)
+{
+       struct combine_filter_data *d = filter_data;
+       enum list_objects_filter_result combined_result =
+               LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE;
+       size_t sub;
+
+       for (sub = 0; sub < d->nr; sub++) {
+               enum list_objects_filter_result sub_result = process_subfilter(
+                       r, filter_situation, obj, pathname, filename,
+                       &d->sub[sub]);
+               if (!(sub_result & LOFR_DO_SHOW))
+                       combined_result &= ~LOFR_DO_SHOW;
+               if (!(sub_result & LOFR_MARK_SEEN))
+                       combined_result &= ~LOFR_MARK_SEEN;
+               if (!d->sub[sub].is_skipping_tree)
+                       combined_result &= ~LOFR_SKIP_TREE;
+       }
+
+       return combined_result;
+}
+
+static void filter_combine__free(void *filter_data)
+{
+       struct combine_filter_data *d = filter_data;
+       size_t sub;
+       for (sub = 0; sub < d->nr; sub++) {
+               list_objects_filter__free(d->sub[sub].filter);
+               oidset_clear(&d->sub[sub].seen);
+               if (d->sub[sub].omits.set.size)
+                       BUG("expected oidset to be cleared already");
+       }
+       free(d->sub);
+}
+
+static void add_all(struct oidset *dest, struct oidset *src) {
+       struct oidset_iter iter;
+       struct object_id *src_oid;
+
+       oidset_iter_init(src, &iter);
+       while ((src_oid = oidset_iter_next(&iter)) != NULL)
+               oidset_insert(dest, src_oid);
+}
+
+static void filter_combine__finalize_omits(
+       struct oidset *omits,
+       void *filter_data)
+{
+       struct combine_filter_data *d = filter_data;
+       size_t sub;
+
+       for (sub = 0; sub < d->nr; sub++) {
+               add_all(omits, &d->sub[sub].omits);
+               oidset_clear(&d->sub[sub].omits);
+       }
+}
+
+static void filter_combine__init(
        struct list_objects_filter_options *filter_options,
-       filter_object_fn *filter_fn,
-       filter_free_fn *filter_free_fn);
+       struct filter* filter)
+{
+       struct combine_filter_data *d = xcalloc(1, sizeof(*d));
+       size_t sub;
+
+       d->nr = filter_options->sub_nr;
+       d->sub = xcalloc(d->nr, sizeof(*d->sub));
+       for (sub = 0; sub < d->nr; sub++)
+               d->sub[sub].filter = list_objects_filter__init(
+                       filter->omits ? &d->sub[sub].omits : NULL,
+                       &filter_options->sub[sub]);
+
+       filter->filter_data = d;
+       filter->filter_object_fn = filter_combine;
+       filter->free_fn = filter_combine__free;
+       filter->finalize_omits_fn = filter_combine__finalize_omits;
+}
+
+typedef void (*filter_init_fn)(
+       struct list_objects_filter_options *filter_options,
+       struct filter *filter);
 
 /*
  * Must match "enum list_objects_filter_choice".
@@ -493,14 +642,14 @@ static filter_init_fn s_filters[] = {
        filter_blobs_limit__init,
        filter_trees_depth__init,
        filter_sparse_oid__init,
+       filter_combine__init,
 };
 
-void *list_objects_filter__init(
+struct filter *list_objects_filter__init(
        struct oidset *omitted,
-       struct list_objects_filter_options *filter_options,
-       filter_object_fn *filter_fn,
-       filter_free_fn *filter_free_fn)
+       struct list_objects_filter_options *filter_options)
 {
+       struct filter *filter;
        filter_init_fn init_fn;
 
        assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
@@ -510,10 +659,44 @@ void *list_objects_filter__init(
                    filter_options->choice);
 
        init_fn = s_filters[filter_options->choice];
-       if (init_fn)
-               return init_fn(omitted, filter_options,
-                              filter_fn, filter_free_fn);
-       *filter_fn = NULL;
-       *filter_free_fn = NULL;
-       return NULL;
+       if (!init_fn)
+               return NULL;
+
+       filter = xcalloc(1, sizeof(*filter));
+       filter->omits = omitted;
+       init_fn(filter_options, filter);
+       return filter;
+}
+
+enum list_objects_filter_result list_objects_filter__filter_object(
+       struct repository *r,
+       enum list_objects_filter_situation filter_situation,
+       struct object *obj,
+       const char *pathname,
+       const char *filename,
+       struct filter *filter)
+{
+       if (filter && (obj->flags & NOT_USER_GIVEN))
+               return filter->filter_object_fn(r, filter_situation, obj,
+                                               pathname, filename,
+                                               filter->omits,
+                                               filter->filter_data);
+       /*
+        * No filter is active or user gave object explicitly. In this case,
+        * always show the object (except when LOFS_END_TREE, since this tree
+        * had already been shown when LOFS_BEGIN_TREE).
+        */
+       if (filter_situation == LOFS_END_TREE)
+               return 0;
+       return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+}
+
+void list_objects_filter__free(struct filter *filter)
+{
+       if (!filter)
+               return;
+       if (filter->finalize_omits_fn && filter->omits)
+               filter->finalize_omits_fn(filter->omits, filter->filter_data);
+       filter->free_fn(filter->filter_data);
+       free(filter);
 }
index 1d45a4ad5786c915f1fd597ce782cd96b7301532..cfd784e203f30fa6d85980fb019e83c37cc935e2 100644 (file)
@@ -60,30 +60,36 @@ enum list_objects_filter_situation {
        LOFS_BLOB
 };
 
-typedef enum list_objects_filter_result (*filter_object_fn)(
+struct filter;
+
+/*
+ * Constructor for the set of defined list-objects filters.
+ * The `omitted` set is optional. It is populated with objects that the
+ * filter excludes. This set should not be considered finalized until
+ * after list_objects_filter__free is called on the returned `struct
+ * filter *`.
+ */
+struct filter *list_objects_filter__init(
+       struct oidset *omitted,
+       struct list_objects_filter_options *filter_options);
+
+/*
+ * Lets `filter` decide how to handle the `obj`. If `filter` is NULL, this
+ * function behaves as expected if no filter is configured: all objects are
+ * included.
+ */
+enum list_objects_filter_result list_objects_filter__filter_object(
        struct repository *r,
        enum list_objects_filter_situation filter_situation,
        struct object *obj,
        const char *pathname,
        const char *filename,
-       void *filter_data);
-
-typedef void (*filter_free_fn)(void *filter_data);
+       struct filter *filter);
 
 /*
- * Constructor for the set of defined list-objects filters.
- * Returns a generic "void *filter_data".
- *
- * The returned "filter_fn" will be used by traverse_commit_list()
- * to filter the results.
- *
- * The returned "filter_free_fn" is a destructor for the
- * filter_data.
+ * Destroys `filter` and finalizes the `omitted` set, if present. Does
+ * nothing if `filter` is null.
  */
-void *list_objects_filter__init(
-       struct oidset *omitted,
-       struct list_objects_filter_options *filter_options,
-       filter_object_fn *filter_fn,
-       filter_free_fn *filter_free_fn);
+void list_objects_filter__free(struct filter *filter);
 
 #endif /* LIST_OBJECTS_FILTER_H */
index b5651ddd5bfdd6cda4047f71cc3cb66fc31a793d..9307d91fb3fc8bee9cbeb1a6387b5d5bf92dbd4d 100644 (file)
@@ -18,8 +18,7 @@ struct traversal_context {
        show_object_fn show_object;
        show_commit_fn show_commit;
        void *show_data;
-       filter_object_fn filter_fn;
-       void *filter_data;
+       struct filter *filter;
 };
 
 static void process_blob(struct traversal_context *ctx,
@@ -29,7 +28,7 @@ static void process_blob(struct traversal_context *ctx,
 {
        struct object *obj = &blob->object;
        size_t pathlen;
-       enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW;
+       enum list_objects_filter_result r;
 
        if (!ctx->revs->blob_objects)
                return;
@@ -54,11 +53,10 @@ static void process_blob(struct traversal_context *ctx,
 
        pathlen = path->len;
        strbuf_addstr(path, name);
-       if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
-               r = ctx->filter_fn(ctx->revs->repo,
-                                  LOFS_BLOB, obj,
-                                  path->buf, &path->buf[pathlen],
-                                  ctx->filter_data);
+       r = list_objects_filter__filter_object(ctx->revs->repo,
+                                              LOFS_BLOB, obj,
+                                              path->buf, &path->buf[pathlen],
+                                              ctx->filter);
        if (r & LOFR_MARK_SEEN)
                obj->flags |= SEEN;
        if (r & LOFR_DO_SHOW)
@@ -157,7 +155,7 @@ static void process_tree(struct traversal_context *ctx,
        struct object *obj = &tree->object;
        struct rev_info *revs = ctx->revs;
        int baselen = base->len;
-       enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW;
+       enum list_objects_filter_result r;
        int failed_parse;
 
        if (!revs->tree_objects)
@@ -186,11 +184,10 @@ static void process_tree(struct traversal_context *ctx,
        }
 
        strbuf_addstr(base, name);
-       if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
-               r = ctx->filter_fn(ctx->revs->repo,
-                                  LOFS_BEGIN_TREE, obj,
-                                  base->buf, &base->buf[baselen],
-                                  ctx->filter_data);
+       r = list_objects_filter__filter_object(ctx->revs->repo,
+                                              LOFS_BEGIN_TREE, obj,
+                                              base->buf, &base->buf[baselen],
+                                              ctx->filter);
        if (r & LOFR_MARK_SEEN)
                obj->flags |= SEEN;
        if (r & LOFR_DO_SHOW)
@@ -203,16 +200,14 @@ static void process_tree(struct traversal_context *ctx,
        else if (!failed_parse)
                process_tree_contents(ctx, tree, base);
 
-       if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) {
-               r = ctx->filter_fn(ctx->revs->repo,
-                                  LOFS_END_TREE, obj,
-                                  base->buf, &base->buf[baselen],
-                                  ctx->filter_data);
-               if (r & LOFR_MARK_SEEN)
-                       obj->flags |= SEEN;
-               if (r & LOFR_DO_SHOW)
-                       ctx->show_object(obj, base->buf, ctx->show_data);
-       }
+       r = list_objects_filter__filter_object(ctx->revs->repo,
+                                              LOFS_END_TREE, obj,
+                                              base->buf, &base->buf[baselen],
+                                              ctx->filter);
+       if (r & LOFR_MARK_SEEN)
+               obj->flags |= SEEN;
+       if (r & LOFR_DO_SHOW)
+               ctx->show_object(obj, base->buf, ctx->show_data);
 
        strbuf_setlen(base, baselen);
        free_tree_buffer(tree);
@@ -402,8 +397,7 @@ void traverse_commit_list(struct rev_info *revs,
        ctx.show_commit = show_commit;
        ctx.show_object = show_object;
        ctx.show_data = show_data;
-       ctx.filter_fn = NULL;
-       ctx.filter_data = NULL;
+       ctx.filter = NULL;
        do_traverse(&ctx);
 }
 
@@ -416,17 +410,12 @@ void traverse_commit_list_filtered(
        struct oidset *omitted)
 {
        struct traversal_context ctx;
-       filter_free_fn filter_free_fn = NULL;
 
        ctx.revs = revs;
        ctx.show_object = show_object;
        ctx.show_commit = show_commit;
        ctx.show_data = show_data;
-       ctx.filter_fn = NULL;
-
-       ctx.filter_data = list_objects_filter__init(omitted, filter_options,
-                                                   &ctx.filter_fn, &filter_free_fn);
+       ctx.filter = list_objects_filter__init(omitted, filter_options);
        do_traverse(&ctx);
-       if (ctx.filter_data && filter_free_fn)
-               filter_free_fn(ctx.filter_data);
+       list_objects_filter__free(ctx.filter);
 }
index d30f916858883aa312bd824a53516cb099a2e922..aa48d179a9aec236069fc88501c5c26c3569d502 100644 (file)
--- a/strbuf.c
+++ b/strbuf.c
@@ -774,8 +774,10 @@ void strbuf_addstr_xml_quoted(struct strbuf *buf, const char *s)
        }
 }
 
-static int is_rfc3986_reserved(char ch)
+int is_rfc3986_reserved_or_unreserved(char ch)
 {
+       if (is_rfc3986_unreserved(ch))
+               return 1;
        switch (ch) {
                case '!': case '*': case '\'': case '(': case ')': case ';':
                case ':': case '@': case '&': case '=': case '+': case '$':
@@ -785,20 +787,19 @@ static int is_rfc3986_reserved(char ch)
        return 0;
 }
 
-static int is_rfc3986_unreserved(char ch)
+int is_rfc3986_unreserved(char ch)
 {
        return isalnum(ch) ||
                ch == '-' || ch == '_' || ch == '.' || ch == '~';
 }
 
 static void strbuf_add_urlencode(struct strbuf *sb, const char *s, size_t len,
-                                int reserved)
+                                char_predicate allow_unencoded_fn)
 {
        strbuf_grow(sb, len);
        while (len--) {
                char ch = *s++;
-               if (is_rfc3986_unreserved(ch) ||
-                   (!reserved && is_rfc3986_reserved(ch)))
+               if (allow_unencoded_fn(ch))
                        strbuf_addch(sb, ch);
                else
                        strbuf_addf(sb, "%%%02x", (unsigned char)ch);
@@ -806,9 +807,9 @@ static void strbuf_add_urlencode(struct strbuf *sb, const char *s, size_t len,
 }
 
 void strbuf_addstr_urlencode(struct strbuf *sb, const char *s,
-                            int reserved)
+                            char_predicate allow_unencoded_fn)
 {
-       strbuf_add_urlencode(sb, s, strlen(s), reserved);
+       strbuf_add_urlencode(sb, s, strlen(s), allow_unencoded_fn);
 }
 
 static void strbuf_humanise(struct strbuf *buf, off_t bytes,
index f62278a0be59be4c6cff17f0a0adcc6361e93e82..84cf96972144fae197c6350ead80880452e3d5b2 100644 (file)
--- a/strbuf.h
+++ b/strbuf.h
@@ -672,8 +672,13 @@ void strbuf_branchname(struct strbuf *sb, const char *name,
  */
 int strbuf_check_branch_ref(struct strbuf *sb, const char *name);
 
+typedef int (*char_predicate)(char ch);
+
+int is_rfc3986_unreserved(char ch);
+int is_rfc3986_reserved_or_unreserved(char ch);
+
 void strbuf_addstr_urlencode(struct strbuf *sb, const char *name,
-                            int reserved);
+                            char_predicate allow_unencoded_fn);
 
 __attribute__((format (printf,1,2)))
 int printf_ln(const char *fmt, ...);
index 73cd95812f143bbea40df9242eed64d78e0bf53f..fc634a56b2076283198060de467e3a4c8ce3ffa6 100755 (executable)
@@ -208,6 +208,25 @@ test_expect_success 'use fsck before and after manually fetching a missing subtr
        test_cmp unique_types.expected unique_types.observed
 '
 
+test_expect_success 'implicitly construct combine: filter with repeated flags' '
+       GIT_TRACE=$(pwd)/trace git clone --bare \
+               --filter=blob:none --filter=tree:1 \
+               "file://$(pwd)/srv.bare" pc2 &&
+       grep "trace:.* git pack-objects .*--filter=combine:blob:none+tree:1" \
+               trace &&
+       git -C pc2 rev-list --objects --missing=allow-any HEAD >objects &&
+
+       # We should have gotten some root trees.
+       grep " $" objects &&
+       # Should not have gotten any non-root trees or blobs.
+       ! grep " ." objects &&
+
+       xargs -n 1 git -C pc2 cat-file -t <objects >types &&
+       sort -u types >unique_types.actual &&
+       test_write_lines commit tree >unique_types.expected &&
+       test_cmp unique_types.expected unique_types.actual
+'
+
 test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' '
        rm -rf src dst &&
        git init src &&
index acd7f5ab80d9c8a78c915ec9b868e704237841c5..de0e5a5d3646cdd7e60b98b566bb075200f311ad 100755 (executable)
@@ -278,7 +278,19 @@ test_expect_success 'verify skipping tree iteration when not collecting omits' '
        test_line_count = 2 actual &&
 
        # Make sure no other trees were considered besides the root.
-       ! grep "Skipping contents of tree [^.]" filter_trace
+       ! grep "Skipping contents of tree [^.]" filter_trace &&
+
+       # Try this again with "combine:". If both sub-filters are skipping
+       # trees, the composite filter should also skip trees. This is not
+       # important unless the user does combine:tree:X+tree:Y or another filter
+       # besides "tree:" is implemented in the future which can skip trees.
+       GIT_TRACE=1 git -C r3 rev-list \
+               --objects --filter=combine:tree:1+tree:3 HEAD 2>filter_trace &&
+
+       # Only skip the dir1/ tree, which is shared between the two commits.
+       grep "Skipping contents of tree " filter_trace >actual &&
+       test_write_lines "Skipping contents of tree dir1/..." >expected &&
+       test_cmp expected actual
 '
 
 # Test tree:# filters.
@@ -330,6 +342,148 @@ test_expect_success 'verify tree:3 includes everything expected' '
        test_line_count = 10 actual
 '
 
+test_expect_success 'combine:... for a simple combination' '
+       git -C r3 rev-list --objects --filter=combine:tree:2+blob:none HEAD \
+               >actual &&
+
+       expect_has HEAD "" &&
+       expect_has HEAD~1 "" &&
+       expect_has HEAD dir1 &&
+
+       # There are also 2 commit objects
+       test_line_count = 5 actual &&
+
+       cp actual expected &&
+
+       # Try again using repeated --filter - this is equivalent to a manual
+       # combine with "combine:...+..."
+       git -C r3 rev-list --objects --filter=combine:tree:2 \
+               --filter=blob:none HEAD >actual &&
+
+       test_cmp expected actual
+'
+
+test_expect_success 'combine:... with URL encoding' '
+       git -C r3 rev-list --objects \
+               --filter=combine:tree%3a2+blob:%6Eon%65 HEAD >actual &&
+
+       expect_has HEAD "" &&
+       expect_has HEAD~1 "" &&
+       expect_has HEAD dir1 &&
+
+       # There are also 2 commit objects
+       test_line_count = 5 actual
+'
+
+expect_invalid_filter_spec () {
+       spec="$1" &&
+       err="$2" &&
+
+       test_must_fail git -C r3 rev-list --objects --filter="$spec" HEAD \
+               >actual 2>actual_stderr &&
+       test_must_be_empty actual &&
+       test_i18ngrep "$err" actual_stderr
+}
+
+test_expect_success 'combine:... while URL-encoding things that should not be' '
+       expect_invalid_filter_spec combine%3Atree:2+blob:none \
+               "invalid filter-spec"
+'
+
+test_expect_success 'combine: with nothing after the :' '
+       expect_invalid_filter_spec combine: "expected something after combine:"
+'
+
+test_expect_success 'parse error in first sub-filter in combine:' '
+       expect_invalid_filter_spec combine:tree:asdf+blob:none \
+               "expected .tree:<depth>."
+'
+
+test_expect_success 'combine:... with non-encoded reserved chars' '
+       expect_invalid_filter_spec combine:tree:2+sparse:@xyz \
+               "must escape char in sub-filter-spec: .@." &&
+       expect_invalid_filter_spec combine:tree:2+sparse:\` \
+               "must escape char in sub-filter-spec: .\`." &&
+       expect_invalid_filter_spec combine:tree:2+sparse:~abc \
+               "must escape char in sub-filter-spec: .\~."
+'
+
+test_expect_success 'validate err msg for "combine:<valid-filter>+"' '
+       expect_invalid_filter_spec combine:tree:2+ "expected .tree:<depth>."
+'
+
+test_expect_success 'combine:... with edge-case hex digits: Ff Aa 0 9' '
+       git -C r3 rev-list --objects --filter="combine:tree:2+bl%6Fb:n%6fne" \
+               HEAD >actual &&
+       test_line_count = 5 actual &&
+       git -C r3 rev-list --objects --filter="combine:tree%3A2+blob%3anone" \
+               HEAD >actual &&
+       test_line_count = 5 actual &&
+       git -C r3 rev-list --objects --filter="combine:tree:%30" HEAD >actual &&
+       test_line_count = 2 actual &&
+       git -C r3 rev-list --objects --filter="combine:tree:%39+blob:none" \
+               HEAD >actual &&
+       test_line_count = 5 actual
+'
+
+test_expect_success 'add sparse pattern blobs whose paths have reserved chars' '
+       cp r3/pattern r3/pattern1+renamed% &&
+       cp r3/pattern "r3/p;at%ter+n" &&
+       cp r3/pattern r3/^~pattern &&
+       git -C r3 add pattern1+renamed% "p;at%ter+n" ^~pattern &&
+       git -C r3 commit -m "add sparse pattern files with reserved chars"
+'
+
+test_expect_success 'combine:... with more than two sub-filters' '
+       git -C r3 rev-list --objects \
+               --filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern \
+               HEAD >actual &&
+
+       expect_has HEAD "" &&
+       expect_has HEAD~1 "" &&
+       expect_has HEAD~2 "" &&
+       expect_has HEAD dir1 &&
+       expect_has HEAD dir1/sparse1 &&
+       expect_has HEAD dir1/sparse2 &&
+
+       # Should also have 3 commits
+       test_line_count = 9 actual &&
+
+       # Try again, this time making sure the last sub-filter is only
+       # URL-decoded once.
+       cp actual expect &&
+
+       git -C r3 rev-list --objects \
+               --filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern1%2brenamed%25 \
+               HEAD >actual &&
+       test_cmp expect actual &&
+
+       # Use the same composite filter again, but with a pattern file name that
+       # requires encoding multiple characters, and use implicit filter
+       # combining.
+       test_when_finished "rm -f trace1" &&
+       GIT_TRACE=$(pwd)/trace1 git -C r3 rev-list --objects \
+               --filter=tree:3 --filter=blob:limit=40 \
+               --filter=sparse:oid="master:p;at%ter+n" \
+               HEAD >actual &&
+
+       test_cmp expect actual &&
+       grep "Add to combine filter-spec: sparse:oid=master:p%3bat%25ter%2bn" \
+               trace1 &&
+
+       # Repeat the above test, but this time, the characters to encode are in
+       # the LHS of the combined filter.
+       test_when_finished "rm -f trace2" &&
+       GIT_TRACE=$(pwd)/trace2 git -C r3 rev-list --objects \
+               --filter=sparse:oid=master:^~pattern \
+               --filter=tree:3 --filter=blob:limit=40 \
+               HEAD >actual &&
+
+       test_cmp expect actual &&
+       grep "Add to combine filter-spec: sparse:oid=master:%5e%7epattern" \
+               trace2
+'
+
 # Test provisional omit collection logic with a repo that has objects appearing
 # at multiple depths - first deeper than the filter's threshold, then shallow.
 
@@ -373,6 +527,37 @@ test_expect_success 'verify skipping tree iteration when collecting omits' '
        test_cmp expect actual
 '
 
+test_expect_success 'setup r5' '
+       git init r5 &&
+       mkdir -p r5/subdir &&
+
+       echo 1     >r5/short-root          &&
+       echo 12345 >r5/long-root           &&
+       echo a     >r5/subdir/short-subdir &&
+       echo abcde >r5/subdir/long-subdir  &&
+
+       git -C r5 add short-root long-root subdir &&
+       git -C r5 commit -m "commit msg"
+'
+
+test_expect_success 'verify collecting omits in combined: filter' '
+       # Note that this test guards against the naive implementation of simply
+       # giving both filters the same "omits" set and expecting it to
+       # automatically merge them.
+       git -C r5 rev-list --objects --quiet --filter-print-omitted \
+               --filter=combine:tree:2+blob:limit=3 HEAD >actual &&
+
+       # Expect 0 trees/commits, 3 blobs omitted (all blobs except short-root)
+       omitted_1=$(echo 12345 | git hash-object --stdin) &&
+       omitted_2=$(echo a     | git hash-object --stdin) &&
+       omitted_3=$(echo abcde | git hash-object --stdin) &&
+
+       grep ~$omitted_1 actual &&
+       grep ~$omitted_2 actual &&
+       grep ~$omitted_3 actual &&
+       test_line_count = 3 actual
+'
+
 # Test tree:<depth> where a tree is iterated to twice - once where a subentry is
 # too deep to be included, and again where the blob inside it is shallow enough
 # to be included. This makes sure we don't use LOFR_MARK_SEEN incorrectly (we
@@ -441,11 +626,4 @@ test_expect_success 'expand blob limit in protocol' '
        grep "blob:limit=1024" trace
 '
 
-test_expect_success 'expand tree depth limit in protocol' '
-       GIT_TRACE_PACKET="$(pwd)/tree_trace" git -c protocol.version=2 clone \
-               --filter=tree:0k "file://$(pwd)/r2" tree &&
-       ! grep "tree:0k" tree_trace &&
-       grep "tree:0" tree_trace
-'
-
 test_done
index 6b05a88faf59ee74840b0f9fa340a5d9397a0f5d..31691117725acacfb0069ce002eec844bc2ac52a 100644 (file)
@@ -682,13 +682,9 @@ static int fetch(struct transport *transport,
                set_helper_option(transport, "update-shallow", "true");
 
        if (data->transport_options.filter_options.choice) {
-               struct strbuf expanded_filter_spec = STRBUF_INIT;
-               expand_list_objects_filter_spec(
-                       &data->transport_options.filter_options,
-                       &expanded_filter_spec);
-               set_helper_option(transport, "filter",
-                                 expanded_filter_spec.buf);
-               strbuf_release(&expanded_filter_spec);
+               const char *spec = expand_list_objects_filter_spec(
+                       &data->transport_options.filter_options);
+               set_helper_option(transport, "filter", spec);
        }
 
        if (data->transport_options.negotiation_tips)
index 778c60bf573a714134435e438da5d390fa490484..6324cfc482c2ca48257229eeaf232dae3dbb053b 100644 (file)
@@ -224,6 +224,7 @@ static int set_git_option(struct git_transport_options *opts,
                opts->no_dependents = !!value;
                return 0;
        } else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) {
+               list_objects_filter_die_if_populated(&opts->filter_options);
                parse_list_objects_filter(&opts->filter_options, value);
                return 0;
        }
index 222cd3ad8960f352ee711915323ec1f36e5e7673..875db9299682c5d9fb593241e1907c3df9e7d597 100644 (file)
@@ -140,18 +140,17 @@ static void create_pack_file(const struct object_array *have_obj,
                argv_array_push(&pack_objects.args, "--delta-base-offset");
        if (use_include_tag)
                argv_array_push(&pack_objects.args, "--include-tag");
-       if (filter_options.filter_spec) {
-               struct strbuf expanded_filter_spec = STRBUF_INIT;
-               expand_list_objects_filter_spec(&filter_options,
-                                               &expanded_filter_spec);
+       if (filter_options.choice) {
+               const char *spec =
+                       expand_list_objects_filter_spec(&filter_options);
                if (pack_objects.use_shell) {
                        struct strbuf buf = STRBUF_INIT;
-                       sq_quote_buf(&buf, expanded_filter_spec.buf);
+                       sq_quote_buf(&buf, spec);
                        argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf);
                        strbuf_release(&buf);
                } else {
                        argv_array_pushf(&pack_objects.args, "--filter=%s",
-                                        expanded_filter_spec.buf);
+                                        spec);
                }
        }
 
@@ -884,6 +883,7 @@ static void receive_needs(struct packet_reader *reader, struct object_array *wan
                if (skip_prefix(reader->line, "filter ", &arg)) {
                        if (!filter_capability_requested)
                                die("git upload-pack: filtering capability not negotiated");
+                       list_objects_filter_die_if_populated(&filter_options);
                        parse_list_objects_filter(&filter_options, arg);
                        continue;
                }
@@ -1305,6 +1305,7 @@ static void process_args(struct packet_reader *request,
                }
 
                if (allow_filter && skip_prefix(arg, "filter ", &p)) {
+                       list_objects_filter_die_if_populated(&filter_options);
                        parse_list_objects_filter(&filter_options, p);
                        continue;
                }
diff --git a/url.c b/url.c
index 1b8ef78ceab03784ad48f8411b20669e2ea1ea1f..e34e5e751737aeb10a6afecc0a2cdf0ec78fa5fc 100644 (file)
--- a/url.c
+++ b/url.c
@@ -86,6 +86,12 @@ char *url_decode_mem(const char *url, int len)
        return url_decode_internal(&url, len, NULL, &out, 0);
 }
 
+char *url_percent_decode(const char *encoded)
+{
+       struct strbuf out = STRBUF_INIT;
+       return url_decode_internal(&encoded, strlen(encoded), NULL, &out, 0);
+}
+
 char *url_decode_parameter_name(const char **query)
 {
        struct strbuf out = STRBUF_INIT;
diff --git a/url.h b/url.h
index 00b7d58c33e38b2a48e4e2ab557239b1ed0c759d..2a27c3427763b210b123b56c1e6553f8012f00bc 100644 (file)
--- a/url.h
+++ b/url.h
@@ -7,6 +7,14 @@ int is_url(const char *url);
 int is_urlschemechar(int first_flag, int ch);
 char *url_decode(const char *url);
 char *url_decode_mem(const char *url, int len);
+
+/*
+ * Similar to the url_decode_{,mem} methods above, but doesn't assume there
+ * is a scheme followed by a : at the start of the string. Instead, %-sequences
+ * before any : are also parsed.
+ */
+char *url_percent_decode(const char *encoded);
+
 char *url_decode_parameter_name(const char **query);
 char *url_decode_parameter_value(const char **query);