Merge branch 'ds/push-sparse-tree-walk'
authorJunio C Hamano <gitster@pobox.com>
Thu, 7 Feb 2019 06:05:24 +0000 (22:05 -0800)
committerJunio C Hamano <gitster@pobox.com>
Thu, 7 Feb 2019 06:05:25 +0000 (22:05 -0800)
"git pack-objects" learned another algorithm to compute the set of
objects to send, that trades the resulting packfile off to save
traversal cost to favor small pushes.

* ds/push-sparse-tree-walk:
pack-objects: create GIT_TEST_PACK_SPARSE
pack-objects: create pack.useSparse setting
revision: implement sparse algorithm
list-objects: consume sparse tree walk
revision: add mark_tree_uninteresting_sparse

1  2 
bisect.c
builtin/pack-objects.c
builtin/rev-list.c
http-push.c
list-objects.c
revision.c
revision.h
t/README
diff --combined bisect.c
index 6bf521138a590496a7b3fa7c2515ac53f815a1e3,842f8b4b8f373a29880f7248bc8a4f5b6cb1f543..3af955c4bc4e1c7cd155a5f2e460cee38af732cd
+++ b/bisect.c
@@@ -558,8 -558,7 +558,8 @@@ struct commit_list *filter_skipped(stru
   * is increased by one between each call, but that should not matter
   * for this application.
   */
 -static unsigned get_prn(unsigned count) {
 +static unsigned get_prn(unsigned count)
 +{
        count = count * 1103515245 + 12345;
        return (count/65536) % PRN_MODULO;
  }
@@@ -627,15 -626,14 +627,15 @@@ static struct commit_list *managed_skip
        return skip_away(list, count);
  }
  
 -static void bisect_rev_setup(struct rev_info *revs, const char *prefix,
 +static void bisect_rev_setup(struct repository *r, struct rev_info *revs,
 +                           const char *prefix,
                             const char *bad_format, const char *good_format,
                             int read_paths)
  {
        struct argv_array rev_argv = ARGV_ARRAY_INIT;
        int i;
  
 -      repo_init_revisions(the_repository, revs, prefix);
 +      repo_init_revisions(r, revs, prefix);
        revs->abbrev = 0;
        revs->commit_format = CMIT_FMT_UNSPECIFIED;
  
@@@ -658,7 -656,7 +658,7 @@@ static void bisect_common(struct rev_in
        if (prepare_revision_walk(revs))
                die("revision walk setup failed");
        if (revs->tree_objects)
-               mark_edges_uninteresting(revs, NULL);
+               mark_edges_uninteresting(revs, NULL, 0);
  }
  
  static void exit_if_skipped_commits(struct commit_list *tried,
@@@ -725,25 -723,23 +725,25 @@@ static int bisect_checkout(const struc
        return run_command_v_opt(argv_show_branch, RUN_GIT_CMD);
  }
  
 -static struct commit *get_commit_reference(const struct object_id *oid)
 +static struct commit *get_commit_reference(struct repository *r,
 +                                         const struct object_id *oid)
  {
 -      struct commit *r = lookup_commit_reference(the_repository, oid);
 -      if (!r)
 +      struct commit *c = lookup_commit_reference(r, oid);
 +      if (!c)
                die(_("Not a valid commit name %s"), oid_to_hex(oid));
 -      return r;
 +      return c;
  }
  
 -static struct commit **get_bad_and_good_commits(int *rev_nr)
 +static struct commit **get_bad_and_good_commits(struct repository *r,
 +                                              int *rev_nr)
  {
        struct commit **rev;
        int i, n = 0;
  
        ALLOC_ARRAY(rev, 1 + good_revs.nr);
 -      rev[n++] = get_commit_reference(current_bad_oid);
 +      rev[n++] = get_commit_reference(r, current_bad_oid);
        for (i = 0; i < good_revs.nr; i++)
 -              rev[n++] = get_commit_reference(good_revs.oid + i);
 +              rev[n++] = get_commit_reference(r, good_revs.oid + i);
        *rev_nr = n;
  
        return rev;
@@@ -827,13 -823,12 +827,13 @@@ static void check_merge_bases(int rev_n
        free_commit_list(result);
  }
  
 -static int check_ancestors(int rev_nr, struct commit **rev, const char *prefix)
 +static int check_ancestors(struct repository *r, int rev_nr,
 +                         struct commit **rev, const char *prefix)
  {
        struct rev_info revs;
        int res;
  
 -      bisect_rev_setup(&revs, prefix, "^%s", "%s", 0);
 +      bisect_rev_setup(r, &revs, prefix, "^%s", "%s", 0);
  
        bisect_common(&revs);
        res = (revs.commits != NULL);
   * If a merge base must be tested by the user, its source code will be
   * checked out to be tested by the user and we will exit.
   */
 -static void check_good_are_ancestors_of_bad(const char *prefix, int no_checkout)
 +static void check_good_are_ancestors_of_bad(struct repository *r,
 +                                          const char *prefix,
 +                                          int no_checkout)
  {
        char *filename = git_pathdup("BISECT_ANCESTORS_OK");
        struct stat st;
                goto done;
  
        /* Check if all good revs are ancestor of the bad rev. */
 -      rev = get_bad_and_good_commits(&rev_nr);
 -      if (check_ancestors(rev_nr, rev, prefix))
 +      rev = get_bad_and_good_commits(r, &rev_nr);
 +      if (check_ancestors(r, rev_nr, rev, prefix))
                check_merge_bases(rev_nr, rev, no_checkout);
        free(rev);
  
  /*
   * This does "git diff-tree --pretty COMMIT" without one fork+exec.
   */
 -static void show_diff_tree(const char *prefix, struct commit *commit)
 +static void show_diff_tree(struct repository *r,
 +                         const char *prefix,
 +                         struct commit *commit)
  {
        struct rev_info opt;
  
        /* diff-tree init */
 -      repo_init_revisions(the_repository, &opt, prefix);
 +      repo_init_revisions(r, &opt, prefix);
        git_config(git_diff_basic_config, NULL); /* no "diff" UI options */
        opt.abbrev = 0;
        opt.diff = 1;
@@@ -954,7 -945,7 +954,7 @@@ void read_bisect_terms(const char **rea
   * If no_checkout is non-zero, the bisection process does not
   * checkout the trial commit but instead simply updates BISECT_HEAD.
   */
 -int bisect_next_all(const char *prefix, int no_checkout)
 +int bisect_next_all(struct repository *r, const char *prefix, int no_checkout)
  {
        struct rev_info revs;
        struct commit_list *tried;
        if (read_bisect_refs())
                die(_("reading bisect refs failed"));
  
 -      check_good_are_ancestors_of_bad(prefix, no_checkout);
 +      check_good_are_ancestors_of_bad(r, prefix, no_checkout);
  
 -      bisect_rev_setup(&revs, prefix, "%s", "^%s", 1);
 +      bisect_rev_setup(r, &revs, prefix, "%s", "^%s", 1);
        revs.limited = 1;
  
        bisect_common(&revs);
                exit_if_skipped_commits(tried, current_bad_oid);
                printf("%s is the first %s commit\n", oid_to_hex(bisect_rev),
                        term_bad);
 -              show_diff_tree(prefix, revs.commits->item);
 +              show_diff_tree(r, prefix, revs.commits->item);
                /* This means the bisection process succeeded. */
                exit(10);
        }
diff --combined builtin/pack-objects.c
index ffef8dcf2f279cfe31363fe5051ff2bac3c5a77f,507d381153511d52dac88f144a5887d0536c10d4..bd67491c1633602a7fab3c48e86bde4421f92c4e
@@@ -84,6 -84,7 +84,7 @@@ static unsigned long pack_size_limit
  static int depth = 50;
  static int delta_search_threads;
  static int pack_to_stdout;
+ static int sparse;
  static int thin;
  static int num_preferred_base;
  static struct progress *progress_state;
@@@ -970,7 -971,7 +971,7 @@@ static int no_try_delta(const char *pat
  
        if (!check)
                check = attr_check_initl("delta", NULL);
 -      git_check_attr(&the_index, path, check);
 +      git_check_attr(the_repository->index, path, check);
        if (ATTR_FALSE(check->items[0].value))
                return 1;
        return 0;
@@@ -1334,7 -1335,7 +1335,7 @@@ static void add_pbase_object(struct tre
                if (cmp < 0)
                        return;
                if (name[cmplen] != '/') {
 -                      add_object_entry(entry.oid,
 +                      add_object_entry(&entry.oid,
                                         object_type(entry.mode),
                                         fullname, 1);
                        return;
                        const char *down = name+cmplen+1;
                        int downlen = name_cmp_len(down);
  
 -                      tree = pbase_tree_get(entry.oid);
 +                      tree = pbase_tree_get(&entry.oid);
                        if (!tree)
                                return;
                        init_tree_desc(&sub, tree->tree_data, tree->tree_size);
@@@ -1953,6 -1954,11 +1954,6 @@@ static int delta_cacheable(unsigned lon
        return 0;
  }
  
 -/* Protect access to object database */
 -static pthread_mutex_t read_mutex;
 -#define read_lock()           pthread_mutex_lock(&read_mutex)
 -#define read_unlock()         pthread_mutex_unlock(&read_mutex)
 -
  /* Protect delta_cache_size */
  static pthread_mutex_t cache_mutex;
  #define cache_lock()          pthread_mutex_lock(&cache_mutex)
@@@ -1988,11 -1994,11 +1989,11 @@@ unsigned long oe_get_size_slow(struct p
        unsigned long used, avail, size;
  
        if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
 -              read_lock();
 +              packing_data_lock(&to_pack);
                if (oid_object_info(the_repository, &e->idx.oid, &size) < 0)
                        die(_("unable to get size of %s"),
                            oid_to_hex(&e->idx.oid));
 -              read_unlock();
 +              packing_data_unlock(&to_pack);
                return size;
        }
  
        if (!p)
                BUG("when e->type is a delta, it must belong to a pack");
  
 -      read_lock();
 +      packing_data_lock(&to_pack);
        w_curs = NULL;
        buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
        used = unpack_object_header_buffer(buf, avail, &type, &size);
                    oid_to_hex(&e->idx.oid));
  
        unuse_pack(&w_curs);
 -      read_unlock();
 +      packing_data_unlock(&to_pack);
        return size;
  }
  
@@@ -2071,9 -2077,9 +2072,9 @@@ static int try_delta(struct unpacked *t
  
        /* Load data if not already done */
        if (!trg->data) {
 -              read_lock();
 +              packing_data_lock(&to_pack);
                trg->data = read_object_file(&trg_entry->idx.oid, &type, &sz);
 -              read_unlock();
 +              packing_data_unlock(&to_pack);
                if (!trg->data)
                        die(_("object %s cannot be read"),
                            oid_to_hex(&trg_entry->idx.oid));
                *mem_usage += sz;
        }
        if (!src->data) {
 -              read_lock();
 +              packing_data_lock(&to_pack);
                src->data = read_object_file(&src_entry->idx.oid, &type, &sz);
 -              read_unlock();
 +              packing_data_unlock(&to_pack);
                if (!src->data) {
                        if (src_entry->preferred_base) {
                                static int warned = 0;
@@@ -2332,9 -2338,9 +2333,9 @@@ static void find_deltas(struct object_e
  
  static void try_to_free_from_threads(size_t size)
  {
 -      read_lock();
 +      packing_data_lock(&to_pack);
        release_pack_memory(size);
 -      read_unlock();
 +      packing_data_unlock(&to_pack);
  }
  
  static try_to_free_t old_try_to_free_routine;
@@@ -2376,6 -2382,7 +2377,6 @@@ static pthread_cond_t progress_cond
   */
  static void init_threaded_search(void)
  {
 -      init_recursive_mutex(&read_mutex);
        pthread_mutex_init(&cache_mutex, NULL);
        pthread_mutex_init(&progress_mutex, NULL);
        pthread_cond_init(&progress_cond, NULL);
@@@ -2386,6 -2393,7 +2387,6 @@@ static void cleanup_threaded_search(voi
  {
        set_try_to_free_routine(old_try_to_free_routine);
        pthread_cond_destroy(&progress_cond);
 -      pthread_mutex_destroy(&read_mutex);
        pthread_mutex_destroy(&cache_mutex);
        pthread_mutex_destroy(&progress_mutex);
  }
@@@ -2603,7 -2611,7 +2604,7 @@@ static void prepare_pack(int window, in
        unsigned n;
  
        if (use_delta_islands)
 -              resolve_tree_islands(progress, &to_pack);
 +              resolve_tree_islands(the_repository, progress, &to_pack);
  
        get_object_details();
  
@@@ -2703,6 -2711,10 +2704,10 @@@ static int git_pack_config(const char *
                use_bitmap_index_default = git_config_bool(k, v);
                return 0;
        }
+       if (!strcmp(k, "pack.usesparse")) {
+               sparse = git_config_bool(k, v);
+               return 0;
+       }
        if (!strcmp(k, "pack.threads")) {
                delta_search_threads = git_config_int(k, v);
                if (delta_search_threads < 0)
@@@ -3077,16 -3089,14 +3082,16 @@@ static void record_recent_commit(struc
  static void get_object_list(int ac, const char **av)
  {
        struct rev_info revs;
 +      struct setup_revision_opt s_r_opt = {
 +              .allow_exclude_promisor_objects = 1,
 +      };
        char line[1000];
        int flags = 0;
        int save_warning;
  
        repo_init_revisions(the_repository, &revs, NULL);
        save_commit_buffer = 0;
 -      revs.allow_exclude_promisor_objects_opt = 1;
 -      setup_revisions(ac, av, &revs, NULL);
 +      setup_revisions(ac, av, &revs, &s_r_opt);
  
        /* make sure shallows are read */
        is_repository_shallow(the_repository);
                return;
  
        if (use_delta_islands)
 -              load_delta_islands();
 +              load_delta_islands(the_repository);
  
        if (prepare_revision_walk(&revs))
                die(_("revision walk setup failed"));
-       mark_edges_uninteresting(&revs, show_edge);
+       mark_edges_uninteresting(&revs, show_edge, sparse);
  
        if (!fn_show_object)
                fn_show_object = show_object;
@@@ -3287,6 -3297,8 +3292,8 @@@ int cmd_pack_objects(int argc, const ch
                { OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"),
                  N_("unpack unreachable objects newer than <time>"),
                  PARSE_OPT_OPTARG, option_parse_unpack_unreachable },
+               OPT_BOOL(0, "sparse", &sparse,
+                        N_("use the sparse reachability algorithm")),
                OPT_BOOL(0, "thin", &thin,
                         N_("create thin packs")),
                OPT_BOOL(0, "shallow", &shallow,
  
        read_replace_refs = 0;
  
+       sparse = git_env_bool("GIT_TEST_PACK_SPARSE", 0);
        reset_pack_idx_option(&pack_idx_opts);
        git_config(git_pack_config, NULL);
  
                }
        }
  
 -      prepare_packing_data(&to_pack);
 +      prepare_packing_data(the_repository, &to_pack);
  
        if (progress)
                progress_state = start_progress(_("Enumerating objects"), 0);
diff --combined builtin/rev-list.c
index 14ef659c12a49e25bfc8b8143a7fb5b4138d1787,9663cbfae0b8edc7fc12ff993d9e368c16127689..5b5b6dbb1c9b6f9893f3d0420c99655eb19501a8
@@@ -197,8 -197,7 +197,8 @@@ static void finish_commit(struct commi
                free_commit_list(commit->parents);
                commit->parents = NULL;
        }
 -      free_commit_buffer(commit);
 +      free_commit_buffer(the_repository->parsed_objects,
 +                         commit);
  }
  
  static inline void finish_object__ma(struct object *obj)
@@@ -362,9 -361,6 +362,9 @@@ int cmd_rev_list(int argc, const char *
  {
        struct rev_info revs;
        struct rev_list_info info;
 +      struct setup_revision_opt s_r_opt = {
 +              .allow_exclude_promisor_objects = 1,
 +      };
        int i;
        int bisect_list = 0;
        int bisect_show_vars = 0;
        git_config(git_default_config, NULL);
        repo_init_revisions(the_repository, &revs, prefix);
        revs.abbrev = DEFAULT_ABBREV;
 -      revs.allow_exclude_promisor_objects_opt = 1;
        revs.commit_format = CMIT_FMT_UNSPECIFIED;
        revs.do_not_die_on_missing_tree = 1;
  
                }
        }
  
 -      argc = setup_revisions(argc, argv, &revs, NULL);
 +      argc = setup_revisions(argc, argv, &revs, &s_r_opt);
  
        memset(&info, 0, sizeof(info));
        info.revs = &revs;
        if (prepare_revision_walk(&revs))
                die("revision walk setup failed");
        if (revs.tree_objects)
-               mark_edges_uninteresting(&revs, show_edge);
+               mark_edges_uninteresting(&revs, show_edge, 0);
  
        if (bisect_list) {
                int reaches, all;
diff --combined http-push.c
index bb802d80ee08945b0008d8f6862e29c783410b35,ea52d6f9f6481657aa0c4591d193ab1fe4864ae0..77e2e22852d769a04dc0275498964eff86e362d8
@@@ -1311,11 -1311,11 +1311,11 @@@ static struct object_list **process_tre
        while (tree_entry(&desc, &entry))
                switch (object_type(entry.mode)) {
                case OBJ_TREE:
 -                      p = process_tree(lookup_tree(the_repository, entry.oid),
 +                      p = process_tree(lookup_tree(the_repository, &entry.oid),
                                         p);
                        break;
                case OBJ_BLOB:
 -                      p = process_blob(lookup_blob(the_repository, entry.oid),
 +                      p = process_blob(lookup_blob(the_repository, &entry.oid),
                                         p);
                        break;
                default:
@@@ -1933,7 -1933,7 +1933,7 @@@ int cmd_main(int argc, const char **arg
                pushing = 0;
                if (prepare_revision_walk(&revs))
                        die("revision walk setup failed");
-               mark_edges_uninteresting(&revs, NULL);
+               mark_edges_uninteresting(&revs, NULL, 0);
                objects_to_send = get_delta(&revs, ref_lock);
                finish_all_active_slots();
  
diff --combined list-objects.c
index a2296a8e7b42a3d5d044c648d353ac94f9b4ed81,fb728f784267268594777e6a15d7f94ae4e6fb71..dc77361e11d02a4eaa994f4351310a0d54d33ef6
@@@ -55,8 -55,7 +55,8 @@@ static void process_blob(struct travers
        pathlen = path->len;
        strbuf_addstr(path, name);
        if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
 -              r = ctx->filter_fn(LOFS_BLOB, obj,
 +              r = ctx->filter_fn(ctx->revs->repo,
 +                                 LOFS_BLOB, obj,
                                   path->buf, &path->buf[pathlen],
                                   ctx->filter_data);
        if (r & LOFR_MARK_SEEN)
@@@ -114,8 -113,7 +114,8 @@@ static void process_tree_contents(struc
  
        while (tree_entry(&desc, &entry)) {
                if (match != all_entries_interesting) {
 -                      match = tree_entry_interesting(&entry, base, 0,
 +                      match = tree_entry_interesting(ctx->revs->repo->index,
 +                                                     &entry, base, 0,
                                                       &ctx->revs->diffopt.pathspec);
                        if (match == all_entries_not_interesting)
                                break;
                }
  
                if (S_ISDIR(entry.mode)) {
 -                      struct tree *t = lookup_tree(the_repository, entry.oid);
 +                      struct tree *t = lookup_tree(ctx->revs->repo, &entry.oid);
                        t->object.flags |= NOT_USER_GIVEN;
                        process_tree(ctx, t, base, entry.path);
                }
                else if (S_ISGITLINK(entry.mode))
 -                      process_gitlink(ctx, entry.oid->hash,
 +                      process_gitlink(ctx, entry.oid.hash,
                                        base, entry.path);
                else {
 -                      struct blob *b = lookup_blob(the_repository, entry.oid);
 +                      struct blob *b = lookup_blob(ctx->revs->repo, &entry.oid);
                        b->object.flags |= NOT_USER_GIVEN;
                        process_blob(ctx, b, base, entry.path);
                }
@@@ -177,8 -175,7 +177,8 @@@ static void process_tree(struct travers
  
        strbuf_addstr(base, name);
        if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
 -              r = ctx->filter_fn(LOFS_BEGIN_TREE, obj,
 +              r = ctx->filter_fn(ctx->revs->repo,
 +                                 LOFS_BEGIN_TREE, obj,
                                   base->buf, &base->buf[baselen],
                                   ctx->filter_data);
        if (r & LOFR_MARK_SEEN)
                process_tree_contents(ctx, tree, base);
  
        if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) {
 -              r = ctx->filter_fn(LOFS_END_TREE, obj,
 +              r = ctx->filter_fn(ctx->revs->repo,
 +                                 LOFS_END_TREE, obj,
                                   base->buf, &base->buf[baselen],
                                   ctx->filter_data);
                if (r & LOFR_MARK_SEEN)
@@@ -226,25 -222,73 +226,73 @@@ static void mark_edge_parents_uninteres
        }
  }
  
- void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge)
+ static void add_edge_parents(struct commit *commit,
+                            struct rev_info *revs,
+                            show_edge_fn show_edge,
+                            struct oidset *set)
+ {
+       struct commit_list *parents;
+       for (parents = commit->parents; parents; parents = parents->next) {
+               struct commit *parent = parents->item;
+               struct tree *tree = get_commit_tree(parent);
+               if (!tree)
+                       continue;
+               oidset_insert(set, &tree->object.oid);
+               if (!(parent->object.flags & UNINTERESTING))
+                       continue;
+               tree->object.flags |= UNINTERESTING;
+               if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
+                       parent->object.flags |= SHOWN;
+                       show_edge(parent);
+               }
+       }
+ }
+ void mark_edges_uninteresting(struct rev_info *revs,
+                             show_edge_fn show_edge,
+                             int sparse)
  {
        struct commit_list *list;
        int i;
  
-       for (list = revs->commits; list; list = list->next) {
-               struct commit *commit = list->item;
+       if (sparse) {
+               struct oidset set;
+               oidset_init(&set, 16);
+               for (list = revs->commits; list; list = list->next) {
+                       struct commit *commit = list->item;
+                       struct tree *tree = get_commit_tree(commit);
  
-               if (commit->object.flags & UNINTERESTING) {
-                       mark_tree_uninteresting(revs->repo,
-                                               get_commit_tree(commit));
-                       if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) {
-                               commit->object.flags |= SHOWN;
-                               show_edge(commit);
+                       if (commit->object.flags & UNINTERESTING)
+                               tree->object.flags |= UNINTERESTING;
+                       oidset_insert(&set, &tree->object.oid);
+                       add_edge_parents(commit, revs, show_edge, &set);
+               }
+               mark_trees_uninteresting_sparse(revs->repo, &set);
+               oidset_clear(&set);
+       } else {
+               for (list = revs->commits; list; list = list->next) {
+                       struct commit *commit = list->item;
+                       if (commit->object.flags & UNINTERESTING) {
+                               mark_tree_uninteresting(revs->repo,
+                                                       get_commit_tree(commit));
+                               if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) {
+                                       commit->object.flags |= SHOWN;
+                                       show_edge(commit);
+                               }
+                               continue;
                        }
-                       continue;
+                       mark_edge_parents_uninteresting(commit, revs, show_edge);
                }
-               mark_edge_parents_uninteresting(commit, revs, show_edge);
        }
        if (revs->edge_hint_aggressive) {
                for (i = 0; i < revs->cmdline.nr; i++) {
                        struct object *obj = revs->cmdline.rev[i].item;
diff --combined revision.c
index 5c7d3c75d7d8697dbf4ab41441a9e22b7f0a536d,5de739384a0bfc9b61f7c0052debca0283c6f1b5..162d511d4686bc43f2032a57fa95e3601eb3d6b2
@@@ -27,6 -27,7 +27,7 @@@
  #include "commit-reach.h"
  #include "commit-graph.h"
  #include "prio-queue.h"
+ #include "hashmap.h"
  
  volatile show_early_output_fn_t show_early_output;
  
@@@ -67,10 -68,10 +68,10 @@@ static void mark_tree_contents_unintere
        while (tree_entry(&desc, &entry)) {
                switch (object_type(entry.mode)) {
                case OBJ_TREE:
 -                      mark_tree_uninteresting(r, lookup_tree(r, entry.oid));
 +                      mark_tree_uninteresting(r, lookup_tree(r, &entry.oid));
                        break;
                case OBJ_BLOB:
 -                      mark_blob_uninteresting(lookup_blob(r, entry.oid));
 +                      mark_blob_uninteresting(lookup_blob(r, &entry.oid));
                        break;
                default:
                        /* Subproject commit - not in this repository */
@@@ -99,6 -100,148 +100,148 @@@ void mark_tree_uninteresting(struct rep
        mark_tree_contents_uninteresting(r, tree);
  }
  
 -                      paths_and_oids_insert(map, entry.path, entry.oid);
+ struct path_and_oids_entry {
+       struct hashmap_entry ent;
+       char *path;
+       struct oidset trees;
+ };
+ static int path_and_oids_cmp(const void *hashmap_cmp_fn_data,
+                            const struct path_and_oids_entry *e1,
+                            const struct path_and_oids_entry *e2,
+                            const void *keydata)
+ {
+       return strcmp(e1->path, e2->path);
+ }
+ static void paths_and_oids_init(struct hashmap *map)
+ {
+       hashmap_init(map, (hashmap_cmp_fn) path_and_oids_cmp, NULL, 0);
+ }
+ static void paths_and_oids_clear(struct hashmap *map)
+ {
+       struct hashmap_iter iter;
+       struct path_and_oids_entry *entry;
+       hashmap_iter_init(map, &iter);
+       while ((entry = (struct path_and_oids_entry *)hashmap_iter_next(&iter))) {
+               oidset_clear(&entry->trees);
+               free(entry->path);
+       }
+       hashmap_free(map, 1);
+ }
+ static void paths_and_oids_insert(struct hashmap *map,
+                                 const char *path,
+                                 const struct object_id *oid)
+ {
+       int hash = strhash(path);
+       struct path_and_oids_entry key;
+       struct path_and_oids_entry *entry;
+       hashmap_entry_init(&key, hash);
+       /* use a shallow copy for the lookup */
+       key.path = (char *)path;
+       oidset_init(&key.trees, 0);
+       if (!(entry = (struct path_and_oids_entry *)hashmap_get(map, &key, NULL))) {
+               entry = xcalloc(1, sizeof(struct path_and_oids_entry));
+               hashmap_entry_init(entry, hash);
+               entry->path = xstrdup(key.path);
+               oidset_init(&entry->trees, 16);
+               hashmap_put(map, entry);
+       }
+       oidset_insert(&entry->trees, oid);
+ }
+ static void add_children_by_path(struct repository *r,
+                                struct tree *tree,
+                                struct hashmap *map)
+ {
+       struct tree_desc desc;
+       struct name_entry entry;
+       if (!tree)
+               return;
+       if (parse_tree_gently(tree, 1) < 0)
+               return;
+       init_tree_desc(&desc, tree->buffer, tree->size);
+       while (tree_entry(&desc, &entry)) {
+               switch (object_type(entry.mode)) {
+               case OBJ_TREE:
 -                              struct tree *child = lookup_tree(r, entry.oid);
++                      paths_and_oids_insert(map, entry.path, &entry.oid);
+                       if (tree->object.flags & UNINTERESTING) {
 -                              struct blob *child = lookup_blob(r, entry.oid);
++                              struct tree *child = lookup_tree(r, &entry.oid);
+                               if (child)
+                                       child->object.flags |= UNINTERESTING;
+                       }
+                       break;
+               case OBJ_BLOB:
+                       if (tree->object.flags & UNINTERESTING) {
++                              struct blob *child = lookup_blob(r, &entry.oid);
+                               if (child)
+                                       child->object.flags |= UNINTERESTING;
+                       }
+                       break;
+               default:
+                       /* Subproject commit - not in this repository */
+                       break;
+               }
+       }
+       free_tree_buffer(tree);
+ }
+ void mark_trees_uninteresting_sparse(struct repository *r,
+                                    struct oidset *trees)
+ {
+       unsigned has_interesting = 0, has_uninteresting = 0;
+       struct hashmap map;
+       struct hashmap_iter map_iter;
+       struct path_and_oids_entry *entry;
+       struct object_id *oid;
+       struct oidset_iter iter;
+       oidset_iter_init(trees, &iter);
+       while ((!has_interesting || !has_uninteresting) &&
+              (oid = oidset_iter_next(&iter))) {
+               struct tree *tree = lookup_tree(r, oid);
+               if (!tree)
+                       continue;
+               if (tree->object.flags & UNINTERESTING)
+                       has_uninteresting = 1;
+               else
+                       has_interesting = 1;
+       }
+       /* Do not walk unless we have both types of trees. */
+       if (!has_uninteresting || !has_interesting)
+               return;
+       paths_and_oids_init(&map);
+       oidset_iter_init(trees, &iter);
+       while ((oid = oidset_iter_next(&iter))) {
+               struct tree *tree = lookup_tree(r, oid);
+               add_children_by_path(r, tree, &map);
+       }
+       hashmap_iter_init(&map, &map_iter);
+       while ((entry = hashmap_iter_next(&map_iter)))
+               mark_trees_uninteresting_sparse(r, &entry->trees);
+       paths_and_oids_clear(&map);
+ }
  struct commit_stack {
        struct commit **items;
        size_t nr, alloc;
@@@ -213,20 -356,7 +356,20 @@@ static struct object *get_reference(str
  {
        struct object *object;
  
 -      object = parse_object(revs->repo, oid);
 +      /*
 +       * If the repository has commit graphs, repo_parse_commit() avoids
 +       * reading the object buffer, so use it whenever possible.
 +       */
 +      if (oid_object_info(revs->repo, oid, NULL) == OBJ_COMMIT) {
 +              struct commit *c = lookup_commit(revs->repo, oid);
 +              if (!repo_parse_commit(revs->repo, c))
 +                      object = (struct object *) c;
 +              else
 +                      object = NULL;
 +      } else {
 +              object = parse_object(revs->repo, oid);
 +      }
 +
        if (!object) {
                if (revs->ignore_missing)
                        return object;
@@@ -1397,7 -1527,7 +1540,7 @@@ void add_index_objects_to_pending(struc
  {
        struct worktree **worktrees, **p;
  
 -      read_index(revs->repo->index);
 +      repo_read_index(revs->repo);
        do_add_index_objects_to_pending(revs, revs->repo->index, flags);
  
        if (revs->single_worktree)
@@@ -1476,7 -1606,6 +1619,7 @@@ void repo_init_revisions(struct reposit
        revs->abbrev = DEFAULT_ABBREV;
        revs->ignore_merges = 1;
        revs->simplify_history = 1;
 +      revs->pruning.repo = r;
        revs->pruning.flags.recursive = 1;
        revs->pruning.flags.quick = 1;
        revs->pruning.add_remove = file_add_remove;
  }
  
  static void add_pending_commit_list(struct rev_info *revs,
 -                                    struct commit_list *commit_list,
 -                                    unsigned int flags)
 +                                  struct commit_list *commit_list,
 +                                  unsigned int flags)
  {
        while (commit_list) {
                struct object *object = &commit_list->item->object;
@@@ -1544,7 -1673,7 +1687,7 @@@ static void prepare_show_merge(struct r
        head->object.flags |= SYMMETRIC_LEFT;
  
        if (!istate->cache_nr)
 -              read_index(istate);
 +              repo_read_index(revs->repo);
        for (i = 0; i < istate->cache_nr; i++) {
                const struct cache_entry *ce = istate->cache[i];
                if (!ce_stage(ce))
@@@ -1603,8 -1732,8 +1746,8 @@@ static int handle_dotdot_1(const char *
        if (!*b_name)
                b_name = "HEAD";
  
 -      if (get_oid_with_context(a_name, oc_flags, &a_oid, a_oc) ||
 -          get_oid_with_context(b_name, oc_flags, &b_oid, b_oc))
 +      if (get_oid_with_context(revs->repo, a_name, oc_flags, &a_oid, a_oc) ||
 +          get_oid_with_context(revs->repo, b_name, oc_flags, &b_oid, b_oc))
                return -1;
  
        if (!cant_be_filename) {
@@@ -1738,13 -1867,11 +1881,13 @@@ int handle_revision_arg(const char *arg
        if (revarg_opt & REVARG_COMMITTISH)
                get_sha1_flags |= GET_OID_COMMITTISH;
  
 -      if (get_oid_with_context(arg, get_sha1_flags, &oid, &oc))
 +      if (get_oid_with_context(revs->repo, arg, get_sha1_flags, &oid, &oc))
                return revs->ignore_missing ? 0 : -1;
        if (!cant_be_filename)
                verify_non_filename(revs->prefix, arg);
        object = get_reference(revs, arg, &oid, flags ^ local_flags);
 +      if (!object)
 +              return revs->ignore_missing ? 0 : -1;
        add_rev_cmdline(revs, object, arg_, REV_CMD_REV, flags ^ local_flags);
        add_pending_object_with_path(revs, object, arg, oc.mode, oc.path);
        free(oc.path);
@@@ -1807,8 -1934,7 +1950,8 @@@ static void add_message_grep(struct rev
  }
  
  static int handle_revision_opt(struct rev_info *revs, int argc, const char **argv,
 -                             int *unkc, const char **unkv)
 +                             int *unkc, const char **unkv,
 +                             const struct setup_revision_opt* opt)
  {
        const char *arg = argv[0];
        const char *optarg;
                revs->limited = 1;
        } else if (!strcmp(arg, "--ignore-missing")) {
                revs->ignore_missing = 1;
 -      } else if (revs->allow_exclude_promisor_objects_opt &&
 +      } else if (opt && opt->allow_exclude_promisor_objects &&
                   !strcmp(arg, "--exclude-promisor-objects")) {
                if (fetch_if_missing)
                        BUG("exclude_promisor_objects can only be used when fetch_if_missing is 0");
@@@ -2190,7 -2316,7 +2333,7 @@@ void parse_revision_opt(struct rev_inf
                        const char * const usagestr[])
  {
        int n = handle_revision_opt(revs, ctx->argc, ctx->argv,
 -                                  &ctx->cpidx, ctx->out);
 +                                  &ctx->cpidx, ctx->out, NULL);
        if (n <= 0) {
                error("unknown option `%s'", ctx->argv[0]);
                usage_with_options(usagestr, options);
@@@ -2408,8 -2534,7 +2551,8 @@@ int setup_revisions(int argc, const cha
                                continue;
                        }
  
 -                      opts = handle_revision_opt(revs, argc - i, argv + i, &left, argv);
 +                      opts = handle_revision_opt(revs, argc - i, argv + i,
 +                                                 &left, argv, opt);
                        if (opts > 0) {
                                i += opts - 1;
                                continue;
                struct object_id oid;
                struct object *object;
                struct object_context oc;
 -              if (get_oid_with_context(revs->def, 0, &oid, &oc))
 +              if (get_oid_with_context(revs->repo, revs->def, 0, &oid, &oc))
                        diagnose_missing_default(revs->def);
                object = get_reference(revs, revs->def, &oid, 0);
                add_pending_object_with_mode(revs, object, revs->def, oc.mode);
diff --combined revision.h
index 52e5a88ff5725862dced5c72fbc2aa6435b94e3b,df684701b9386131e9243069a31cc60926b279f7..d32d62abc6a7e65ba04a9f25c5024d6f5bbb5f49
@@@ -67,6 -67,7 +67,7 @@@ struct rev_cmdline_info 
  #define REVISION_WALK_NO_WALK_SORTED 1
  #define REVISION_WALK_NO_WALK_UNSORTED 2
  
+ struct oidset;
  struct topo_walk_info;
  
  struct rev_info {
                        do_not_die_on_missing_tree:1,
  
                        /* for internal use only */
 -                      allow_exclude_promisor_objects_opt:1,
                        exclude_promisor_objects:1;
  
        /* Diff flags */
@@@ -296,8 -298,7 +297,8 @@@ struct setup_revision_opt 
        const char *def;
        void (*tweak)(struct rev_info *, struct setup_revision_opt *);
        const char *submodule;  /* TODO: drop this and use rev_info->repo */
 -      int assume_dashdash;
 +      unsigned int    assume_dashdash:1,
 +                      allow_exclude_promisor_objects:1;
        unsigned revarg_opt;
  };
  
@@@ -327,6 -328,7 +328,7 @@@ void put_revision_mark(const struct rev
  
  void mark_parents_uninteresting(struct commit *commit);
  void mark_tree_uninteresting(struct repository *r, struct tree *tree);
+ void mark_trees_uninteresting_sparse(struct repository *r, struct oidset *trees);
  
  void show_object_with_name(FILE *, struct object *, const char *);
  
diff --combined t/README
index 25864ec88384850342f3e6122f82470424e49953,8b6dfe1864d2307560c8785aac6cace8addafb91..6140b8c45bc7890c9166d44b852782364fa675ca
+++ b/t/README
@@@ -186,22 -186,6 +186,22 @@@ appropriately before running "make"
        this feature by setting the GIT_TEST_CHAIN_LINT environment
        variable to "1" or "0", respectively.
  
 +--stress::
 +--stress=<N>::
 +      Run the test script repeatedly in multiple parallel jobs until
 +      one of them fails.  Useful for reproducing rare failures in
 +      flaky tests.  The number of parallel jobs is, in order of
 +      precedence: <N>, or the value of the GIT_TEST_STRESS_LOAD
 +      environment variable, or twice the number of available
 +      processors (as shown by the 'getconf' utility), or 8.
 +      Implies `--verbose -x --immediate` to get the most information
 +      about the failure.  Note that the verbose output of each test
 +      job is saved to 't/test-results/$TEST_NAME.stress-<nr>.out',
 +      and only the output of the failed test job is shown on the
 +      terminal.  The names of the trash directories get a
 +      '.stress-<nr>' suffix, and the trash directory of the failed
 +      test job is renamed to end with a '.stress-failed' suffix.
 +
  You can also set the GIT_TEST_INSTALLED environment variable to
  the bindir of an existing git installation to test that installation.
  You still need to have built this git sandbox, from which various
@@@ -358,6 -342,10 +358,10 @@@ GIT_TEST_INDEX_VERSION=<n> exercises th
  for the index version specified.  Can be set to any valid version
  (currently 2, 3, or 4).
  
+ GIT_TEST_PACK_SPARSE=<boolean> if enabled will default the pack-objects
+ builtin to use the sparse object walk. This can still be overridden by
+ the --no-sparse command-line argument.
  GIT_TEST_PRELOAD_INDEX=<boolean> exercises the preload-index code path
  by overriding the minimum number of cache entries required per thread.
  
@@@ -374,11 -362,6 +378,11 @@@ GIT_TEST_MULTI_PACK_INDEX=<boolean>, wh
  index to be written after every 'git repack' command, and overrides the
  'core.multiPackIndex' setting to true.
  
 +GIT_TEST_SIDEBAND_ALL=<boolean>, when true, overrides the
 +'uploadpack.allowSidebandAll' setting to true, and when false, forces
 +fetch-pack to not request sideband-all (even if the server advertises
 +sideband-all).
 +
  Naming Tests
  ------------
  
@@@ -446,8 -429,7 +450,8 @@@ This test harness library does the foll
   - Creates an empty test directory with an empty .git/objects database
     and chdir(2) into it.  This directory is 't/trash
     directory.$test_name_without_dotsh', with t/ subject to change by
 -   the --root option documented above.
 +   the --root option documented above, and a '.stress-<N>' suffix
 +   appended by the --stress option.
  
   - Defines standard test helper functions for your scripts to
     use.  These functions are designed to make all scripts behave