Merge branch 'ti/fetch-everything-local-optim'
author Junio C Hamano <gitster@pobox.com>
Mon, 9 Apr 2018 23:25:43 +0000 (08:25 +0900)
committer Junio C Hamano <gitster@pobox.com>
Mon, 9 Apr 2018 23:25:43 +0000 (08:25 +0900)
A "git fetch" from a repository with an insane number of refs into a
repository that is already up-to-date still wasted too many cycles
making many lstat(2) calls to see if the objects at the tips of these
refs exist as loose objects locally. These lstat(2) calls are
optimized away by enumerating all loose objects beforehand.

It is unknown if the new strategy negatively affects existing use
cases, such as fetching into a repository with many loose objects
from a repository with a small number of refs.

* ti/fetch-everything-local-optim:
fetch-pack.c: use oidset to check existence of loose object

1  2 
cache.h
fetch-pack.c
sha1_file.c
diff --combined cache.h
index a61b2d3f0d79b0f56992e0343803811f5265d716,6a72f54d797268637f8e09cbe98b445489b78136..09f78084dad1377d81a1ba0a028a96f6770bf051
+++ b/cache.h
@@@ -599,7 -599,6 +599,7 @@@ extern int read_index_unmerged(struct i
  
  /* For use with `write_locked_index()`. */
  #define COMMIT_LOCK           (1 << 0)
 +#define SKIP_IF_UNCHANGED     (1 << 1)
  
  /*
   * Write the index while holding an already-taken lock. Close the lock,
   * With `COMMIT_LOCK`, the lock is always committed or rolled back.
   * Without it, the lock is closed, but neither committed nor rolled
   * back.
 + *
 + * If `SKIP_IF_UNCHANGED` is given and the index is unchanged, nothing
 + * is written (and the lock is rolled back if `COMMIT_LOCK` is given).
   */
  extern int write_locked_index(struct index_state *, struct lock_file *lock, unsigned flags);
  
@@@ -1777,6 -1773,8 +1777,8 @@@ struct object_info 
  #define OBJECT_INFO_SKIP_CACHED 4
  /* Do not retry packed storage after checking packed and loose storage */
  #define OBJECT_INFO_QUICK 8
+ /* Do not check loose object */
+ #define OBJECT_INFO_IGNORE_LOOSE 16
  extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags);
  
  /*
diff --combined fetch-pack.c
index 1d6117565c2067460efc50aa4e6ca2ecb167a976,2ea358861dacb91697137dc22891122e35b6a472..52932b37f8dce61296c37d6fa821d564f0f4a38b
@@@ -711,6 -711,28 +711,28 @@@ static void mark_alternate_complete(str
        mark_complete(&obj->oid);
  }
  
+ struct loose_object_iter {
+       struct oidset *loose_object_set;
+       struct ref *refs;
+ };
+ /*
+  *  If the number of refs is not larger than the number of loose objects,
+  *  this function stops inserting.
+  */
+ static int add_loose_objects_to_set(const struct object_id *oid,
+                                   const char *path,
+                                   void *data)
+ {
+       struct loose_object_iter *iter = data;
+       oidset_insert(iter->loose_object_set, oid);
+       if (iter->refs == NULL)
+               return 1;
+       iter->refs = iter->refs->next;
+       return 0;
+ }
  static int everything_local(struct fetch_pack_args *args,
                            struct ref **refs,
                            struct ref **sought, int nr_sought)
        int retval;
        int old_save_commit_buffer = save_commit_buffer;
        timestamp_t cutoff = 0;
+       struct oidset loose_oid_set = OIDSET_INIT;
+       int use_oidset = 0;
+       struct loose_object_iter iter = {&loose_oid_set, *refs};
+       /* Enumerate all loose objects or know refs are not so many. */
+       use_oidset = !for_each_loose_object(add_loose_objects_to_set,
+                                           &iter, 0);
  
        save_commit_buffer = 0;
  
        for (ref = *refs; ref; ref = ref->next) {
                struct object *o;
+               unsigned int flags = OBJECT_INFO_QUICK;
  
-               if (!has_object_file_with_flags(&ref->old_oid,
-                                               OBJECT_INFO_QUICK))
-                       continue;
+               if (use_oidset &&
+                   !oidset_contains(&loose_oid_set, &ref->old_oid)) {
+                       /*
+                        * I know this does not exist in the loose form,
+                        * so check if it exists in a non-loose form.
+                        */
+                       flags |= OBJECT_INFO_IGNORE_LOOSE;
+               }
  
+               if (!has_object_file_with_flags(&ref->old_oid, flags))
+                       continue;
                o = parse_object(&ref->old_oid);
                if (!o)
                        continue;
                }
        }
  
+       oidset_clear(&loose_oid_set);
        if (!args->no_dependents) {
                if (!args->deepen) {
                        for_each_ref(mark_complete_oid, NULL);
@@@ -886,17 -925,8 +925,17 @@@ static int get_pack(struct fetch_pack_a
            ? fetch_fsck_objects
            : transfer_fsck_objects >= 0
            ? transfer_fsck_objects
 -          : 0)
 -              argv_array_push(&cmd.args, "--strict");
 +          : 0) {
 +              if (args->from_promisor)
 +                      /*
 +                       * We cannot use --strict in index-pack because it
 +                       * checks both broken objects and links, but we only
 +                       * want to check for broken objects.
 +                       */
 +                      argv_array_push(&cmd.args, "--fsck-objects");
 +              else
 +                      argv_array_push(&cmd.args, "--strict");
 +      }
  
        cmd.in = demux.out;
        cmd.git_cmd = 1;
diff --combined sha1_file.c
index cc0f43ea849569664d1af9fe05a6b42917990a7f,c0a19794795caa8bf7c1e5fe8d4094ca57bde41f..ad775495659ee09cf287965f2f7b3044bb1b04e2
@@@ -1262,16 -1262,17 +1262,19 @@@ int sha1_object_info_extended(const uns
                if (find_pack_entry(real, &e))
                        break;
  
+               if (flags & OBJECT_INFO_IGNORE_LOOSE)
+                       return -1;
                /* Most likely it's a loose object. */
                if (!sha1_loose_object_info(real, oi, flags))
                        return 0;
  
                /* Not a loose object; someone else may have just packed it. */
 -              reprepare_packed_git();
 -              if (find_pack_entry(real, &e))
 -                      break;
 +              if (!(flags & OBJECT_INFO_QUICK)) {
 +                      reprepare_packed_git();
 +                      if (find_pack_entry(real, &e))
 +                              break;
 +              }
  
                /* Check if it is a missing object */
                if (fetch_if_missing && repository_format_partial_clone &&