Merge branch 'jk/alternate-ref-optim'
authorJunio C Hamano <gitster@pobox.com>
Mon, 27 Feb 2017 21:57:13 +0000 (13:57 -0800)
committerJunio C Hamano <gitster@pobox.com>
Mon, 27 Feb 2017 21:57:13 +0000 (13:57 -0800)
Optimizes resource usage while enumerating refs from alternate
object store, to help receiving end of "push" that hosts a
repository with many "forks".

* jk/alternate-ref-optim:
receive-pack: avoid duplicates between our refs and alternates
receive-pack: treat namespace .have lines like alternates
receive-pack: fix misleading namespace/.have comment
receive-pack: use oidset to de-duplicate .have lines
add oidset API
fetch-pack: cache results of for_each_alternate_ref
for_each_alternate_ref: replace transport code with for-each-ref
for_each_alternate_ref: pass name/oid instead of ref struct
for_each_alternate_ref: use strbuf for path allocation
for_each_alternate_ref: stop trimming trailing slashes
for_each_alternate_ref: handle failure from real_pathdup()

Makefile
builtin/receive-pack.c
fetch-pack.c
object.h
oidset.c [new file with mode: 0644]
oidset.h [new file with mode: 0644]
t/t5400-send-pack.sh
transport.c
transport.h
index 8e4081e0619f8a9927a3a4c544048f576a194fd0..a5433978e3714fb9a37273e07fb235ea6d63d31b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -781,6 +781,7 @@ LIB_OBJS += notes-cache.o
 LIB_OBJS += notes-merge.o
 LIB_OBJS += notes-utils.o
 LIB_OBJS += object.o
+LIB_OBJS += oidset.o
 LIB_OBJS += pack-bitmap.o
 LIB_OBJS += pack-bitmap-write.o
 LIB_OBJS += pack-check.o
index 1dbb8a069225be1e9d9fe27ad4b83a8bd66ca511..9ed8fbbfad71daf45381dbccf0aebe4a5b9af03d 100644 (file)
@@ -21,6 +21,7 @@
 #include "sigchain.h"
 #include "fsck.h"
 #include "tmp-objdir.h"
+#include "oidset.h"
 
 static const char * const receive_pack_usage[] = {
        N_("git receive-pack <git-dir>"),
@@ -250,8 +251,9 @@ static void show_ref(const char *path, const unsigned char *sha1)
 }
 
 static int show_ref_cb(const char *path_full, const struct object_id *oid,
-                      int flag, void *unused)
+                      int flag, void *data)
 {
+       struct oidset *seen = data;
        const char *path = strip_namespace(path_full);
 
        if (ref_is_hidden(path, path_full))
@@ -260,37 +262,38 @@ static int show_ref_cb(const char *path_full, const struct object_id *oid,
        /*
         * Advertise refs outside our current namespace as ".have"
         * refs, so that the client can use them to minimize data
-        * transfer but will otherwise ignore them. This happens to
-        * cover ".have" that are thrown in by add_one_alternate_ref()
-        * to mark histories that are complete in our alternates as
-        * well.
+        * transfer but will otherwise ignore them.
         */
-       if (!path)
+       if (!path) {
+               if (oidset_insert(seen, oid))
+                       return 0;
                path = ".have";
+       } else {
+               oidset_insert(seen, oid);
+       }
        show_ref(path, oid->hash);
        return 0;
 }
 
-static int show_one_alternate_sha1(const unsigned char sha1[20], void *unused)
+static void show_one_alternate_ref(const char *refname,
+                                  const struct object_id *oid,
+                                  void *data)
 {
-       show_ref(".have", sha1);
-       return 0;
-}
+       struct oidset *seen = data;
 
-static void collect_one_alternate_ref(const struct ref *ref, void *data)
-{
-       struct sha1_array *sa = data;
-       sha1_array_append(sa, ref->old_oid.hash);
+       if (oidset_insert(seen, oid))
+               return;
+
+       show_ref(".have", oid->hash);
 }
 
 static void write_head_info(void)
 {
-       struct sha1_array sa = SHA1_ARRAY_INIT;
+       static struct oidset seen = OIDSET_INIT;
 
-       for_each_alternate_ref(collect_one_alternate_ref, &sa);
-       sha1_array_for_each_unique(&sa, show_one_alternate_sha1, NULL);
-       sha1_array_clear(&sa);
-       for_each_ref(show_ref_cb, NULL);
+       for_each_ref(show_ref_cb, &seen);
+       for_each_alternate_ref(show_one_alternate_ref, &seen);
+       oidset_clear(&seen);
        if (!sent_capabilities)
                show_ref("capabilities^{}", null_sha1);
 
index 601f0779a1903a2b860412e3404e4ea8e4bce573..e0f5d5ce875acad79b5ddcfc4e24b6ff8bc33df9 100644 (file)
@@ -35,6 +35,7 @@ static const char *alternate_shallow_file;
 #define COMMON_REF     (1U << 2)
 #define SEEN           (1U << 3)
 #define POPPED         (1U << 4)
+#define ALTERNATE      (1U << 5)
 
 static int marked;
 
@@ -67,6 +68,41 @@ static inline void print_verbose(const struct fetch_pack_args *args,
        fputc('\n', stderr);
 }
 
+struct alternate_object_cache {
+       struct object **items;
+       size_t nr, alloc;
+};
+
+static void cache_one_alternate(const char *refname,
+                               const struct object_id *oid,
+                               void *vcache)
+{
+       struct alternate_object_cache *cache = vcache;
+       struct object *obj = parse_object(oid->hash);
+
+       if (!obj || (obj->flags & ALTERNATE))
+               return;
+
+       obj->flags |= ALTERNATE;
+       ALLOC_GROW(cache->items, cache->nr + 1, cache->alloc);
+       cache->items[cache->nr++] = obj;
+}
+
+static void for_each_cached_alternate(void (*cb)(struct object *))
+{
+       static int initialized;
+       static struct alternate_object_cache cache;
+       size_t i;
+
+       if (!initialized) {
+               for_each_alternate_ref(cache_one_alternate, &cache);
+               initialized = 1;
+       }
+
+       for (i = 0; i < cache.nr; i++)
+               cb(cache.items[i]);
+}
+
 static void rev_list_push(struct commit *commit, int mark)
 {
        if (!(commit->object.flags & mark)) {
@@ -253,9 +289,9 @@ static void send_request(struct fetch_pack_args *args,
                write_or_die(fd, buf->buf, buf->len);
 }
 
-static void insert_one_alternate_ref(const struct ref *ref, void *unused)
+static void insert_one_alternate_object(struct object *obj)
 {
-       rev_list_insert_ref(NULL, ref->old_oid.hash);
+       rev_list_insert_ref(NULL, obj->oid.hash);
 }
 
 #define INITIAL_FLUSH 16
@@ -298,7 +334,7 @@ static int find_common(struct fetch_pack_args *args,
        marked = 1;
 
        for_each_ref(rev_list_insert_ref_oid, NULL);
-       for_each_alternate_ref(insert_one_alternate_ref, NULL);
+       for_each_cached_alternate(insert_one_alternate_object);
 
        fetching = 0;
        for ( ; refs ; refs = refs->next) {
@@ -619,9 +655,9 @@ static void filter_refs(struct fetch_pack_args *args,
        *refs = newlist;
 }
 
-static void mark_alternate_complete(const struct ref *ref, void *unused)
+static void mark_alternate_complete(struct object *obj)
 {
-       mark_complete(ref->old_oid.hash);
+       mark_complete(obj->oid.hash);
 }
 
 static int everything_local(struct fetch_pack_args *args,
@@ -657,7 +693,7 @@ static int everything_local(struct fetch_pack_args *args,
 
        if (!args->deepen) {
                for_each_ref(mark_complete_oid, NULL);
-               for_each_alternate_ref(mark_alternate_complete, NULL);
+               for_each_cached_alternate(mark_alternate_complete);
                commit_list_sort_by_date(&complete);
                if (cutoff)
                        mark_recent_complete_commits(args, cutoff);
index 614a0067566733dc91998423f6dc05634cbedc61..f52957dcb34a3340de2d398014ad4605e7620360 100644 (file)
--- a/object.h
+++ b/object.h
@@ -29,7 +29,7 @@ struct object_array {
 /*
  * object flag allocation:
  * revision.h:      0---------10                                26
- * fetch-pack.c:    0---4
+ * fetch-pack.c:    0---5
  * walker.c:        0-2
  * upload-pack.c:       4       11----------------19
  * builtin/blame.c:               12-13
diff --git a/oidset.c b/oidset.c
new file mode 100644 (file)
index 0000000..ac169f0
--- /dev/null
+++ b/oidset.c
@@ -0,0 +1,49 @@
+#include "cache.h"
+#include "oidset.h"
+
+struct oidset_entry {
+       struct hashmap_entry hash;
+       struct object_id oid;
+};
+
+static int oidset_hashcmp(const void *va, const void *vb,
+                         const void *vkey)
+{
+       const struct oidset_entry *a = va, *b = vb;
+       const struct object_id *key = vkey;
+       return oidcmp(&a->oid, key ? key : &b->oid);
+}
+
+int oidset_contains(const struct oidset *set, const struct object_id *oid)
+{
+       struct hashmap_entry key;
+
+       if (!set->map.cmpfn)
+               return 0;
+
+       hashmap_entry_init(&key, sha1hash(oid->hash));
+       return !!hashmap_get(&set->map, &key, oid);
+}
+
+int oidset_insert(struct oidset *set, const struct object_id *oid)
+{
+       struct oidset_entry *entry;
+
+       if (!set->map.cmpfn)
+               hashmap_init(&set->map, oidset_hashcmp, 0);
+
+       if (oidset_contains(set, oid))
+               return 1;
+
+       entry = xmalloc(sizeof(*entry));
+       hashmap_entry_init(&entry->hash, sha1hash(oid->hash));
+       oidcpy(&entry->oid, oid);
+
+       hashmap_add(&set->map, entry);
+       return 0;
+}
+
+void oidset_clear(struct oidset *set)
+{
+       hashmap_free(&set->map, 1);
+}
diff --git a/oidset.h b/oidset.h
new file mode 100644 (file)
index 0000000..b7eaab5
--- /dev/null
+++ b/oidset.h
@@ -0,0 +1,45 @@
+#ifndef OIDSET_H
+#define OIDSET_H
+
+/**
+ * This API is similar to sha1-array, in that it maintains a set of object ids
+ * in a memory-efficient way. The major differences are:
+ *
+ *   1. It uses a hash, so we can do online duplicate removal, rather than
+ *      sort-and-uniq at the end. This can reduce memory footprint if you have
+ *      a large list of oids with many duplicates.
+ *
+ *   2. The per-unique-oid memory footprint is slightly higher due to hash
+ *      table overhead.
+ */
+
+/**
+ * A single oidset; should be zero-initialized (or use OIDSET_INIT).
+ */
+struct oidset {
+       struct hashmap map;
+};
+
+#define OIDSET_INIT { { NULL } }
+
+/**
+ * Returns true iff `set` contains `oid`.
+ */
+int oidset_contains(const struct oidset *set, const struct object_id *oid);
+
+/**
+ * Insert the oid into the set; a copy is made, so "oid" does not need
+ * to persist after this function is called.
+ *
+ * Returns 1 if the oid was already in the set, 0 otherwise. This can be used
+ * to perform an efficient check-and-add.
+ */
+int oidset_insert(struct oidset *set, const struct object_id *oid);
+
+/**
+ * Remove all entries from the oidset, freeing any resources associated with
+ * it.
+ */
+void oidset_clear(struct oidset *set);
+
+#endif /* OIDSET_H */
index 305ca7a9308685c7d040ac254fd34cebbbc86cc8..3331e0f53443f9a8566d01ac08dd0cbc5aa2d33b 100755 (executable)
@@ -255,4 +255,42 @@ test_expect_success 'deny pushing to delete current branch' '
        )
 '
 
+extract_ref_advertisement () {
+       perl -lne '
+               # \\ is there to skip capabilities after \0
+               /push< ([^\\]+)/ or next;
+               exit 0 if $1 eq "0000";
+               print $1;
+       '
+}
+
+test_expect_success 'receive-pack de-dupes .have lines' '
+       git init shared &&
+       git -C shared commit --allow-empty -m both &&
+       git clone -s shared fork &&
+       (
+               cd shared &&
+               git checkout -b only-shared &&
+               git commit --allow-empty -m only-shared &&
+               git update-ref refs/heads/foo HEAD
+       ) &&
+
+       # Notable things in this expectation:
+       #  - local refs are not de-duped
+       #  - .have does not duplicate locals
+       #  - .have does not duplicate itself
+       local=$(git -C fork rev-parse HEAD) &&
+       shared=$(git -C shared rev-parse only-shared) &&
+       cat >expect <<-EOF &&
+       $local refs/heads/master
+       $local refs/remotes/origin/HEAD
+       $local refs/remotes/origin/master
+       $shared .have
+       EOF
+
+       GIT_TRACE_PACKET=$(pwd)/trace git push fork HEAD:foo &&
+       extract_ref_advertisement <trace >refs &&
+       test_cmp expect refs
+'
+
 test_done
index d72e0894840fc384d67339b549a9a6bce7ba03ec..48864b3a9e66ce42abc7e2350a0afb424df1e93f 100644 (file)
@@ -1206,6 +1206,42 @@ char *transport_anonymize_url(const char *url)
        return xstrdup(url);
 }
 
+static void read_alternate_refs(const char *path,
+                               alternate_ref_fn *cb,
+                               void *data)
+{
+       struct child_process cmd = CHILD_PROCESS_INIT;
+       struct strbuf line = STRBUF_INIT;
+       FILE *fh;
+
+       cmd.git_cmd = 1;
+       argv_array_pushf(&cmd.args, "--git-dir=%s", path);
+       argv_array_push(&cmd.args, "for-each-ref");
+       argv_array_push(&cmd.args, "--format=%(objectname) %(refname)");
+       cmd.env = local_repo_env;
+       cmd.out = -1;
+
+       if (start_command(&cmd))
+               return;
+
+       fh = xfdopen(cmd.out, "r");
+       while (strbuf_getline_lf(&line, fh) != EOF) {
+               struct object_id oid;
+
+               if (get_oid_hex(line.buf, &oid) ||
+                   line.buf[GIT_SHA1_HEXSZ] != ' ') {
+                       warning("invalid line while parsing alternate refs: %s",
+                               line.buf);
+                       break;
+               }
+
+               cb(line.buf + GIT_SHA1_HEXSZ + 1, &oid, data);
+       }
+
+       fclose(fh);
+       finish_command(&cmd);
+}
+
 struct alternate_refs_data {
        alternate_ref_fn *fn;
        void *data;
@@ -1214,34 +1250,26 @@ struct alternate_refs_data {
 static int refs_from_alternate_cb(struct alternate_object_database *e,
                                  void *data)
 {
-       char *other;
-       size_t len;
-       struct remote *remote;
-       struct transport *transport;
-       const struct ref *extra;
+       struct strbuf path = STRBUF_INIT;
+       size_t base_len;
        struct alternate_refs_data *cb = data;
 
-       other = real_pathdup(e->path);
-       len = strlen(other);
-
-       while (other[len-1] == '/')
-               other[--len] = '\0';
-       if (len < 8 || memcmp(other + len - 8, "/objects", 8))
+       if (!strbuf_realpath(&path, e->path, 0))
                goto out;
+       if (!strbuf_strip_suffix(&path, "/objects"))
+               goto out;
+       base_len = path.len;
+
        /* Is this a git repository with refs? */
-       memcpy(other + len - 8, "/refs", 6);
-       if (!is_directory(other))
+       strbuf_addstr(&path, "/refs");
+       if (!is_directory(path.buf))
                goto out;
-       other[len - 8] = '\0';
-       remote = remote_get(other);
-       transport = transport_get(remote, other);
-       for (extra = transport_get_remote_refs(transport);
-            extra;
-            extra = extra->next)
-               cb->fn(extra, cb->data);
-       transport_disconnect(transport);
+       strbuf_setlen(&path, base_len);
+
+       read_alternate_refs(path.buf, cb->fn, cb->data);
+
 out:
-       free(other);
+       strbuf_release(&path);
        return 0;
 }
 
index e597b31b38d9d7bb374af203b80e4b6e83abbc3d..bc5571574b67803249567bcee501dfcc342b0b0f 100644 (file)
@@ -255,6 +255,6 @@ int transport_refs_pushed(struct ref *ref);
 void transport_print_push_status(const char *dest, struct ref *refs,
                  int verbose, int porcelain, unsigned int *reject_reasons);
 
-typedef void alternate_ref_fn(const struct ref *, void *);
+typedef void alternate_ref_fn(const char *refname, const struct object_id *oid, void *);
 extern void for_each_alternate_ref(alternate_ref_fn, void *);
 #endif