midx: use midx in abbreviation calculations
authorDerrick Stolee <stolee@gmail.com>
Thu, 12 Jul 2018 19:39:35 +0000 (15:39 -0400)
committerJunio C Hamano <gitster@pobox.com>
Fri, 20 Jul 2018 18:27:28 +0000 (11:27 -0700)
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
midx.c
midx.h
packfile.c
packfile.h
sha1-name.c
diff --git a/midx.c b/midx.c
index 182535933c33ed9a7874e38957e0d04d64a36c9d..4e014ff6e3b189ebb7c92634703df7233920dbf4 100644 (file)
--- a/midx.c
+++ b/midx.c
@@ -203,6 +203,17 @@ int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32
                            MIDX_HASH_LEN, result);
 }
 
+struct object_id *nth_midxed_object_oid(struct object_id *oid,
+                                       struct multi_pack_index *m,
+                                       uint32_t n)
+{
+       if (n >= m->num_objects)
+               return NULL;
+
+       hashcpy(oid->hash, m->chunk_oid_lookup + m->hash_len * n);
+       return oid;
+}
+
 static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
 {
        const unsigned char *offset_data;
diff --git a/midx.h b/midx.h
index 377838c9ca2724b34b1b85eeb1402b8b402dfc22..1b976df8735da646ede60ecdf57397e32b13ab34 100644 (file)
--- a/midx.h
+++ b/midx.h
@@ -31,6 +31,9 @@ struct multi_pack_index {
 
 struct multi_pack_index *load_multi_pack_index(const char *object_dir);
 int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result);
+struct object_id *nth_midxed_object_oid(struct object_id *oid,
+                                       struct multi_pack_index *m,
+                                       uint32_t n);
 int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m);
 int prepare_multi_pack_index_one(struct repository *r, const char *object_dir);
 
index bc763d91b9b64c8ad6e9edc79e23d3b7b1480b41..c0eb5ac885f5ffe0ccab00b244619b7ce25af45d 100644 (file)
@@ -961,6 +961,12 @@ struct packed_git *get_packed_git(struct repository *r)
        return r->objects->packed_git;
 }
 
+struct multi_pack_index *get_multi_pack_index(struct repository *r)
+{
+       prepare_packed_git(r);
+       return r->objects->multi_pack_index;
+}
+
 struct list_head *get_packed_git_mru(struct repository *r)
 {
        prepare_packed_git(r);
index b0eed44c0b1e35c8001db4e382ec5d1165764e5d..046280caf372ff82c52e2e4eca4844fcb2396ee7 100644 (file)
@@ -45,6 +45,7 @@ extern void install_packed_git(struct repository *r, struct packed_git *pack);
 
 struct packed_git *get_packed_git(struct repository *r);
 struct list_head *get_packed_git_mru(struct repository *r);
+struct multi_pack_index *get_multi_pack_index(struct repository *r);
 
 /*
  * Give a rough count of objects in the repository. This sacrifices accuracy
index 60d9ef3c7e7108c859647656972c171cce4e7d7f..7dc71201e68be99f0ae9f159c1ec7c002f5e9a31 100644 (file)
@@ -12,6 +12,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "repository.h"
+#include "midx.h"
 
 static int get_oid_oneline(const char *, struct object_id *, struct commit_list *);
 
@@ -149,6 +150,32 @@ static int match_sha(unsigned len, const unsigned char *a, const unsigned char *
        return 1;
 }
 
+static void unique_in_midx(struct multi_pack_index *m,
+                          struct disambiguate_state *ds)
+{
+       uint32_t num, i, first = 0;
+       const struct object_id *current = NULL;
+       num = m->num_objects;
+
+       if (!num)
+               return;
+
+       bsearch_midx(&ds->bin_pfx, m, &first);
+
+       /*
+        * At this point, "first" is the location of the lowest object
+        * with an object name that could match "bin_pfx".  See if we have
+        * 0, 1 or more objects that actually match(es).
+        */
+       for (i = first; i < num && !ds->ambiguous; i++) {
+               struct object_id oid;
+               current = nth_midxed_object_oid(&oid, m, i);
+               if (!match_sha(ds->len, ds->bin_pfx.hash, current->hash))
+                       break;
+               update_candidates(ds, current);
+       }
+}
+
 static void unique_in_pack(struct packed_git *p,
                           struct disambiguate_state *ds)
 {
@@ -177,8 +204,12 @@ static void unique_in_pack(struct packed_git *p,
 
 static void find_short_packed_object(struct disambiguate_state *ds)
 {
+       struct multi_pack_index *m;
        struct packed_git *p;
 
+       for (m = get_multi_pack_index(the_repository); m && !ds->ambiguous;
+            m = m->next)
+               unique_in_midx(m, ds);
        for (p = get_packed_git(the_repository); p && !ds->ambiguous;
             p = p->next)
                unique_in_pack(p, ds);
@@ -527,6 +558,42 @@ static int extend_abbrev_len(const struct object_id *oid, void *cb_data)
        return 0;
 }
 
+static void find_abbrev_len_for_midx(struct multi_pack_index *m,
+                                    struct min_abbrev_data *mad)
+{
+       int match = 0;
+       uint32_t num, first = 0;
+       struct object_id oid;
+       const struct object_id *mad_oid;
+
+       if (!m->num_objects)
+               return;
+
+       num = m->num_objects;
+       mad_oid = mad->oid;
+       match = bsearch_midx(mad_oid, m, &first);
+
+       /*
+        * first is now the position in the packfile where we would insert
+        * mad->hash if it does not exist (or the position of mad->hash if
+        * it does exist). Hence, we consider a maximum of two objects
+        * nearby for the abbreviation length.
+        */
+       mad->init_len = 0;
+       if (!match) {
+               if (nth_midxed_object_oid(&oid, m, first))
+                       extend_abbrev_len(&oid, mad);
+       } else if (first < num - 1) {
+               if (nth_midxed_object_oid(&oid, m, first + 1))
+                       extend_abbrev_len(&oid, mad);
+       }
+       if (first > 0) {
+               if (nth_midxed_object_oid(&oid, m, first - 1))
+                       extend_abbrev_len(&oid, mad);
+       }
+       mad->init_len = mad->cur_len;
+}
+
 static void find_abbrev_len_for_pack(struct packed_git *p,
                                     struct min_abbrev_data *mad)
 {
@@ -565,8 +632,11 @@ static void find_abbrev_len_for_pack(struct packed_git *p,
 
 static void find_abbrev_len_packed(struct min_abbrev_data *mad)
 {
+       struct multi_pack_index *m;
        struct packed_git *p;
 
+       for (m = get_multi_pack_index(the_repository); m; m = m->next)
+               find_abbrev_len_for_midx(m, mad);
        for (p = get_packed_git(the_repository); p; p = p->next)
                find_abbrev_len_for_pack(p, mad);
 }