Merge branch 'ds/bsearch-hash'
authorJunio C Hamano <gitster@pobox.com>
Tue, 10 Apr 2018 07:28:22 +0000 (16:28 +0900)
committerJunio C Hamano <gitster@pobox.com>
Tue, 10 Apr 2018 07:28:22 +0000 (16:28 +0900)
Code to find the length to uniquely abbreviate object names based
on packfile content, which is a relatively recent addtion, has been
optimized to use the same fan-out table.

* ds/bsearch-hash:
sha1_name: use bsearch_pack() in unique_in_pack()
sha1_name: use bsearch_pack() for abbreviations
packfile: define and use bsearch_pack()
sha1_name: convert struct min_abbrev_data to object_id

packfile.c
packfile.h
sha1_name.c
index f26395ecabb39470f0ddfd127a417439a09de432..69d3afda6c5b3fd15cab7a32eb50028bba0b6c67 100644 (file)
@@ -1654,6 +1654,29 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
        return data;
 }
 
+int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result)
+{
+       const unsigned char *index_fanout = p->index_data;
+       const unsigned char *index_lookup;
+       int index_lookup_width;
+
+       if (!index_fanout)
+               BUG("bsearch_pack called without a valid pack-index");
+
+       index_lookup = index_fanout + 4 * 256;
+       if (p->index_version == 1) {
+               index_lookup_width = 24;
+               index_lookup += 4;
+       } else {
+               index_lookup_width = 20;
+               index_fanout += 8;
+               index_lookup += 8;
+       }
+
+       return bsearch_hash(oid->hash, (const uint32_t*)index_fanout,
+                           index_lookup, index_lookup_width, result);
+}
+
 const unsigned char *nth_packed_object_sha1(struct packed_git *p,
                                            uint32_t n)
 {
@@ -1720,30 +1743,17 @@ off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
 off_t find_pack_entry_one(const unsigned char *sha1,
                                  struct packed_git *p)
 {
-       const uint32_t *level1_ofs = p->index_data;
        const unsigned char *index = p->index_data;
-       unsigned stride;
+       struct object_id oid;
        uint32_t result;
 
        if (!index) {
                if (open_pack_index(p))
                        return 0;
-               level1_ofs = p->index_data;
-               index = p->index_data;
-       }
-       if (p->index_version > 1) {
-               level1_ofs += 2;
-               index += 8;
-       }
-       index += 4 * 256;
-       if (p->index_version > 1) {
-               stride = 20;
-       } else {
-               stride = 24;
-               index += 4;
        }
 
-       if (bsearch_hash(sha1, level1_ofs, index, stride, &result))
+       hashcpy(oid.hash, sha1);
+       if (bsearch_pack(&oid, p, &result))
                return nth_packed_object_offset(p, result);
        return 0;
 }
index a7fca598d672b73010a5fb99e4507da4634002ff..ec08cb2bb039ef005b90e09bcf35e90c5cbfb54b 100644 (file)
@@ -78,6 +78,14 @@ extern struct packed_git *add_packed_git(const char *path, size_t path_len, int
  */
 extern void check_pack_index_ptr(const struct packed_git *p, const void *ptr);
 
+/*
+ * Perform binary search on a pack-index for a given oid. Packfile is expected to
+ * have a valid pack-index.
+ *
+ * See 'bsearch_hash' for more information.
+ */
+int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result);
+
 /*
  * Return the SHA-1 of the nth object within the specified packfile.
  * Open the index if it is not already open.  The return value points
index 39e911c8bab21535ea6aef88c612cd282788e495..0185c6081a16d25b2ffbc220cc028ca2bfc22d32 100644 (file)
@@ -150,31 +150,14 @@ static int match_sha(unsigned len, const unsigned char *a, const unsigned char *
 static void unique_in_pack(struct packed_git *p,
                           struct disambiguate_state *ds)
 {
-       uint32_t num, last, i, first = 0;
+       uint32_t num, i, first = 0;
        const struct object_id *current = NULL;
 
        if (open_pack_index(p) || !p->num_objects)
                return;
 
        num = p->num_objects;
-       last = num;
-       while (first < last) {
-               uint32_t mid = first + (last - first) / 2;
-               const unsigned char *current;
-               int cmp;
-
-               current = nth_packed_object_sha1(p, mid);
-               cmp = hashcmp(ds->bin_pfx.hash, current);
-               if (!cmp) {
-                       first = mid;
-                       break;
-               }
-               if (cmp > 0) {
-                       first = mid+1;
-                       continue;
-               }
-               last = mid;
-       }
+       bsearch_pack(&ds->bin_pfx, p, &first);
 
        /*
         * At this point, "first" is the location of the lowest object
@@ -480,7 +463,7 @@ struct min_abbrev_data {
        unsigned int init_len;
        unsigned int cur_len;
        char *hex;
-       const unsigned char *hash;
+       const struct object_id *oid;
 };
 
 static inline char get_hex_char_from_oid(const struct object_id *oid,
@@ -512,32 +495,16 @@ static void find_abbrev_len_for_pack(struct packed_git *p,
                                     struct min_abbrev_data *mad)
 {
        int match = 0;
-       uint32_t num, last, first = 0;
+       uint32_t num, first = 0;
        struct object_id oid;
+       const struct object_id *mad_oid;
 
        if (open_pack_index(p) || !p->num_objects)
                return;
 
        num = p->num_objects;
-       last = num;
-       while (first < last) {
-               uint32_t mid = first + (last - first) / 2;
-               const unsigned char *current;
-               int cmp;
-
-               current = nth_packed_object_sha1(p, mid);
-               cmp = hashcmp(mad->hash, current);
-               if (!cmp) {
-                       match = 1;
-                       first = mid;
-                       break;
-               }
-               if (cmp > 0) {
-                       first = mid + 1;
-                       continue;
-               }
-               last = mid;
-       }
+       mad_oid = mad->oid;
+       match = bsearch_pack(mad_oid, p, &first);
 
        /*
         * first is now the position in the packfile where we would insert
@@ -603,7 +570,7 @@ int find_unique_abbrev_r(char *hex, const struct object_id *oid, int len)
        mad.init_len = len;
        mad.cur_len = len;
        mad.hex = hex;
-       mad.hash = oid->hash;
+       mad.oid = oid;
 
        find_abbrev_len_packed(&mad);