Merge branch 'ds/find-unique-abbrev-optim'
authorJunio C Hamano <gitster@pobox.com>
Mon, 6 Nov 2017 05:24:25 +0000 (14:24 +0900)
committerJunio C Hamano <gitster@pobox.com>
Mon, 6 Nov 2017 05:24:25 +0000 (14:24 +0900)
Optimize the code to find shortest unique prefix of object names.

* ds/find-unique-abbrev-optim:
sha1_name: minimize OID comparisons during disambiguation
sha1_name: parse less while finding common prefix
sha1_name: unroll len loop in find_unique_abbrev_r()
p4211-line-log.sh: add log --online --raw --parents perf test

sha1_name.c
t/perf/p4211-line-log.sh
index c7c5ab376ccb56b3a4f4533f61293bd543ea8097..2d6b48047b3cfb3ddb3f830389fa03b821c4d721 100644 (file)
@@ -153,7 +153,9 @@ static void unique_in_pack(struct packed_git *p,
        uint32_t num, last, i, first = 0;
        const struct object_id *current = NULL;
 
-       open_pack_index(p);
+       if (open_pack_index(p) || !p->num_objects)
+               return;
+
        num = p->num_objects;
        last = num;
        while (first < last) {
@@ -474,10 +476,104 @@ static unsigned msb(unsigned long val)
        return r;
 }
 
-int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
+struct min_abbrev_data {
+       unsigned int init_len;
+       unsigned int cur_len;
+       char *hex;
+       const unsigned char *hash;
+};
+
+static inline char get_hex_char_from_oid(const struct object_id *oid,
+                                        unsigned int pos)
 {
-       int status, exists;
+       static const char hex[] = "0123456789abcdef";
 
+       if ((pos & 1) == 0)
+               return hex[oid->hash[pos >> 1] >> 4];
+       else
+               return hex[oid->hash[pos >> 1] & 0xf];
+}
+
+static int extend_abbrev_len(const struct object_id *oid, void *cb_data)
+{
+       struct min_abbrev_data *mad = cb_data;
+
+       unsigned int i = mad->init_len;
+       while (mad->hex[i] && mad->hex[i] == get_hex_char_from_oid(oid, i))
+               i++;
+
+       if (i < GIT_MAX_RAWSZ && i >= mad->cur_len)
+               mad->cur_len = i + 1;
+
+       return 0;
+}
+
+static void find_abbrev_len_for_pack(struct packed_git *p,
+                                    struct min_abbrev_data *mad)
+{
+       int match = 0;
+       uint32_t num, last, first = 0;
+       struct object_id oid;
+
+       if (open_pack_index(p) || !p->num_objects)
+               return;
+
+       num = p->num_objects;
+       last = num;
+       while (first < last) {
+               uint32_t mid = first + (last - first) / 2;
+               const unsigned char *current;
+               int cmp;
+
+               current = nth_packed_object_sha1(p, mid);
+               cmp = hashcmp(mad->hash, current);
+               if (!cmp) {
+                       match = 1;
+                       first = mid;
+                       break;
+               }
+               if (cmp > 0) {
+                       first = mid + 1;
+                       continue;
+               }
+               last = mid;
+       }
+
+       /*
+        * first is now the position in the packfile where we would insert
+        * mad->hash if it does not exist (or the position of mad->hash if
+        * it does exist). Hence, we consider a maximum of three objects
+        * nearby for the abbreviation length.
+        */
+       mad->init_len = 0;
+       if (!match) {
+               nth_packed_object_oid(&oid, p, first);
+               extend_abbrev_len(&oid, mad);
+       } else if (first < num - 1) {
+               nth_packed_object_oid(&oid, p, first + 1);
+               extend_abbrev_len(&oid, mad);
+       }
+       if (first > 0) {
+               nth_packed_object_oid(&oid, p, first - 1);
+               extend_abbrev_len(&oid, mad);
+       }
+       mad->init_len = mad->cur_len;
+}
+
+static void find_abbrev_len_packed(struct min_abbrev_data *mad)
+{
+       struct packed_git *p;
+
+       prepare_packed_git();
+       for (p = packed_git; p; p = p->next)
+               find_abbrev_len_for_pack(p, mad);
+}
+
+int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
+{
+       struct disambiguate_state ds;
+       struct min_abbrev_data mad;
+       struct object_id oid_ret;
        if (len < 0) {
                unsigned long count = approximate_object_count();
                /*
@@ -503,19 +599,26 @@ int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
        sha1_to_hex_r(hex, sha1);
        if (len == GIT_SHA1_HEXSZ || !len)
                return GIT_SHA1_HEXSZ;
-       exists = has_sha1_file(sha1);
-       while (len < GIT_SHA1_HEXSZ) {
-               struct object_id oid_ret;
-               status = get_short_oid(hex, len, &oid_ret, GET_OID_QUIETLY);
-               if (exists
-                   ? !status
-                   : status == SHORT_NAME_NOT_FOUND) {
-                       hex[len] = 0;
-                       return len;
-               }
-               len++;
-       }
-       return len;
+
+       mad.init_len = len;
+       mad.cur_len = len;
+       mad.hex = hex;
+       mad.hash = sha1;
+
+       find_abbrev_len_packed(&mad);
+
+       if (init_object_disambiguation(hex, mad.cur_len, &ds) < 0)
+               return -1;
+
+       ds.fn = extend_abbrev_len;
+       ds.always_call_fn = 1;
+       ds.cb_data = (void *)&mad;
+
+       find_short_object_filename(&ds);
+       (void)finish_object_disambiguation(&ds, &oid_ret);
+
+       hex[mad.cur_len] = 0;
+       return mad.cur_len;
 }
 
 const char *find_unique_abbrev(const unsigned char *sha1, int len)
index b7ff68d4fa3de5559af672795191a0b81deaa4f8..e0ed05907c7277def1e9fcda15bd1e1fe98ae6cc 100755 (executable)
@@ -31,4 +31,8 @@ test_perf 'git log -L (renames on)' '
        git log -M -L 1:"$file" >/dev/null
 '
 
+test_perf 'git log --oneline --raw --parents' '
+       git log --oneline --raw --parents >/dev/null
+'
+
 test_done