sha1_name: unroll len loop in find_unique_abbrev_r()
authorDerrick Stolee <dstolee@microsoft.com>
Sun, 8 Oct 2017 18:49:40 +0000 (14:49 -0400)
committerJunio C Hamano <gitster@pobox.com>
Fri, 13 Oct 2017 00:26:03 +0000 (09:26 +0900)
Unroll the while loop inside find_unique_abbrev_r to avoid iterating
through all loose objects and packfiles multiple times when the short
name is longer than the predicted length.

Instead, inspect each object that collides with the estimated
abbreviation to find the longest common prefix.

The focus of this change is to refactor the existing method in a way
that clearly does not change the current behavior. In some cases, the
new method is slower than the previous method. Later changes will
correct all performance loss.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
sha1_name.c
index 134ac9742f9eed3dc6e587875bdb9aa77b7a8307..4b6c08975d086a11cb93194345810955489008c8 100644 (file)
@@ -474,10 +474,32 @@ static unsigned msb(unsigned long val)
        return r;
 }
 
-int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
+struct min_abbrev_data {
+       unsigned int init_len;
+       unsigned int cur_len;
+       char *hex;
+};
+
+static int extend_abbrev_len(const struct object_id *oid, void *cb_data)
 {
-       int status, exists;
+       struct min_abbrev_data *mad = cb_data;
+
+       char *hex = oid_to_hex(oid);
+       unsigned int i = mad->init_len;
+       while (mad->hex[i] && mad->hex[i] == hex[i])
+               i++;
+
+       if (i < GIT_MAX_RAWSZ && i >= mad->cur_len)
+               mad->cur_len = i + 1;
+
+       return 0;
+}
 
+int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
+{
+       struct disambiguate_state ds;
+       struct min_abbrev_data mad;
+       struct object_id oid_ret;
        if (len < 0) {
                unsigned long count = approximate_object_count();
                /*
@@ -503,19 +525,24 @@ int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
        sha1_to_hex_r(hex, sha1);
        if (len == GIT_SHA1_HEXSZ || !len)
                return GIT_SHA1_HEXSZ;
-       exists = has_sha1_file(sha1);
-       while (len < GIT_SHA1_HEXSZ) {
-               struct object_id oid_ret;
-               status = get_short_oid(hex, len, &oid_ret, GET_OID_QUIETLY);
-               if (exists
-                   ? !status
-                   : status == SHORT_NAME_NOT_FOUND) {
-                       hex[len] = 0;
-                       return len;
-               }
-               len++;
-       }
-       return len;
+
+       if (init_object_disambiguation(hex, len, &ds) < 0)
+               return -1;
+
+       mad.init_len = len;
+       mad.cur_len = len;
+       mad.hex = hex;
+
+       ds.fn = extend_abbrev_len;
+       ds.always_call_fn = 1;
+       ds.cb_data = (void *)&mad;
+
+       find_short_object_filename(&ds);
+       find_short_packed_object(&ds);
+       (void)finish_object_disambiguation(&ds, &oid_ret);
+
+       hex[mad.cur_len] = 0;
+       return mad.cur_len;
 }
 
 const char *find_unique_abbrev(const unsigned char *sha1, int len)