Merge branch 'ds/find-unique-abbrev-optim'
authorJunio C Hamano <gitster@pobox.com>
Mon, 6 Nov 2017 05:24:25 +0000 (14:24 +0900)
committerJunio C Hamano <gitster@pobox.com>
Mon, 6 Nov 2017 05:24:25 +0000 (14:24 +0900)
Optimize the code to find shortest unique prefix of object names.

* ds/find-unique-abbrev-optim:
sha1_name: minimize OID comparisons during disambiguation
sha1_name: parse less while finding common prefix
sha1_name: unroll len loop in find_unique_abbrev_r()
p4211-line-log.sh: add log --online --raw --parents perf test

1  2 
sha1_name.c
diff --combined sha1_name.c
index c7c5ab376ccb56b3a4f4533f61293bd543ea8097,05a635911be3c6b27cb3c572500483c47cee9b56..2d6b48047b3cfb3ddb3f830389fa03b821c4d721
@@@ -153,11 -153,13 +153,13 @@@ static void unique_in_pack(struct packe
        uint32_t num, last, i, first = 0;
        const struct object_id *current = NULL;
  
-       open_pack_index(p);
+       if (open_pack_index(p) || !p->num_objects)
+               return;
        num = p->num_objects;
        last = num;
        while (first < last) {
 -              uint32_t mid = (first + last) / 2;
 +              uint32_t mid = first + (last - first) / 2;
                const unsigned char *current;
                int cmp;
  
@@@ -474,10 -476,104 +476,104 @@@ static unsigned msb(unsigned long val
        return r;
  }
  
- int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
+ struct min_abbrev_data {
+       unsigned int init_len;
+       unsigned int cur_len;
+       char *hex;
+       const unsigned char *hash;
+ };
+ static inline char get_hex_char_from_oid(const struct object_id *oid,
+                                        unsigned int pos)
+ {
+       static const char hex[] = "0123456789abcdef";
+       if ((pos & 1) == 0)
+               return hex[oid->hash[pos >> 1] >> 4];
+       else
+               return hex[oid->hash[pos >> 1] & 0xf];
+ }
+ static int extend_abbrev_len(const struct object_id *oid, void *cb_data)
+ {
+       struct min_abbrev_data *mad = cb_data;
+       unsigned int i = mad->init_len;
+       while (mad->hex[i] && mad->hex[i] == get_hex_char_from_oid(oid, i))
+               i++;
+       if (i < GIT_MAX_RAWSZ && i >= mad->cur_len)
+               mad->cur_len = i + 1;
+       return 0;
+ }
+ static void find_abbrev_len_for_pack(struct packed_git *p,
+                                    struct min_abbrev_data *mad)
  {
-       int status, exists;
+       int match = 0;
+       uint32_t num, last, first = 0;
+       struct object_id oid;
+       if (open_pack_index(p) || !p->num_objects)
+               return;
+       num = p->num_objects;
+       last = num;
+       while (first < last) {
+               uint32_t mid = first + (last - first) / 2;
+               const unsigned char *current;
+               int cmp;
  
+               current = nth_packed_object_sha1(p, mid);
+               cmp = hashcmp(mad->hash, current);
+               if (!cmp) {
+                       match = 1;
+                       first = mid;
+                       break;
+               }
+               if (cmp > 0) {
+                       first = mid + 1;
+                       continue;
+               }
+               last = mid;
+       }
+       /*
+        * first is now the position in the packfile where we would insert
+        * mad->hash if it does not exist (or the position of mad->hash if
+        * it does exist). Hence, we consider a maximum of three objects
+        * nearby for the abbreviation length.
+        */
+       mad->init_len = 0;
+       if (!match) {
+               nth_packed_object_oid(&oid, p, first);
+               extend_abbrev_len(&oid, mad);
+       } else if (first < num - 1) {
+               nth_packed_object_oid(&oid, p, first + 1);
+               extend_abbrev_len(&oid, mad);
+       }
+       if (first > 0) {
+               nth_packed_object_oid(&oid, p, first - 1);
+               extend_abbrev_len(&oid, mad);
+       }
+       mad->init_len = mad->cur_len;
+ }
+ static void find_abbrev_len_packed(struct min_abbrev_data *mad)
+ {
+       struct packed_git *p;
+       prepare_packed_git();
+       for (p = packed_git; p; p = p->next)
+               find_abbrev_len_for_pack(p, mad);
+ }
+ int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
+ {
+       struct disambiguate_state ds;
+       struct min_abbrev_data mad;
+       struct object_id oid_ret;
        if (len < 0) {
                unsigned long count = approximate_object_count();
                /*
        sha1_to_hex_r(hex, sha1);
        if (len == GIT_SHA1_HEXSZ || !len)
                return GIT_SHA1_HEXSZ;
-       exists = has_sha1_file(sha1);
-       while (len < GIT_SHA1_HEXSZ) {
-               struct object_id oid_ret;
-               status = get_short_oid(hex, len, &oid_ret, GET_OID_QUIETLY);
-               if (exists
-                   ? !status
-                   : status == SHORT_NAME_NOT_FOUND) {
-                       hex[len] = 0;
-                       return len;
-               }
-               len++;
-       }
-       return len;
+       mad.init_len = len;
+       mad.cur_len = len;
+       mad.hex = hex;
+       mad.hash = sha1;
+       find_abbrev_len_packed(&mad);
+       if (init_object_disambiguation(hex, mad.cur_len, &ds) < 0)
+               return -1;
+       ds.fn = extend_abbrev_len;
+       ds.always_call_fn = 1;
+       ds.cb_data = (void *)&mad;
+       find_short_object_filename(&ds);
+       (void)finish_object_disambiguation(&ds, &oid_ret);
+       hex[mad.cur_len] = 0;
+       return mad.cur_len;
  }
  
  const char *find_unique_abbrev(const unsigned char *sha1, int len)