From b4e00f7306a160639f047b3421985e8f3d0c6fb1 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 13 Feb 2018 10:39:39 -0800 Subject: [PATCH] packfile: refactor hash search with fanout table Subsequent patches will introduce file formats that make use of a fanout array and a sorted table containing hashes, just like packfiles. Refactor the hash search in packfile.c into its own function, so that those patches can make use of it as well. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- packfile.c | 18 ++++-------------- sha1-lookup.c | 28 ++++++++++++++++++++++++++++ sha1-lookup.h | 22 ++++++++++++++++++++++ 3 files changed, 54 insertions(+), 14 deletions(-) diff --git a/packfile.c b/packfile.c index 58bdced3b1..59648a1820 100644 --- a/packfile.c +++ b/packfile.c @@ -1712,7 +1712,8 @@ off_t find_pack_entry_one(const unsigned char *sha1, { const uint32_t *level1_ofs = p->index_data; const unsigned char *index = p->index_data; - unsigned hi, lo, stride; + unsigned stride; + uint32_t result; if (!index) { if (open_pack_index(p)) @@ -1725,8 +1726,6 @@ off_t find_pack_entry_one(const unsigned char *sha1, index += 8; } index += 4 * 256; - hi = ntohl(level1_ofs[*sha1]); - lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1])); if (p->index_version > 1) { stride = 20; } else { @@ -1734,17 +1733,8 @@ off_t find_pack_entry_one(const unsigned char *sha1, index += 4; } - while (lo < hi) { - unsigned mi = lo + (hi - lo) / 2; - int cmp = hashcmp(index + mi * stride, sha1); - - if (!cmp) - return nth_packed_object_offset(p, mi); - if (cmp > 0) - hi = mi; - else - lo = mi+1; - } + if (bsearch_hash(sha1, level1_ofs, index, stride, &result)) + return nth_packed_object_offset(p, result); return 0; } diff --git a/sha1-lookup.c b/sha1-lookup.c index 4cf3ebd921..8d0b1db3e2 100644 --- a/sha1-lookup.c +++ b/sha1-lookup.c @@ -99,3 +99,31 @@ int sha1_pos(const unsigned char *sha1, void *table, size_t nr, } while (lo < hi); return -lo-1; } + +int bsearch_hash(const unsigned char *sha1, const uint32_t *fanout_nbo, + const unsigned char *table, size_t stride, uint32_t *result) +{ + uint32_t hi, lo; + + hi = ntohl(fanout_nbo[*sha1]); + lo = ((*sha1 == 0x0) ? 0 : ntohl(fanout_nbo[*sha1 - 1])); + + while (lo < hi) { + unsigned mi = lo + (hi - lo) / 2; + int cmp = hashcmp(table + mi * stride, sha1); + + if (!cmp) { + if (result) + *result = mi; + return 1; + } + if (cmp > 0) + hi = mi; + else + lo = mi + 1; + } + + if (result) + *result = lo; + return 0; +} diff --git a/sha1-lookup.h b/sha1-lookup.h index cf5314f402..7678b23b36 100644 --- a/sha1-lookup.h +++ b/sha1-lookup.h @@ -7,4 +7,26 @@ extern int sha1_pos(const unsigned char *sha1, void *table, size_t nr, sha1_access_fn fn); + +/* + * Searches for sha1 in table, using the given fanout table to determine the + * interval to search, then using binary search. Returns 1 if found, 0 if not. + * + * Takes the following parameters: + * + * - sha1: the hash to search for + * - fanout_nbo: a 256-element array of NETWORK-order 32-bit integers; the + * integer at position i represents the number of elements in table whose + * first byte is less than or equal to i + * - table: a sorted list of hashes with optional extra information in between + * - stride: distance between two consecutive elements in table (should be + * GIT_MAX_RAWSZ or greater) + * - result: if not NULL, this function stores the element index of the + * position found (if the search is successful) or the index of the least + * element that is greater than sha1 (if the search is not successful) + * + * This function does not verify the validity of the fanout table. + */ +int bsearch_hash(const unsigned char *sha1, const uint32_t *fanout_nbo, + const unsigned char *table, size_t stride, uint32_t *result); #endif -- 2.47.1