From: Junio C Hamano Date: Fri, 19 Jul 2019 18:30:20 +0000 (-0700) Subject: Merge branch 'br/blame-ignore' X-Git-Tag: v2.23.0-rc0~39 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/209f0755934a0c9b448408f9b7c9849c15041ecc?hp=-c Merge branch 'br/blame-ignore' "git blame" learned to "ignore" commits in the history, whose effects (as well as their presence) get ignored. * br/blame-ignore: t8014: remove unnecessary braces blame: drop some unused function parameters blame: add a test to cover blame_coalesce() blame: use the fingerprint heuristic to match ignored lines blame: add a fingerprint heuristic to match ignored lines blame: optionally track line fingerprints during fill_blame_origin() blame: add config options for the output of ignored or unblamable lines blame: add the ability to ignore commits and their changes blame: use a helper function in blame_chunk() Move oidset_parse_file() to oidset.c fsck: rename and touch up init_skiplist() --- 209f0755934a0c9b448408f9b7c9849c15041ecc diff --combined blame.c index 145eaf2faf,acb1bf7f7a..7f04580ad5 --- a/blame.c +++ b/blame.c @@@ -99,7 -99,7 +99,7 @@@ static void verify_working_tree_path(st for (parents = work_tree->parents; parents; parents = parents->next) { const struct object_id *commit_oid = &parents->item->object.oid; struct object_id blob_oid; - unsigned mode; + unsigned short mode; if (!get_tree_entry(commit_oid, path, &blob_oid, &mode) && oid_object_info(r, &blob_oid, NULL) == OBJ_BLOB) @@@ -188,7 -188,7 +188,7 @@@ static struct commit *fake_working_tree unsigned mode; struct strbuf msg = STRBUF_INIT; - read_index(r->index); + repo_read_index(r); time(&now); commit = alloc_commit_node(r); commit->object.parsed = 1; @@@ -204,8 -204,7 +204,8 @@@ origin = make_origin(commit, path); - ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0); + ident = fmt_ident("Not Committed Yet", "not.committed.yet", + WANT_BLANK_IDENT, NULL, 0); strbuf_addstr(&msg, "tree 0000000000000000000000000000000000000000\n"); for (parent = commit->parents; parent; parent = parent->next) strbuf_addf(&msg, "parent %s\n", @@@ -271,7 -270,7 +271,7 @@@ * want to run "diff-index --cached". */ discard_index(r->index); - read_index(r->index); + repo_read_index(r); len = strlen(path); if (!mode) { @@@ -311,12 -310,707 +311,707 @@@ static int diff_hunks(mmfile_t *file_a return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb); } + static const char *get_next_line(const char *start, const char *end) + { + const char *nl = memchr(start, '\n', end - start); + + return nl ? nl + 1 : end; + } + + static int find_line_starts(int **line_starts, const char *buf, + unsigned long len) + { + const char *end = buf + len; + const char *p; + int *lineno; + int num = 0; + + for (p = buf; p < end; p = get_next_line(p, end)) + num++; + + ALLOC_ARRAY(*line_starts, num + 1); + lineno = *line_starts; + + for (p = buf; p < end; p = get_next_line(p, end)) + *lineno++ = p - buf; + + *lineno = len; + + return num; + } + + struct fingerprint_entry; + + /* A fingerprint is intended to loosely represent a string, such that two + * fingerprints can be quickly compared to give an indication of the similarity + * of the strings that they represent. + * + * A fingerprint is represented as a multiset of the lower-cased byte pairs in + * the string that it represents. Whitespace is added at each end of the + * string. Whitespace pairs are ignored. Whitespace is converted to '\0'. + * For example, the string "Darth Radar" will be converted to the following + * fingerprint: + * {"\0d", "da", "da", "ar", "ar", "rt", "th", "h\0", "\0r", "ra", "ad", "r\0"} + * + * The similarity between two fingerprints is the size of the intersection of + * their multisets, including repeated elements. See fingerprint_similarity for + * examples. + * + * For ease of implementation, the fingerprint is implemented as a map + * of byte pairs to the count of that byte pair in the string, instead of + * allowing repeated elements in a set. + */ + struct fingerprint { + struct hashmap map; + /* As we know the maximum number of entries in advance, it's + * convenient to store the entries in a single array instead of having + * the hashmap manage the memory. + */ + struct fingerprint_entry *entries; + }; + + /* A byte pair in a fingerprint. Stores the number of times the byte pair + * occurs in the string that the fingerprint represents. + */ + struct fingerprint_entry { + /* The hashmap entry - the hash represents the byte pair in its + * entirety so we don't need to store the byte pair separately. + */ + struct hashmap_entry entry; + /* The number of times the byte pair occurs in the string that the + * fingerprint represents. + */ + int count; + }; + + /* See `struct fingerprint` for an explanation of what a fingerprint is. + * \param result the fingerprint of the string is stored here. This must be + * freed later using free_fingerprint. + * \param line_begin the start of the string + * \param line_end the end of the string + */ + static void get_fingerprint(struct fingerprint *result, + const char *line_begin, + const char *line_end) + { + unsigned int hash, c0 = 0, c1; + const char *p; + int max_map_entry_count = 1 + line_end - line_begin; + struct fingerprint_entry *entry = xcalloc(max_map_entry_count, + sizeof(struct fingerprint_entry)); + struct fingerprint_entry *found_entry; + + hashmap_init(&result->map, NULL, NULL, max_map_entry_count); + result->entries = entry; + for (p = line_begin; p <= line_end; ++p, c0 = c1) { + /* Always terminate the string with whitespace. + * Normalise whitespace to 0, and normalise letters to + * lower case. This won't work for multibyte characters but at + * worst will match some unrelated characters. + */ + if ((p == line_end) || isspace(*p)) + c1 = 0; + else + c1 = tolower(*p); + hash = c0 | (c1 << 8); + /* Ignore whitespace pairs */ + if (hash == 0) + continue; + hashmap_entry_init(entry, hash); + + found_entry = hashmap_get(&result->map, entry, NULL); + if (found_entry) { + found_entry->count += 1; + } else { + entry->count = 1; + hashmap_add(&result->map, entry); + ++entry; + } + } + } + + static void free_fingerprint(struct fingerprint *f) + { + hashmap_free(&f->map, 0); + free(f->entries); + } + + /* Calculates the similarity between two fingerprints as the size of the + * intersection of their multisets, including repeated elements. See + * `struct fingerprint` for an explanation of the fingerprint representation. + * The similarity between "cat mat" and "father rather" is 2 because "at" is + * present twice in both strings while the similarity between "tim" and "mit" + * is 0. + */ + static int fingerprint_similarity(struct fingerprint *a, struct fingerprint *b) + { + int intersection = 0; + struct hashmap_iter iter; + const struct fingerprint_entry *entry_a, *entry_b; + + hashmap_iter_init(&b->map, &iter); + + while ((entry_b = hashmap_iter_next(&iter))) { + if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) { + intersection += entry_a->count < entry_b->count ? + entry_a->count : entry_b->count; + } + } + return intersection; + } + + /* Subtracts byte-pair elements in B from A, modifying A in place. + */ + static void fingerprint_subtract(struct fingerprint *a, struct fingerprint *b) + { + struct hashmap_iter iter; + struct fingerprint_entry *entry_a; + const struct fingerprint_entry *entry_b; + + hashmap_iter_init(&b->map, &iter); + + while ((entry_b = hashmap_iter_next(&iter))) { + if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) { + if (entry_a->count <= entry_b->count) + hashmap_remove(&a->map, entry_b, NULL); + else + entry_a->count -= entry_b->count; + } + } + } + + /* Calculate fingerprints for a series of lines. + * Puts the fingerprints in the fingerprints array, which must have been + * preallocated to allow storing line_count elements. + */ + static void get_line_fingerprints(struct fingerprint *fingerprints, + const char *content, const int *line_starts, + long first_line, long line_count) + { + int i; + const char *linestart, *lineend; + + line_starts += first_line; + for (i = 0; i < line_count; ++i) { + linestart = content + line_starts[i]; + lineend = content + line_starts[i + 1]; + get_fingerprint(fingerprints + i, linestart, lineend); + } + } + + static void free_line_fingerprints(struct fingerprint *fingerprints, + int nr_fingerprints) + { + int i; + + for (i = 0; i < nr_fingerprints; i++) + free_fingerprint(&fingerprints[i]); + } + + /* This contains the data necessary to linearly map a line number in one half + * of a diff chunk to the line in the other half of the diff chunk that is + * closest in terms of its position as a fraction of the length of the chunk. + */ + struct line_number_mapping { + int destination_start, destination_length, + source_start, source_length; + }; + + /* Given a line number in one range, offset and scale it to map it onto the + * other range. + * Essentially this mapping is a simple linear equation but the calculation is + * more complicated to allow performing it with integer operations. + * Another complication is that if a line could map onto many lines in the + * destination range then we want to choose the line at the center of those + * possibilities. + * Example: if the chunk is 2 lines long in A and 10 lines long in B then the + * first 5 lines in B will map onto the first line in the A chunk, while the + * last 5 lines will all map onto the second line in the A chunk. + * Example: if the chunk is 10 lines long in A and 2 lines long in B then line + * 0 in B will map onto line 2 in A, and line 1 in B will map onto line 7 in A. + */ + static int map_line_number(int line_number, + const struct line_number_mapping *mapping) + { + return ((line_number - mapping->source_start) * 2 + 1) * + mapping->destination_length / + (mapping->source_length * 2) + + mapping->destination_start; + } + + /* Get a pointer to the element storing the similarity between a line in A + * and a line in B. + * + * The similarities are stored in a 2-dimensional array. Each "row" in the + * array contains the similarities for a line in B. The similarities stored in + * a row are the similarities between the line in B and the nearby lines in A. + * To keep the length of each row the same, it is padded out with values of -1 + * where the search range extends beyond the lines in A. + * For example, if max_search_distance_a is 2 and the two sides of a diff chunk + * look like this: + * a | m + * b | n + * c | o + * d | p + * e | q + * Then the similarity array will contain: + * [-1, -1, am, bm, cm, + * -1, an, bn, cn, dn, + * ao, bo, co, do, eo, + * bp, cp, dp, ep, -1, + * cq, dq, eq, -1, -1] + * Where similarities are denoted either by -1 for invalid, or the + * concatenation of the two lines in the diff being compared. + * + * \param similarities array of similarities between lines in A and B + * \param line_a the index of the line in A, in the same frame of reference as + * closest_line_a. + * \param local_line_b the index of the line in B, relative to the first line + * in B that similarities represents. + * \param closest_line_a the index of the line in A that is deemed to be + * closest to local_line_b. This must be in the same + * frame of reference as line_a. This value defines + * where similarities is centered for the line in B. + * \param max_search_distance_a maximum distance in lines from the closest line + * in A for other lines in A for which + * similarities may be calculated. + */ + static int *get_similarity(int *similarities, + int line_a, int local_line_b, + int closest_line_a, int max_search_distance_a) + { + assert(abs(line_a - closest_line_a) <= + max_search_distance_a); + return similarities + line_a - closest_line_a + + max_search_distance_a + + local_line_b * (max_search_distance_a * 2 + 1); + } + + #define CERTAIN_NOTHING_MATCHES -2 + #define CERTAINTY_NOT_CALCULATED -1 + + /* Given a line in B, first calculate its similarities with nearby lines in A + * if not already calculated, then identify the most similar and second most + * similar lines. The "certainty" is calculated based on those two + * similarities. + * + * \param start_a the index of the first line of the chunk in A + * \param length_a the length in lines of the chunk in A + * \param local_line_b the index of the line in B, relative to the first line + * in the chunk. + * \param fingerprints_a array of fingerprints for the chunk in A + * \param fingerprints_b array of fingerprints for the chunk in B + * \param similarities 2-dimensional array of similarities between lines in A + * and B. See get_similarity() for more details. + * \param certainties array of values indicating how strongly a line in B is + * matched with some line in A. + * \param second_best_result array of absolute indices in A for the second + * closest match of a line in B. + * \param result array of absolute indices in A for the closest match of a line + * in B. + * \param max_search_distance_a maximum distance in lines from the closest line + * in A for other lines in A for which + * similarities may be calculated. + * \param map_line_number_in_b_to_a parameter to map_line_number(). + */ + static void find_best_line_matches( + int start_a, + int length_a, + int start_b, + int local_line_b, + struct fingerprint *fingerprints_a, + struct fingerprint *fingerprints_b, + int *similarities, + int *certainties, + int *second_best_result, + int *result, + const int max_search_distance_a, + const struct line_number_mapping *map_line_number_in_b_to_a) + { + + int i, search_start, search_end, closest_local_line_a, *similarity, + best_similarity = 0, second_best_similarity = 0, + best_similarity_index = 0, second_best_similarity_index = 0; + + /* certainty has already been calculated so no need to redo the work */ + if (certainties[local_line_b] != CERTAINTY_NOT_CALCULATED) + return; + + closest_local_line_a = map_line_number( + local_line_b + start_b, map_line_number_in_b_to_a) - start_a; + + search_start = closest_local_line_a - max_search_distance_a; + if (search_start < 0) + search_start = 0; + + search_end = closest_local_line_a + max_search_distance_a + 1; + if (search_end > length_a) + search_end = length_a; + + for (i = search_start; i < search_end; ++i) { + similarity = get_similarity(similarities, + i, local_line_b, + closest_local_line_a, + max_search_distance_a); + if (*similarity == -1) { + /* This value will never exceed 10 but assert just in + * case + */ + assert(abs(i - closest_local_line_a) < 1000); + /* scale the similarity by (1000 - distance from + * closest line) to act as a tie break between lines + * that otherwise are equally similar. + */ + *similarity = fingerprint_similarity( + fingerprints_b + local_line_b, + fingerprints_a + i) * + (1000 - abs(i - closest_local_line_a)); + } + if (*similarity > best_similarity) { + second_best_similarity = best_similarity; + second_best_similarity_index = best_similarity_index; + best_similarity = *similarity; + best_similarity_index = i; + } else if (*similarity > second_best_similarity) { + second_best_similarity = *similarity; + second_best_similarity_index = i; + } + } + + if (best_similarity == 0) { + /* this line definitely doesn't match with anything. Mark it + * with this special value so it doesn't get invalidated and + * won't be recalculated. + */ + certainties[local_line_b] = CERTAIN_NOTHING_MATCHES; + result[local_line_b] = -1; + } else { + /* Calculate the certainty with which this line matches. + * If the line matches well with two lines then that reduces + * the certainty. However we still want to prioritise matching + * a line that matches very well with two lines over matching a + * line that matches poorly with one line, hence doubling + * best_similarity. + * This means that if we have + * line X that matches only one line with a score of 3, + * line Y that matches two lines equally with a score of 5, + * and line Z that matches only one line with a score or 2, + * then the lines in order of certainty are X, Y, Z. + */ + certainties[local_line_b] = best_similarity * 2 - + second_best_similarity; + + /* We keep both the best and second best results to allow us to + * check at a later stage of the matching process whether the + * result needs to be invalidated. + */ + result[local_line_b] = start_a + best_similarity_index; + second_best_result[local_line_b] = + start_a + second_best_similarity_index; + } + } + + /* + * This finds the line that we can match with the most confidence, and + * uses it as a partition. It then calls itself on the lines on either side of + * that partition. In this way we avoid lines appearing out of order, and + * retain a sensible line ordering. + * \param start_a index of the first line in A with which lines in B may be + * compared. + * \param start_b index of the first line in B for which matching should be + * done. + * \param length_a number of lines in A with which lines in B may be compared. + * \param length_b number of lines in B for which matching should be done. + * \param fingerprints_a mutable array of fingerprints in A. The first element + * corresponds to the line at start_a. + * \param fingerprints_b array of fingerprints in B. The first element + * corresponds to the line at start_b. + * \param similarities 2-dimensional array of similarities between lines in A + * and B. See get_similarity() for more details. + * \param certainties array of values indicating how strongly a line in B is + * matched with some line in A. + * \param second_best_result array of absolute indices in A for the second + * closest match of a line in B. + * \param result array of absolute indices in A for the closest match of a line + * in B. + * \param max_search_distance_a maximum distance in lines from the closest line + * in A for other lines in A for which + * similarities may be calculated. + * \param max_search_distance_b an upper bound on the greatest possible + * distance between lines in B such that they will + * both be compared with the same line in A + * according to max_search_distance_a. + * \param map_line_number_in_b_to_a parameter to map_line_number(). + */ + static void fuzzy_find_matching_lines_recurse( + int start_a, int start_b, + int length_a, int length_b, + struct fingerprint *fingerprints_a, + struct fingerprint *fingerprints_b, + int *similarities, + int *certainties, + int *second_best_result, + int *result, + int max_search_distance_a, + int max_search_distance_b, + const struct line_number_mapping *map_line_number_in_b_to_a) + { + int i, invalidate_min, invalidate_max, offset_b, + second_half_start_a, second_half_start_b, + second_half_length_a, second_half_length_b, + most_certain_line_a, most_certain_local_line_b = -1, + most_certain_line_certainty = -1, + closest_local_line_a; + + for (i = 0; i < length_b; ++i) { + find_best_line_matches(start_a, + length_a, + start_b, + i, + fingerprints_a, + fingerprints_b, + similarities, + certainties, + second_best_result, + result, + max_search_distance_a, + map_line_number_in_b_to_a); + + if (certainties[i] > most_certain_line_certainty) { + most_certain_line_certainty = certainties[i]; + most_certain_local_line_b = i; + } + } + + /* No matches. */ + if (most_certain_local_line_b == -1) + return; + + most_certain_line_a = result[most_certain_local_line_b]; + + /* + * Subtract the most certain line's fingerprint in B from the matched + * fingerprint in A. This means that other lines in B can't also match + * the same parts of the line in A. + */ + fingerprint_subtract(fingerprints_a + most_certain_line_a - start_a, + fingerprints_b + most_certain_local_line_b); + + /* Invalidate results that may be affected by the choice of most + * certain line. + */ + invalidate_min = most_certain_local_line_b - max_search_distance_b; + invalidate_max = most_certain_local_line_b + max_search_distance_b + 1; + if (invalidate_min < 0) + invalidate_min = 0; + if (invalidate_max > length_b) + invalidate_max = length_b; + + /* As the fingerprint in A has changed, discard previously calculated + * similarity values with that fingerprint. + */ + for (i = invalidate_min; i < invalidate_max; ++i) { + closest_local_line_a = map_line_number( + i + start_b, map_line_number_in_b_to_a) - start_a; + + /* Check that the lines in A and B are close enough that there + * is a similarity value for them. + */ + if (abs(most_certain_line_a - start_a - closest_local_line_a) > + max_search_distance_a) { + continue; + } + + *get_similarity(similarities, most_certain_line_a - start_a, + i, closest_local_line_a, + max_search_distance_a) = -1; + } + + /* More invalidating of results that may be affected by the choice of + * most certain line. + * Discard the matches for lines in B that are currently matched with a + * line in A such that their ordering contradicts the ordering imposed + * by the choice of most certain line. + */ + for (i = most_certain_local_line_b - 1; i >= invalidate_min; --i) { + /* In this loop we discard results for lines in B that are + * before most-certain-line-B but are matched with a line in A + * that is after most-certain-line-A. + */ + if (certainties[i] >= 0 && + (result[i] >= most_certain_line_a || + second_best_result[i] >= most_certain_line_a)) { + certainties[i] = CERTAINTY_NOT_CALCULATED; + } + } + for (i = most_certain_local_line_b + 1; i < invalidate_max; ++i) { + /* In this loop we discard results for lines in B that are + * after most-certain-line-B but are matched with a line in A + * that is before most-certain-line-A. + */ + if (certainties[i] >= 0 && + (result[i] <= most_certain_line_a || + second_best_result[i] <= most_certain_line_a)) { + certainties[i] = CERTAINTY_NOT_CALCULATED; + } + } + + /* Repeat the matching process for lines before the most certain line. + */ + if (most_certain_local_line_b > 0) { + fuzzy_find_matching_lines_recurse( + start_a, start_b, + most_certain_line_a + 1 - start_a, + most_certain_local_line_b, + fingerprints_a, fingerprints_b, similarities, + certainties, second_best_result, result, + max_search_distance_a, + max_search_distance_b, + map_line_number_in_b_to_a); + } + /* Repeat the matching process for lines after the most certain line. + */ + if (most_certain_local_line_b + 1 < length_b) { + second_half_start_a = most_certain_line_a; + offset_b = most_certain_local_line_b + 1; + second_half_start_b = start_b + offset_b; + second_half_length_a = + length_a + start_a - second_half_start_a; + second_half_length_b = + length_b + start_b - second_half_start_b; + fuzzy_find_matching_lines_recurse( + second_half_start_a, second_half_start_b, + second_half_length_a, second_half_length_b, + fingerprints_a + second_half_start_a - start_a, + fingerprints_b + offset_b, + similarities + + offset_b * (max_search_distance_a * 2 + 1), + certainties + offset_b, + second_best_result + offset_b, result + offset_b, + max_search_distance_a, + max_search_distance_b, + map_line_number_in_b_to_a); + } + } + + /* Find the lines in the parent line range that most closely match the lines in + * the target line range. This is accomplished by matching fingerprints in each + * blame_origin, and choosing the best matches that preserve the line ordering. + * See struct fingerprint for details of fingerprint matching, and + * fuzzy_find_matching_lines_recurse for details of preserving line ordering. + * + * The performance is believed to be O(n log n) in the typical case and O(n^2) + * in a pathological case, where n is the number of lines in the target range. + */ + static int *fuzzy_find_matching_lines(struct blame_origin *parent, + struct blame_origin *target, + int tlno, int parent_slno, int same, + int parent_len) + { + /* We use the terminology "A" for the left hand side of the diff AKA + * parent, and "B" for the right hand side of the diff AKA target. */ + int start_a = parent_slno; + int length_a = parent_len; + int start_b = tlno; + int length_b = same - tlno; + + struct line_number_mapping map_line_number_in_b_to_a = { + start_a, length_a, start_b, length_b + }; + + struct fingerprint *fingerprints_a = parent->fingerprints; + struct fingerprint *fingerprints_b = target->fingerprints; + + int i, *result, *second_best_result, + *certainties, *similarities, similarity_count; + + /* + * max_search_distance_a means that given a line in B, compare it to + * the line in A that is closest to its position, and the lines in A + * that are no greater than max_search_distance_a lines away from the + * closest line in A. + * + * max_search_distance_b is an upper bound on the greatest possible + * distance between lines in B such that they will both be compared + * with the same line in A according to max_search_distance_a. + */ + int max_search_distance_a = 10, max_search_distance_b; + + if (length_a <= 0) + return NULL; + + if (max_search_distance_a >= length_a) + max_search_distance_a = length_a ? length_a - 1 : 0; + + max_search_distance_b = ((2 * max_search_distance_a + 1) * length_b + - 1) / length_a; + + result = xcalloc(sizeof(int), length_b); + second_best_result = xcalloc(sizeof(int), length_b); + certainties = xcalloc(sizeof(int), length_b); + + /* See get_similarity() for details of similarities. */ + similarity_count = length_b * (max_search_distance_a * 2 + 1); + similarities = xcalloc(sizeof(int), similarity_count); + + for (i = 0; i < length_b; ++i) { + result[i] = -1; + second_best_result[i] = -1; + certainties[i] = CERTAINTY_NOT_CALCULATED; + } + + for (i = 0; i < similarity_count; ++i) + similarities[i] = -1; + + fuzzy_find_matching_lines_recurse(start_a, start_b, + length_a, length_b, + fingerprints_a + start_a, + fingerprints_b + start_b, + similarities, + certainties, + second_best_result, + result, + max_search_distance_a, + max_search_distance_b, + &map_line_number_in_b_to_a); + + free(similarities); + free(certainties); + free(second_best_result); + + return result; + } + + static void fill_origin_fingerprints(struct blame_origin *o) + { + int *line_starts; + + if (o->fingerprints) + return; + o->num_lines = find_line_starts(&line_starts, o->file.ptr, + o->file.size); + o->fingerprints = xcalloc(sizeof(struct fingerprint), o->num_lines); + get_line_fingerprints(o->fingerprints, o->file.ptr, line_starts, + 0, o->num_lines); + free(line_starts); + } + + static void drop_origin_fingerprints(struct blame_origin *o) + { + if (o->fingerprints) { + free_line_fingerprints(o->fingerprints, o->num_lines); + o->num_lines = 0; + FREE_AND_NULL(o->fingerprints); + } + } + /* * Given an origin, prepare mmfile_t structure to be used by the * diff machinery */ static void fill_origin_blob(struct diff_options *opt, - struct blame_origin *o, mmfile_t *file, int *num_read_blob) + struct blame_origin *o, mmfile_t *file, + int *num_read_blob, int fill_fingerprints) { if (!o->file.ptr) { enum object_type type; @@@ -340,11 -1034,14 +1035,14 @@@ } else *file = o->file; + if (fill_fingerprints) + fill_origin_fingerprints(o); } static void drop_origin_blob(struct blame_origin *o) { FREE_AND_NULL(o->file.ptr); + drop_origin_fingerprints(o); } /* @@@ -480,7 -1177,9 +1178,9 @@@ void blame_coalesce(struct blame_scoreb for (ent = sb->ent; ent && (next = ent->next); ent = next) { if (ent->suspect == next->suspect && - ent->s_lno + ent->num_lines == next->s_lno) { + ent->s_lno + ent->num_lines == next->s_lno && + ent->ignored == next->ignored && + ent->unblamable == next->unblamable) { ent->num_lines += next->num_lines; ent->next = next->next; blame_origin_decref(next->suspect); @@@ -730,8 -1429,14 +1430,14 @@@ static void split_overlap(struct blame_ struct blame_origin *parent) { int chunk_end_lno; + int i; memset(split, 0, sizeof(struct blame_entry [3])); + for (i = 0; i < 3; i++) { + split[i].ignored = e->ignored; + split[i].unblamable = e->unblamable; + } + if (e->s_lno < tlno) { /* there is a pre-chunk part not blamed on parent */ split[0].suspect = blame_origin_incref(e->suspect); @@@ -839,6 -1544,164 +1545,164 @@@ static struct blame_entry *reverse_blam return tail; } + /* + * Splits a blame entry into two entries at 'len' lines. The original 'e' + * consists of len lines, i.e. [e->lno, e->lno + len), and the second part, + * which is returned, consists of the remainder: [e->lno + len, e->lno + + * e->num_lines). The caller needs to sort out the reference counting for the + * new entry's suspect. + */ + static struct blame_entry *split_blame_at(struct blame_entry *e, int len, + struct blame_origin *new_suspect) + { + struct blame_entry *n = xcalloc(1, sizeof(struct blame_entry)); + + n->suspect = new_suspect; + n->ignored = e->ignored; + n->unblamable = e->unblamable; + n->lno = e->lno + len; + n->s_lno = e->s_lno + len; + n->num_lines = e->num_lines - len; + e->num_lines = len; + e->score = 0; + return n; + } + + struct blame_line_tracker { + int is_parent; + int s_lno; + }; + + static int are_lines_adjacent(struct blame_line_tracker *first, + struct blame_line_tracker *second) + { + return first->is_parent == second->is_parent && + first->s_lno + 1 == second->s_lno; + } + + static int scan_parent_range(struct fingerprint *p_fps, + struct fingerprint *t_fps, int t_idx, + int from, int nr_lines) + { + int sim, p_idx; + #define FINGERPRINT_FILE_THRESHOLD 10 + int best_sim_val = FINGERPRINT_FILE_THRESHOLD; + int best_sim_idx = -1; + + for (p_idx = from; p_idx < from + nr_lines; p_idx++) { + sim = fingerprint_similarity(&t_fps[t_idx], &p_fps[p_idx]); + if (sim < best_sim_val) + continue; + /* Break ties with the closest-to-target line number */ + if (sim == best_sim_val && best_sim_idx != -1 && + abs(best_sim_idx - t_idx) < abs(p_idx - t_idx)) + continue; + best_sim_val = sim; + best_sim_idx = p_idx; + } + return best_sim_idx; + } + + /* + * The first pass checks the blame entry (from the target) against the parent's + * diff chunk. If that fails for a line, the second pass tries to match that + * line to any part of parent file. That catches cases where a change was + * broken into two chunks by 'context.' + */ + static void guess_line_blames(struct blame_origin *parent, + struct blame_origin *target, + int tlno, int offset, int same, int parent_len, + struct blame_line_tracker *line_blames) + { + int i, best_idx, target_idx; + int parent_slno = tlno + offset; + int *fuzzy_matches; + + fuzzy_matches = fuzzy_find_matching_lines(parent, target, + tlno, parent_slno, same, + parent_len); + for (i = 0; i < same - tlno; i++) { + target_idx = tlno + i; + if (fuzzy_matches && fuzzy_matches[i] >= 0) { + best_idx = fuzzy_matches[i]; + } else { + best_idx = scan_parent_range(parent->fingerprints, + target->fingerprints, + target_idx, 0, + parent->num_lines); + } + if (best_idx >= 0) { + line_blames[i].is_parent = 1; + line_blames[i].s_lno = best_idx; + } else { + line_blames[i].is_parent = 0; + line_blames[i].s_lno = target_idx; + } + } + free(fuzzy_matches); + } + + /* + * This decides which parts of a blame entry go to the parent (added to the + * ignoredp list) and which stay with the target (added to the diffp list). The + * actual decision was made in a separate heuristic function, and those answers + * for the lines in 'e' are in line_blames. This consumes e, essentially + * putting it on a list. + * + * Note that the blame entries on the ignoredp list are not necessarily sorted + * with respect to the parent's line numbers yet. + */ + static void ignore_blame_entry(struct blame_entry *e, + struct blame_origin *parent, + struct blame_entry **diffp, + struct blame_entry **ignoredp, + struct blame_line_tracker *line_blames) + { + int entry_len, nr_lines, i; + + /* + * We carve new entries off the front of e. Each entry comes from a + * contiguous chunk of lines: adjacent lines from the same origin + * (either the parent or the target). + */ + entry_len = 1; + nr_lines = e->num_lines; /* e changes in the loop */ + for (i = 0; i < nr_lines; i++) { + struct blame_entry *next = NULL; + + /* + * We are often adjacent to the next line - only split the blame + * entry when we have to. + */ + if (i + 1 < nr_lines) { + if (are_lines_adjacent(&line_blames[i], + &line_blames[i + 1])) { + entry_len++; + continue; + } + next = split_blame_at(e, entry_len, + blame_origin_incref(e->suspect)); + } + if (line_blames[i].is_parent) { + e->ignored = 1; + blame_origin_decref(e->suspect); + e->suspect = blame_origin_incref(parent); + e->s_lno = line_blames[i - entry_len + 1].s_lno; + e->next = *ignoredp; + *ignoredp = e; + } else { + e->unblamable = 1; + /* e->s_lno is already in the target's address space. */ + e->next = *diffp; + *diffp = e; + } + assert(e->num_lines == entry_len); + e = next; + entry_len = 1; + } + assert(!e); + } + /* * Process one hunk from the patch between the current suspect for * blame_entry e and its parent. This first blames any unfinished @@@ -848,13 -1711,20 +1712,20 @@@ * -C options may lead to overlapping/duplicate source line number * ranges, all we can rely on from sorting/merging is the order of the * first suspect line number. + * + * tlno: line number in the target where this chunk begins + * same: line number in the target where this chunk ends + * offset: add to tlno to get the chunk starting point in the parent + * parent_len: number of lines in the parent chunk */ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq, - int tlno, int offset, int same, - struct blame_origin *parent) + int tlno, int offset, int same, int parent_len, + struct blame_origin *parent, + struct blame_origin *target, int ignore_diffs) { struct blame_entry *e = **srcq; - struct blame_entry *samep = NULL, *diffp = NULL; + struct blame_entry *samep = NULL, *diffp = NULL, *ignoredp = NULL; + struct blame_line_tracker *line_blames = NULL; while (e && e->s_lno < tlno) { struct blame_entry *next = e->next; @@@ -865,14 -1735,9 +1736,9 @@@ */ if (e->s_lno + e->num_lines > tlno) { /* Move second half to a new record */ - int len = tlno - e->s_lno; - struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry)); - n->suspect = e->suspect; - n->lno = e->lno + len; - n->s_lno = e->s_lno + len; - n->num_lines = e->num_lines - len; - e->num_lines = len; - e->score = 0; + struct blame_entry *n; + + n = split_blame_at(e, tlno - e->s_lno, e->suspect); /* Push new record to diffp */ n->next = diffp; diffp = n; @@@ -908,6 -1773,14 +1774,14 @@@ */ samep = NULL; diffp = NULL; + + if (ignore_diffs && same - tlno > 0) { + line_blames = xcalloc(sizeof(struct blame_line_tracker), + same - tlno); + guess_line_blames(parent, target, tlno, offset, same, + parent_len, line_blames); + } + while (e && e->s_lno < same) { struct blame_entry *next = e->next; @@@ -919,22 -1792,37 +1793,37 @@@ * Move second half to a new record to be * processed by later chunks */ - int len = same - e->s_lno; - struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry)); - n->suspect = blame_origin_incref(e->suspect); - n->lno = e->lno + len; - n->s_lno = e->s_lno + len; - n->num_lines = e->num_lines - len; - e->num_lines = len; - e->score = 0; + struct blame_entry *n; + + n = split_blame_at(e, same - e->s_lno, + blame_origin_incref(e->suspect)); /* Push new record to samep */ n->next = samep; samep = n; } - e->next = diffp; - diffp = e; + if (ignore_diffs) { + ignore_blame_entry(e, parent, &diffp, &ignoredp, + line_blames + e->s_lno - tlno); + } else { + e->next = diffp; + diffp = e; + } e = next; } + free(line_blames); + if (ignoredp) { + /* + * Note ignoredp is not sorted yet, and thus neither is dstq. + * That list must be sorted before we queue_blames(). We defer + * sorting until after all diff hunks are processed, so that + * guess_line_blames() can pick *any* line in the parent. The + * slight drawback is that we end up sorting all blame entries + * passed to the parent, including those that are unrelated to + * changes made by the ignored commit. + */ + **dstq = reverse_blame(ignoredp, **dstq); + *dstq = &ignoredp->next; + } **srcq = reverse_blame(diffp, reverse_blame(samep, e)); /* Move across elements that are in the unblamable portion */ if (diffp) @@@ -943,7 -1831,9 +1832,9 @@@ struct blame_chunk_cb_data { struct blame_origin *parent; + struct blame_origin *target; long offset; + int ignore_diffs; struct blame_entry **dstq; struct blame_entry **srcq; }; @@@ -956,7 -1846,8 +1847,8 @@@ static int blame_chunk_cb(long start_a if (start_a - start_b != d->offset) die("internal error in blame::blame_chunk_cb"); blame_chunk(&d->dstq, &d->srcq, start_b, start_a - start_b, - start_b + count_b, d->parent); + start_b + count_b, count_a, d->parent, d->target, + d->ignore_diffs); d->offset = start_a + count_a - (start_b + count_b); return 0; } @@@ -968,7 -1859,7 +1860,7 @@@ */ static void pass_blame_to_parent(struct blame_scoreboard *sb, struct blame_origin *target, - struct blame_origin *parent) + struct blame_origin *parent, int ignore_diffs) { mmfile_t file_p, file_o; struct blame_chunk_cb_data d; @@@ -978,11 -1869,15 +1870,15 @@@ return; /* nothing remains for this target */ d.parent = parent; + d.target = target; d.offset = 0; + d.ignore_diffs = ignore_diffs; d.dstq = &newdest; d.srcq = &target->suspects; - fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob); - fill_origin_blob(&sb->revs->diffopt, target, &file_o, &sb->num_read_blob); + fill_origin_blob(&sb->revs->diffopt, parent, &file_p, + &sb->num_read_blob, ignore_diffs); + fill_origin_blob(&sb->revs->diffopt, target, &file_o, + &sb->num_read_blob, ignore_diffs); sb->num_get_patch++; if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts)) @@@ -990,8 -1885,13 +1886,13 @@@ oid_to_hex(&parent->commit->object.oid), oid_to_hex(&target->commit->object.oid)); /* The rest are the same as the parent */ - blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, parent); + blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, 0, + parent, target, 0); *d.dstq = NULL; + if (ignore_diffs) + newdest = llist_mergesort(newdest, get_next_blame, + set_next_blame, + compare_blame_suspect); queue_blames(sb, parent, newdest); return; @@@ -1188,7 -2088,8 +2089,8 @@@ static void find_move_in_parent(struct if (!unblamed) return; /* nothing remains for this target */ - fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob); + fill_origin_blob(&sb->revs->diffopt, parent, &file_p, + &sb->num_read_blob, 0); if (!file_p.ptr) return; @@@ -1317,7 -2218,8 +2219,8 @@@ static void find_copy_in_parent(struct norigin = get_origin(parent, p->one->path); oidcpy(&norigin->blob_oid, &p->one->oid); norigin->mode = p->one->mode; - fill_origin_blob(&sb->revs->diffopt, norigin, &file_p, &sb->num_read_blob); + fill_origin_blob(&sb->revs->diffopt, norigin, &file_p, + &sb->num_read_blob, 0); if (!file_p.ptr) continue; @@@ -1495,11 -2397,34 +2398,34 @@@ static void pass_blame(struct blame_sco blame_origin_incref(porigin); origin->previous = porigin; } - pass_blame_to_parent(sb, origin, porigin); + pass_blame_to_parent(sb, origin, porigin, 0); if (!origin->suspects) goto finish; } + /* + * Pass remaining suspects for ignored commits to their parents. + */ + if (oidset_contains(&sb->ignore_list, &commit->object.oid)) { + for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse); + i < num_sg && sg; + sg = sg->next, i++) { + struct blame_origin *porigin = sg_origin[i]; + + if (!porigin) + continue; + pass_blame_to_parent(sb, origin, porigin, 1); + /* + * Preemptively drop porigin so we can refresh the + * fingerprints if we use the parent again, which can + * occur if you ignore back-to-back commits. + */ + drop_origin_blob(porigin); + if (!origin->suspects) + goto finish; + } + } + /* * Optionally find moves in parents' files. */ @@@ -1562,8 -2487,7 +2488,8 @@@ finish } for (i = 0; i < num_sg; i++) { if (sg_origin[i]) { - drop_origin_blob(sg_origin[i]); + if (!sg_origin[i]->suspects) + drop_origin_blob(sg_origin[i]); blame_origin_decref(sg_origin[i]); } } @@@ -1640,37 -2564,14 +2566,14 @@@ void assign_blame(struct blame_scoreboa } } - static const char *get_next_line(const char *start, const char *end) - { - const char *nl = memchr(start, '\n', end - start); - return nl ? nl + 1 : end; - } - /* * To allow quick access to the contents of nth line in the * final image, prepare an index in the scoreboard. */ static int prepare_lines(struct blame_scoreboard *sb) { - const char *buf = sb->final_buf; - unsigned long len = sb->final_buf_size; - const char *end = buf + len; - const char *p; - int *lineno; - int num = 0; - - for (p = buf; p < end; p = get_next_line(p, end)) - num++; - - ALLOC_ARRAY(sb->lineno, num + 1); - lineno = sb->lineno; - - for (p = buf; p < end; p = get_next_line(p, end)) - *lineno++ = p - buf; - - *lineno = len; - - sb->num_lines = num; + sb->num_lines = find_line_starts(&sb->lineno, sb->final_buf, + sb->final_buf_size); return sb->num_lines; } diff --combined blame.h index d62f80fa74,5dd877bb78..4a9e1270b0 --- a/blame.h +++ b/blame.h @@@ -51,8 -51,10 +51,10 @@@ struct blame_origin */ struct blame_entry *suspects; mmfile_t file; + int num_lines; + void *fingerprints; struct object_id blob_oid; - unsigned mode; + unsigned short mode; /* guilty gets set when shipping any suspects to the final * blame list instead of other commits */ @@@ -92,6 -94,8 +94,8 @@@ struct blame_entry * scanning the lines over and over. */ unsigned score; + int ignored; + int unblamable; }; /* @@@ -117,6 -121,8 +121,8 @@@ struct blame_scoreboard /* linked list of blames */ struct blame_entry *ent; + struct oidset ignore_list; + /* look-up a line in the final buffer */ int num_lines; int *lineno; @@@ -177,6 -183,6 +183,6 @@@ struct blame_entry *blame_entry_prepend long start, long end, struct blame_origin *o); -extern struct blame_origin *get_blame_suspects(struct commit *commit); +struct blame_origin *get_blame_suspects(struct commit *commit); #endif /* BLAME_H */ diff --combined builtin/blame.c index 50e3d4a265,bb321eb200..b6534d4dea --- a/builtin/blame.c +++ b/builtin/blame.c @@@ -27,7 -27,6 +27,7 @@@ #include "object-store.h" #include "blame.h" #include "string-list.h" +#include "refs.h" static char blame_usage[] = N_("git blame [] [] [] [--] "); @@@ -53,14 -52,17 +53,17 @@@ static int no_whole_file_rename static int show_progress; static char repeated_meta_color[COLOR_MAXLEN]; static int coloring_mode; + static struct string_list ignore_revs_file_list = STRING_LIST_INIT_NODUP; + static int mark_unblamable_lines; + static int mark_ignored_lines; static struct date_mode blame_date_mode = { DATE_ISO8601 }; static size_t blame_date_width; static struct string_list mailmap = STRING_LIST_INIT_NODUP; -#ifndef DEBUG -#define DEBUG 0 +#ifndef DEBUG_BLAME +#define DEBUG_BLAME 0 #endif static unsigned blame_move_score; @@@ -480,6 -482,14 +483,14 @@@ static void emit_other(struct blame_sco } } + if (mark_unblamable_lines && ent->unblamable) { + length--; + putchar('*'); + } + if (mark_ignored_lines && ent->ignored) { + length--; + putchar('?'); + } printf("%.*s", length, hex); if (opt & OUTPUT_ANNOTATE_COMPAT) { const char *name; @@@ -696,6 -706,24 +707,24 @@@ static int git_blame_config(const char parse_date_format(value, &blame_date_mode); return 0; } + if (!strcmp(var, "blame.ignorerevsfile")) { + const char *str; + int ret; + + ret = git_config_pathname(&str, var, value); + if (ret) + return ret; + string_list_insert(&ignore_revs_file_list, str); + return 0; + } + if (!strcmp(var, "blame.markunblamablelines")) { + mark_unblamable_lines = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "blame.markignoredlines")) { + mark_ignored_lines = git_config_bool(var, value); + return 0; + } if (!strcmp(var, "color.blame.repeatedlines")) { if (color_parse_mem(value, strlen(value), repeated_meta_color)) warning(_("invalid color '%s' in color.blame.repeatedLines"), @@@ -775,6 -803,27 +804,27 @@@ static int is_a_rev(const char *name return OBJ_NONE < oid_object_info(the_repository, &oid, NULL); } + static void build_ignorelist(struct blame_scoreboard *sb, + struct string_list *ignore_revs_file_list, + struct string_list *ignore_rev_list) + { + struct string_list_item *i; + struct object_id oid; + + oidset_init(&sb->ignore_list, 0); + for_each_string_list_item(i, ignore_revs_file_list) { + if (!strcmp(i->string, "")) + oidset_clear(&sb->ignore_list); + else + oidset_parse_file(&sb->ignore_list, i->string); + } + for_each_string_list_item(i, ignore_rev_list) { + if (get_oid_committish(i->string, &oid)) + die(_("cannot find revision %s to ignore"), i->string); + oidset_insert(&sb->ignore_list, &oid); + } + } + int cmd_blame(int argc, const char **argv, const char *prefix) { struct rev_info revs; @@@ -786,6 -835,7 +836,7 @@@ struct progress_info pi = { NULL, 0 }; struct string_list range_list = STRING_LIST_INIT_NODUP; + struct string_list ignore_rev_list = STRING_LIST_INIT_NODUP; int output_option = 0, opt = 0; int show_stats = 0; const char *revs_file = NULL; @@@ -807,6 -857,8 +858,8 @@@ OPT_BIT('s', NULL, &output_option, N_("Suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR), OPT_BIT('e', "show-email", &output_option, N_("Show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL), OPT_BIT('w', NULL, &xdl_opts, N_("Ignore whitespace differences"), XDF_IGNORE_WHITESPACE), + OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("Ignore when blaming")), + OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("Ignore revisions from ")), OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE), OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR), @@@ -815,7 -867,7 +868,7 @@@ * and are only included here to get included in the "-h" * output: */ - { OPTION_LOWLEVEL_CALLBACK, 0, "indent-heuristic", NULL, NULL, N_("Use an experimental heuristic to improve diffs"), PARSE_OPT_NOARG, parse_opt_unknown_cb }, + { OPTION_LOWLEVEL_CALLBACK, 0, "indent-heuristic", NULL, NULL, N_("Use an experimental heuristic to improve diffs"), PARSE_OPT_NOARG, NULL, 0, parse_opt_unknown_cb }, OPT_BIT(0, "minimal", &xdl_opts, N_("Spend extra cycles to find better match"), XDF_NEED_MINIMAL), OPT_STRING('S', NULL, &revs_file, N_("file"), N_("Use revisions from instead of calling git-rev-list")), @@@ -926,10 -978,6 +979,10 @@@ parse_done */ blame_date_width = utf8_strwidth(_("4 years, 11 months ago")) + 1; /* add the null */ break; + case DATE_HUMAN: + /* If the year is shown, no time is shown */ + blame_date_width = sizeof("Thu Oct 19 16:00"); + break; case DATE_NORMAL: blame_date_width = sizeof("Thu Oct 19 16:00:04 2006 -0700"); break; @@@ -994,24 -1042,15 +1047,27 @@@ revs.disable_stdin = 1; setup_revisions(argc, argv, &revs, NULL); + if (!revs.pending.nr && is_bare_repository()) { + struct commit *head_commit; + struct object_id head_oid; + + if (!resolve_ref_unsafe("HEAD", RESOLVE_REF_READING, + &head_oid, NULL) || + !(head_commit = lookup_commit_reference_gently(revs.repo, + &head_oid, 1))) + die("no such ref: HEAD"); + + add_pending_object(&revs, &head_commit->object, "HEAD"); + } init_scoreboard(&sb); sb.revs = &revs; sb.contents_from = contents_from; sb.reverse = reverse; sb.repo = the_repository; + build_ignorelist(&sb, &ignore_revs_file_list, &ignore_rev_list); + string_list_clear(&ignore_revs_file_list, 0); + string_list_clear(&ignore_rev_list, 0); setup_scoreboard(&sb, path, &o); lno = sb.num_lines; @@@ -1024,8 -1063,7 +1080,8 @@@ long bottom, top; if (parse_range_arg(range_list.items[range_i].string, nth_line_cb, &sb, lno, anchor, - &bottom, &top, sb.path, &the_index)) + &bottom, &top, sb.path, + the_repository->index)) usage(blame_usage); if ((!lno && (top || bottom)) || lno < bottom) die(Q_("file %s has only %lu line", @@@ -1062,7 -1100,7 +1118,7 @@@ if (blame_copy_score) sb.copy_score = blame_copy_score; - sb.debug = DEBUG; + sb.debug = DEBUG_BLAME; sb.on_sanity_fail = &sanity_check_on_fail; sb.show_root = show_root; diff --combined fsck.c index 117c4a978f,80b53e6f49..cdb7d8db03 --- a/fsck.c +++ b/fsck.c @@@ -181,41 -181,6 +181,6 @@@ static int fsck_msg_type(enum fsck_msg_ return msg_type; } - static void init_skiplist(struct fsck_options *options, const char *path) - { - FILE *fp; - struct strbuf sb = STRBUF_INIT; - struct object_id oid; - - fp = fopen(path, "r"); - if (!fp) - die("Could not open skip list: %s", path); - while (!strbuf_getline(&sb, fp)) { - const char *p; - const char *hash; - - /* - * Allow trailing comments, leading whitespace - * (including before commits), and empty or whitespace - * only lines. - */ - hash = strchr(sb.buf, '#'); - if (hash) - strbuf_setlen(&sb, hash - sb.buf); - strbuf_trim(&sb); - if (!sb.len) - continue; - - if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0') - die("Invalid SHA-1: %s", sb.buf); - oidset_insert(&options->skiplist, &oid); - } - if (ferror(fp)) - die_errno("Could not read '%s'", path); - fclose(fp); - strbuf_release(&sb); - } - static int parse_msg_type(const char *str) { if (!strcmp(str, "error")) @@@ -284,7 -249,7 +249,7 @@@ void fsck_set_msg_types(struct fsck_opt if (!strcmp(buf, "skiplist")) { if (equal == len) die("skiplist requires a path"); - init_skiplist(options, buf + equal + 1); + oidset_parse_file(&options->skiplist, buf + equal + 1); buf += len + 1; continue; } @@@ -410,14 -375,14 +375,14 @@@ static int fsck_walk_tree(struct tree * continue; if (S_ISDIR(entry.mode)) { - obj = (struct object *)lookup_tree(the_repository, entry.oid); + obj = (struct object *)lookup_tree(the_repository, &entry.oid); if (name && obj) put_object_name(options, obj, "%s%s/", name, entry.path); result = options->walk(obj, OBJ_TREE, data, options); } else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) { - obj = (struct object *)lookup_blob(the_repository, entry.oid); + obj = (struct object *)lookup_blob(the_repository, &entry.oid); if (name && obj) put_object_name(options, obj, "%s%s", name, entry.path); @@@ -604,7 -569,7 +569,7 @@@ static int fsck_tree(struct tree *item o_name = NULL; while (desc.size) { - unsigned mode; + unsigned short mode; const char *name; const struct object_id *oid; @@@ -1092,7 -1057,7 +1057,7 @@@ int fsck_finish(struct fsck_options *op blob = lookup_blob(the_repository, oid); if (!blob) { - struct object *obj = lookup_unknown_object(oid->hash); + struct object *obj = lookup_unknown_object(oid); ret |= report(options, obj, FSCK_MSG_GITMODULES_BLOB, "non-blob found at .gitmodules"); diff --combined oidset.c index 8bdecb13de,584be63e52..f63ce818f6 --- a/oidset.c +++ b/oidset.c @@@ -5,33 -5,68 +5,68 @@@ void oidset_init(struct oidset *set, si { memset(&set->set, 0, sizeof(set->set)); if (initial_size) - kh_resize_oid(&set->set, initial_size); + kh_resize_oid_set(&set->set, initial_size); } int oidset_contains(const struct oidset *set, const struct object_id *oid) { - khiter_t pos = kh_get_oid(&set->set, *oid); + khiter_t pos = kh_get_oid_set(&set->set, *oid); return pos != kh_end(&set->set); } int oidset_insert(struct oidset *set, const struct object_id *oid) { int added; - kh_put_oid(&set->set, *oid, &added); + kh_put_oid_set(&set->set, *oid, &added); return !added; } int oidset_remove(struct oidset *set, const struct object_id *oid) { - khiter_t pos = kh_get_oid(&set->set, *oid); + khiter_t pos = kh_get_oid_set(&set->set, *oid); if (pos == kh_end(&set->set)) return 0; - kh_del_oid(&set->set, pos); + kh_del_oid_set(&set->set, pos); return 1; } void oidset_clear(struct oidset *set) { - kh_release_oid(&set->set); + kh_release_oid_set(&set->set); oidset_init(set, 0); } + + void oidset_parse_file(struct oidset *set, const char *path) + { + FILE *fp; + struct strbuf sb = STRBUF_INIT; + struct object_id oid; + + fp = fopen(path, "r"); + if (!fp) + die("could not open object name list: %s", path); + while (!strbuf_getline(&sb, fp)) { + const char *p; + const char *name; + + /* + * Allow trailing comments, leading whitespace + * (including before commits), and empty or whitespace + * only lines. + */ + name = strchr(sb.buf, '#'); + if (name) + strbuf_setlen(&sb, name - sb.buf); + strbuf_trim(&sb); + if (!sb.len) + continue; + + if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0') + die("invalid object name: %s", sb.buf); + oidset_insert(set, &oid); + } + if (ferror(fp)) + die_errno("Could not read '%s'", path); + fclose(fp); + strbuf_release(&sb); + } diff --combined oidset.h index 505fad578b,c4807749df..5346563b0b --- a/oidset.h +++ b/oidset.h @@@ -16,11 -16,23 +16,11 @@@ * table overhead. */ -static inline unsigned int oid_hash(struct object_id oid) -{ - return sha1hash(oid.hash); -} - -static inline int oid_equal(struct object_id a, struct object_id b) -{ - return oideq(&a, &b); -} - -KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal) - /** * A single oidset; should be zero-initialized (or use OIDSET_INIT). */ struct oidset { - kh_oid_t set; + kh_oid_set_t set; }; #define OIDSET_INIT { { 0 } } @@@ -61,8 -73,16 +61,16 @@@ int oidset_remove(struct oidset *set, c */ void oidset_clear(struct oidset *set); + /** + * Add the contents of the file 'path' to an initialized oidset. Each line is + * an unabbreviated object name. Comments begin with '#', and trailing comments + * are allowed. Leading whitespace and empty or white-space only lines are + * ignored. + */ + void oidset_parse_file(struct oidset *set, const char *path); + struct oidset_iter { - kh_oid_t *set; + kh_oid_set_t *set; khiter_t iter; };