From: Junio C Hamano Date: Fri, 19 Jul 2019 18:30:21 +0000 (-0700) Subject: Merge branch 'nd/tree-walk-with-repo' X-Git-Tag: v2.23.0-rc0~34 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/1eb0a12ec3934b7fc398b118806fdf7550ae636e?hp=-c Merge branch 'nd/tree-walk-with-repo' The tree-walk API learned to pass an in-core repository instance throughout more codepaths. * nd/tree-walk-with-repo: t7814: do not generate same commits in different repos Use the right 'struct repository' instead of the_repository match-trees.c: remove the_repo from shift_tree*() tree-walk.c: remove the_repo from get_tree_entry_follow_symlinks() tree-walk.c: remove the_repo from get_tree_entry() tree-walk.c: remove the_repo from fill_tree_descriptor() sha1-file.c: remove the_repo from read_object_with_reference() --- 1eb0a12ec3934b7fc398b118806fdf7550ae636e diff --combined blame.c index 7f04580ad5,ef022809e9..36a2e7ef11 --- a/blame.c +++ b/blame.c @@@ -101,7 -101,7 +101,7 @@@ static void verify_working_tree_path(st struct object_id blob_oid; unsigned short mode; - if (!get_tree_entry(commit_oid, path, &blob_oid, &mode) && + if (!get_tree_entry(r, commit_oid, path, &blob_oid, &mode) && oid_object_info(r, &blob_oid, NULL) == OBJ_BLOB) return; } @@@ -311,707 -311,12 +311,707 @@@ static int diff_hunks(mmfile_t *file_a return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb); } +static const char *get_next_line(const char *start, const char *end) +{ + const char *nl = memchr(start, '\n', end - start); + + return nl ? nl + 1 : end; +} + +static int find_line_starts(int **line_starts, const char *buf, + unsigned long len) +{ + const char *end = buf + len; + const char *p; + int *lineno; + int num = 0; + + for (p = buf; p < end; p = get_next_line(p, end)) + num++; + + ALLOC_ARRAY(*line_starts, num + 1); + lineno = *line_starts; + + for (p = buf; p < end; p = get_next_line(p, end)) + *lineno++ = p - buf; + + *lineno = len; + + return num; +} + +struct fingerprint_entry; + +/* A fingerprint is intended to loosely represent a string, such that two + * fingerprints can be quickly compared to give an indication of the similarity + * of the strings that they represent. + * + * A fingerprint is represented as a multiset of the lower-cased byte pairs in + * the string that it represents. Whitespace is added at each end of the + * string. Whitespace pairs are ignored. Whitespace is converted to '\0'. + * For example, the string "Darth Radar" will be converted to the following + * fingerprint: + * {"\0d", "da", "da", "ar", "ar", "rt", "th", "h\0", "\0r", "ra", "ad", "r\0"} + * + * The similarity between two fingerprints is the size of the intersection of + * their multisets, including repeated elements. See fingerprint_similarity for + * examples. + * + * For ease of implementation, the fingerprint is implemented as a map + * of byte pairs to the count of that byte pair in the string, instead of + * allowing repeated elements in a set. + */ +struct fingerprint { + struct hashmap map; + /* As we know the maximum number of entries in advance, it's + * convenient to store the entries in a single array instead of having + * the hashmap manage the memory. + */ + struct fingerprint_entry *entries; +}; + +/* A byte pair in a fingerprint. Stores the number of times the byte pair + * occurs in the string that the fingerprint represents. + */ +struct fingerprint_entry { + /* The hashmap entry - the hash represents the byte pair in its + * entirety so we don't need to store the byte pair separately. 
+ */ + struct hashmap_entry entry; + /* The number of times the byte pair occurs in the string that the + * fingerprint represents. + */ + int count; +}; + +/* See `struct fingerprint` for an explanation of what a fingerprint is. + * \param result the fingerprint of the string is stored here. This must be + * freed later using free_fingerprint. + * \param line_begin the start of the string + * \param line_end the end of the string + */ +static void get_fingerprint(struct fingerprint *result, + const char *line_begin, + const char *line_end) +{ + unsigned int hash, c0 = 0, c1; + const char *p; + int max_map_entry_count = 1 + line_end - line_begin; + struct fingerprint_entry *entry = xcalloc(max_map_entry_count, + sizeof(struct fingerprint_entry)); + struct fingerprint_entry *found_entry; + + hashmap_init(&result->map, NULL, NULL, max_map_entry_count); + result->entries = entry; + for (p = line_begin; p <= line_end; ++p, c0 = c1) { + /* Always terminate the string with whitespace. + * Normalise whitespace to 0, and normalise letters to + * lower case. This won't work for multibyte characters but at + * worst will match some unrelated characters. + */ + if ((p == line_end) || isspace(*p)) + c1 = 0; + else + c1 = tolower(*p); + hash = c0 | (c1 << 8); + /* Ignore whitespace pairs */ + if (hash == 0) + continue; + hashmap_entry_init(entry, hash); + + found_entry = hashmap_get(&result->map, entry, NULL); + if (found_entry) { + found_entry->count += 1; + } else { + entry->count = 1; + hashmap_add(&result->map, entry); + ++entry; + } + } +} + +static void free_fingerprint(struct fingerprint *f) +{ + hashmap_free(&f->map, 0); + free(f->entries); +} + +/* Calculates the similarity between two fingerprints as the size of the + * intersection of their multisets, including repeated elements. See + * `struct fingerprint` for an explanation of the fingerprint representation. + * The similarity between "cat mat" and "father rather" is 2 because "at" is + * present twice in both strings while the similarity between "tim" and "mit" + * is 0. + */ +static int fingerprint_similarity(struct fingerprint *a, struct fingerprint *b) +{ + int intersection = 0; + struct hashmap_iter iter; + const struct fingerprint_entry *entry_a, *entry_b; + + hashmap_iter_init(&b->map, &iter); + + while ((entry_b = hashmap_iter_next(&iter))) { + if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) { + intersection += entry_a->count < entry_b->count ? + entry_a->count : entry_b->count; + } + } + return intersection; +} + +/* Subtracts byte-pair elements in B from A, modifying A in place. + */ +static void fingerprint_subtract(struct fingerprint *a, struct fingerprint *b) +{ + struct hashmap_iter iter; + struct fingerprint_entry *entry_a; + const struct fingerprint_entry *entry_b; + + hashmap_iter_init(&b->map, &iter); + + while ((entry_b = hashmap_iter_next(&iter))) { + if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) { + if (entry_a->count <= entry_b->count) + hashmap_remove(&a->map, entry_b, NULL); + else + entry_a->count -= entry_b->count; + } + } +} + +/* Calculate fingerprints for a series of lines. + * Puts the fingerprints in the fingerprints array, which must have been + * preallocated to allow storing line_count elements. 
+ */ +static void get_line_fingerprints(struct fingerprint *fingerprints, + const char *content, const int *line_starts, + long first_line, long line_count) +{ + int i; + const char *linestart, *lineend; + + line_starts += first_line; + for (i = 0; i < line_count; ++i) { + linestart = content + line_starts[i]; + lineend = content + line_starts[i + 1]; + get_fingerprint(fingerprints + i, linestart, lineend); + } +} + +static void free_line_fingerprints(struct fingerprint *fingerprints, + int nr_fingerprints) +{ + int i; + + for (i = 0; i < nr_fingerprints; i++) + free_fingerprint(&fingerprints[i]); +} + +/* This contains the data necessary to linearly map a line number in one half + * of a diff chunk to the line in the other half of the diff chunk that is + * closest in terms of its position as a fraction of the length of the chunk. + */ +struct line_number_mapping { + int destination_start, destination_length, + source_start, source_length; +}; + +/* Given a line number in one range, offset and scale it to map it onto the + * other range. + * Essentially this mapping is a simple linear equation but the calculation is + * more complicated to allow performing it with integer operations. + * Another complication is that if a line could map onto many lines in the + * destination range then we want to choose the line at the center of those + * possibilities. + * Example: if the chunk is 2 lines long in A and 10 lines long in B then the + * first 5 lines in B will map onto the first line in the A chunk, while the + * last 5 lines will all map onto the second line in the A chunk. + * Example: if the chunk is 10 lines long in A and 2 lines long in B then line + * 0 in B will map onto line 2 in A, and line 1 in B will map onto line 7 in A. + */ +static int map_line_number(int line_number, + const struct line_number_mapping *mapping) +{ + return ((line_number - mapping->source_start) * 2 + 1) * + mapping->destination_length / + (mapping->source_length * 2) + + mapping->destination_start; +} + +/* Get a pointer to the element storing the similarity between a line in A + * and a line in B. + * + * The similarities are stored in a 2-dimensional array. Each "row" in the + * array contains the similarities for a line in B. The similarities stored in + * a row are the similarities between the line in B and the nearby lines in A. + * To keep the length of each row the same, it is padded out with values of -1 + * where the search range extends beyond the lines in A. + * For example, if max_search_distance_a is 2 and the two sides of a diff chunk + * look like this: + * a | m + * b | n + * c | o + * d | p + * e | q + * Then the similarity array will contain: + * [-1, -1, am, bm, cm, + * -1, an, bn, cn, dn, + * ao, bo, co, do, eo, + * bp, cp, dp, ep, -1, + * cq, dq, eq, -1, -1] + * Where similarities are denoted either by -1 for invalid, or the + * concatenation of the two lines in the diff being compared. + * + * \param similarities array of similarities between lines in A and B + * \param line_a the index of the line in A, in the same frame of reference as + * closest_line_a. + * \param local_line_b the index of the line in B, relative to the first line + * in B that similarities represents. + * \param closest_line_a the index of the line in A that is deemed to be + * closest to local_line_b. This must be in the same + * frame of reference as line_a. This value defines + * where similarities is centered for the line in B. 
+ * \param max_search_distance_a maximum distance in lines from the closest line + * in A for other lines in A for which + * similarities may be calculated. + */ +static int *get_similarity(int *similarities, + int line_a, int local_line_b, + int closest_line_a, int max_search_distance_a) +{ + assert(abs(line_a - closest_line_a) <= + max_search_distance_a); + return similarities + line_a - closest_line_a + + max_search_distance_a + + local_line_b * (max_search_distance_a * 2 + 1); +} + +#define CERTAIN_NOTHING_MATCHES -2 +#define CERTAINTY_NOT_CALCULATED -1 + +/* Given a line in B, first calculate its similarities with nearby lines in A + * if not already calculated, then identify the most similar and second most + * similar lines. The "certainty" is calculated based on those two + * similarities. + * + * \param start_a the index of the first line of the chunk in A + * \param length_a the length in lines of the chunk in A + * \param local_line_b the index of the line in B, relative to the first line + * in the chunk. + * \param fingerprints_a array of fingerprints for the chunk in A + * \param fingerprints_b array of fingerprints for the chunk in B + * \param similarities 2-dimensional array of similarities between lines in A + * and B. See get_similarity() for more details. + * \param certainties array of values indicating how strongly a line in B is + * matched with some line in A. + * \param second_best_result array of absolute indices in A for the second + * closest match of a line in B. + * \param result array of absolute indices in A for the closest match of a line + * in B. + * \param max_search_distance_a maximum distance in lines from the closest line + * in A for other lines in A for which + * similarities may be calculated. + * \param map_line_number_in_b_to_a parameter to map_line_number(). + */ +static void find_best_line_matches( + int start_a, + int length_a, + int start_b, + int local_line_b, + struct fingerprint *fingerprints_a, + struct fingerprint *fingerprints_b, + int *similarities, + int *certainties, + int *second_best_result, + int *result, + const int max_search_distance_a, + const struct line_number_mapping *map_line_number_in_b_to_a) +{ + + int i, search_start, search_end, closest_local_line_a, *similarity, + best_similarity = 0, second_best_similarity = 0, + best_similarity_index = 0, second_best_similarity_index = 0; + + /* certainty has already been calculated so no need to redo the work */ + if (certainties[local_line_b] != CERTAINTY_NOT_CALCULATED) + return; + + closest_local_line_a = map_line_number( + local_line_b + start_b, map_line_number_in_b_to_a) - start_a; + + search_start = closest_local_line_a - max_search_distance_a; + if (search_start < 0) + search_start = 0; + + search_end = closest_local_line_a + max_search_distance_a + 1; + if (search_end > length_a) + search_end = length_a; + + for (i = search_start; i < search_end; ++i) { + similarity = get_similarity(similarities, + i, local_line_b, + closest_local_line_a, + max_search_distance_a); + if (*similarity == -1) { + /* This value will never exceed 10 but assert just in + * case + */ + assert(abs(i - closest_local_line_a) < 1000); + /* scale the similarity by (1000 - distance from + * closest line) to act as a tie break between lines + * that otherwise are equally similar. 
+ */
+ *similarity = fingerprint_similarity(
+ fingerprints_b + local_line_b,
+ fingerprints_a + i) *
+ (1000 - abs(i - closest_local_line_a));
+ }
+ if (*similarity > best_similarity) {
+ second_best_similarity = best_similarity;
+ second_best_similarity_index = best_similarity_index;
+ best_similarity = *similarity;
+ best_similarity_index = i;
+ } else if (*similarity > second_best_similarity) {
+ second_best_similarity = *similarity;
+ second_best_similarity_index = i;
+ }
+ }
+
+ if (best_similarity == 0) {
+ /* this line definitely doesn't match with anything. Mark it
+ * with this special value so it doesn't get invalidated and
+ * won't be recalculated.
+ */
+ certainties[local_line_b] = CERTAIN_NOTHING_MATCHES;
+ result[local_line_b] = -1;
+ } else {
+ /* Calculate the certainty with which this line matches.
+ * If the line matches well with two lines then that reduces
+ * the certainty. However we still want to prioritise matching
+ * a line that matches very well with two lines over matching a
+ * line that matches poorly with one line, hence doubling
+ * best_similarity.
+ * This means that if we have
+ * line X that matches only one line with a score of 3,
+ * line Y that matches two lines equally with a score of 5,
+ * and line Z that matches only one line with a score of 2,
+ * then the lines in order of certainty are X, Y, Z.
+ */
+ certainties[local_line_b] = best_similarity * 2 -
+ second_best_similarity;
+
+ /* We keep both the best and second best results to allow us to
+ * check at a later stage of the matching process whether the
+ * result needs to be invalidated.
+ */
+ result[local_line_b] = start_a + best_similarity_index;
+ second_best_result[local_line_b] =
+ start_a + second_best_similarity_index;
+ }
+}
+
+/*
+ * This finds the line that we can match with the most confidence, and
+ * uses it as a partition. It then calls itself on the lines on either side of
+ * that partition. In this way we avoid lines appearing out of order, and
+ * retain a sensible line ordering.
+ * \param start_a index of the first line in A with which lines in B may be
+ * compared.
+ * \param start_b index of the first line in B for which matching should be
+ * done.
+ * \param length_a number of lines in A with which lines in B may be compared.
+ * \param length_b number of lines in B for which matching should be done.
+ * \param fingerprints_a mutable array of fingerprints in A. The first element
+ * corresponds to the line at start_a.
+ * \param fingerprints_b array of fingerprints in B. The first element
+ * corresponds to the line at start_b.
+ * \param similarities 2-dimensional array of similarities between lines in A
+ * and B. See get_similarity() for more details.
+ * \param certainties array of values indicating how strongly a line in B is
+ * matched with some line in A.
+ * \param second_best_result array of absolute indices in A for the second
+ * closest match of a line in B.
+ * \param result array of absolute indices in A for the closest match of a line
+ * in B.
+ * \param max_search_distance_a maximum distance in lines from the closest line
+ * in A for other lines in A for which
+ * similarities may be calculated.
+ * \param max_search_distance_b an upper bound on the greatest possible
+ * distance between lines in B such that they will
+ * both be compared with the same line in A
+ * according to max_search_distance_a.
+ * \param map_line_number_in_b_to_a parameter to map_line_number(). 
+ */ +static void fuzzy_find_matching_lines_recurse( + int start_a, int start_b, + int length_a, int length_b, + struct fingerprint *fingerprints_a, + struct fingerprint *fingerprints_b, + int *similarities, + int *certainties, + int *second_best_result, + int *result, + int max_search_distance_a, + int max_search_distance_b, + const struct line_number_mapping *map_line_number_in_b_to_a) +{ + int i, invalidate_min, invalidate_max, offset_b, + second_half_start_a, second_half_start_b, + second_half_length_a, second_half_length_b, + most_certain_line_a, most_certain_local_line_b = -1, + most_certain_line_certainty = -1, + closest_local_line_a; + + for (i = 0; i < length_b; ++i) { + find_best_line_matches(start_a, + length_a, + start_b, + i, + fingerprints_a, + fingerprints_b, + similarities, + certainties, + second_best_result, + result, + max_search_distance_a, + map_line_number_in_b_to_a); + + if (certainties[i] > most_certain_line_certainty) { + most_certain_line_certainty = certainties[i]; + most_certain_local_line_b = i; + } + } + + /* No matches. */ + if (most_certain_local_line_b == -1) + return; + + most_certain_line_a = result[most_certain_local_line_b]; + + /* + * Subtract the most certain line's fingerprint in B from the matched + * fingerprint in A. This means that other lines in B can't also match + * the same parts of the line in A. + */ + fingerprint_subtract(fingerprints_a + most_certain_line_a - start_a, + fingerprints_b + most_certain_local_line_b); + + /* Invalidate results that may be affected by the choice of most + * certain line. + */ + invalidate_min = most_certain_local_line_b - max_search_distance_b; + invalidate_max = most_certain_local_line_b + max_search_distance_b + 1; + if (invalidate_min < 0) + invalidate_min = 0; + if (invalidate_max > length_b) + invalidate_max = length_b; + + /* As the fingerprint in A has changed, discard previously calculated + * similarity values with that fingerprint. + */ + for (i = invalidate_min; i < invalidate_max; ++i) { + closest_local_line_a = map_line_number( + i + start_b, map_line_number_in_b_to_a) - start_a; + + /* Check that the lines in A and B are close enough that there + * is a similarity value for them. + */ + if (abs(most_certain_line_a - start_a - closest_local_line_a) > + max_search_distance_a) { + continue; + } + + *get_similarity(similarities, most_certain_line_a - start_a, + i, closest_local_line_a, + max_search_distance_a) = -1; + } + + /* More invalidating of results that may be affected by the choice of + * most certain line. + * Discard the matches for lines in B that are currently matched with a + * line in A such that their ordering contradicts the ordering imposed + * by the choice of most certain line. + */ + for (i = most_certain_local_line_b - 1; i >= invalidate_min; --i) { + /* In this loop we discard results for lines in B that are + * before most-certain-line-B but are matched with a line in A + * that is after most-certain-line-A. + */ + if (certainties[i] >= 0 && + (result[i] >= most_certain_line_a || + second_best_result[i] >= most_certain_line_a)) { + certainties[i] = CERTAINTY_NOT_CALCULATED; + } + } + for (i = most_certain_local_line_b + 1; i < invalidate_max; ++i) { + /* In this loop we discard results for lines in B that are + * after most-certain-line-B but are matched with a line in A + * that is before most-certain-line-A. 
+ */ + if (certainties[i] >= 0 && + (result[i] <= most_certain_line_a || + second_best_result[i] <= most_certain_line_a)) { + certainties[i] = CERTAINTY_NOT_CALCULATED; + } + } + + /* Repeat the matching process for lines before the most certain line. + */ + if (most_certain_local_line_b > 0) { + fuzzy_find_matching_lines_recurse( + start_a, start_b, + most_certain_line_a + 1 - start_a, + most_certain_local_line_b, + fingerprints_a, fingerprints_b, similarities, + certainties, second_best_result, result, + max_search_distance_a, + max_search_distance_b, + map_line_number_in_b_to_a); + } + /* Repeat the matching process for lines after the most certain line. + */ + if (most_certain_local_line_b + 1 < length_b) { + second_half_start_a = most_certain_line_a; + offset_b = most_certain_local_line_b + 1; + second_half_start_b = start_b + offset_b; + second_half_length_a = + length_a + start_a - second_half_start_a; + second_half_length_b = + length_b + start_b - second_half_start_b; + fuzzy_find_matching_lines_recurse( + second_half_start_a, second_half_start_b, + second_half_length_a, second_half_length_b, + fingerprints_a + second_half_start_a - start_a, + fingerprints_b + offset_b, + similarities + + offset_b * (max_search_distance_a * 2 + 1), + certainties + offset_b, + second_best_result + offset_b, result + offset_b, + max_search_distance_a, + max_search_distance_b, + map_line_number_in_b_to_a); + } +} + +/* Find the lines in the parent line range that most closely match the lines in + * the target line range. This is accomplished by matching fingerprints in each + * blame_origin, and choosing the best matches that preserve the line ordering. + * See struct fingerprint for details of fingerprint matching, and + * fuzzy_find_matching_lines_recurse for details of preserving line ordering. + * + * The performance is believed to be O(n log n) in the typical case and O(n^2) + * in a pathological case, where n is the number of lines in the target range. + */ +static int *fuzzy_find_matching_lines(struct blame_origin *parent, + struct blame_origin *target, + int tlno, int parent_slno, int same, + int parent_len) +{ + /* We use the terminology "A" for the left hand side of the diff AKA + * parent, and "B" for the right hand side of the diff AKA target. */ + int start_a = parent_slno; + int length_a = parent_len; + int start_b = tlno; + int length_b = same - tlno; + + struct line_number_mapping map_line_number_in_b_to_a = { + start_a, length_a, start_b, length_b + }; + + struct fingerprint *fingerprints_a = parent->fingerprints; + struct fingerprint *fingerprints_b = target->fingerprints; + + int i, *result, *second_best_result, + *certainties, *similarities, similarity_count; + + /* + * max_search_distance_a means that given a line in B, compare it to + * the line in A that is closest to its position, and the lines in A + * that are no greater than max_search_distance_a lines away from the + * closest line in A. + * + * max_search_distance_b is an upper bound on the greatest possible + * distance between lines in B such that they will both be compared + * with the same line in A according to max_search_distance_a. + */ + int max_search_distance_a = 10, max_search_distance_b; + + if (length_a <= 0) + return NULL; + + if (max_search_distance_a >= length_a) + max_search_distance_a = length_a ? 
length_a - 1 : 0; + + max_search_distance_b = ((2 * max_search_distance_a + 1) * length_b + - 1) / length_a; + + result = xcalloc(sizeof(int), length_b); + second_best_result = xcalloc(sizeof(int), length_b); + certainties = xcalloc(sizeof(int), length_b); + + /* See get_similarity() for details of similarities. */ + similarity_count = length_b * (max_search_distance_a * 2 + 1); + similarities = xcalloc(sizeof(int), similarity_count); + + for (i = 0; i < length_b; ++i) { + result[i] = -1; + second_best_result[i] = -1; + certainties[i] = CERTAINTY_NOT_CALCULATED; + } + + for (i = 0; i < similarity_count; ++i) + similarities[i] = -1; + + fuzzy_find_matching_lines_recurse(start_a, start_b, + length_a, length_b, + fingerprints_a + start_a, + fingerprints_b + start_b, + similarities, + certainties, + second_best_result, + result, + max_search_distance_a, + max_search_distance_b, + &map_line_number_in_b_to_a); + + free(similarities); + free(certainties); + free(second_best_result); + + return result; +} + +static void fill_origin_fingerprints(struct blame_origin *o) +{ + int *line_starts; + + if (o->fingerprints) + return; + o->num_lines = find_line_starts(&line_starts, o->file.ptr, + o->file.size); + o->fingerprints = xcalloc(sizeof(struct fingerprint), o->num_lines); + get_line_fingerprints(o->fingerprints, o->file.ptr, line_starts, + 0, o->num_lines); + free(line_starts); +} + +static void drop_origin_fingerprints(struct blame_origin *o) +{ + if (o->fingerprints) { + free_line_fingerprints(o->fingerprints, o->num_lines); + o->num_lines = 0; + FREE_AND_NULL(o->fingerprints); + } +} + /* * Given an origin, prepare mmfile_t structure to be used by the * diff machinery */ static void fill_origin_blob(struct diff_options *opt, - struct blame_origin *o, mmfile_t *file, int *num_read_blob) + struct blame_origin *o, mmfile_t *file, + int *num_read_blob, int fill_fingerprints) { if (!o->file.ptr) { enum object_type type; @@@ -1035,14 -340,11 +1035,14 @@@ } else *file = o->file; + if (fill_fingerprints) + fill_origin_fingerprints(o); } static void drop_origin_blob(struct blame_origin *o) { FREE_AND_NULL(o->file.ptr); + drop_origin_fingerprints(o); } /* @@@ -1178,9 -480,7 +1178,9 @@@ void blame_coalesce(struct blame_scoreb for (ent = sb->ent; ent && (next = ent->next); ent = next) { if (ent->suspect == next->suspect && - ent->s_lno + ent->num_lines == next->s_lno) { + ent->s_lno + ent->num_lines == next->s_lno && + ent->ignored == next->ignored && + ent->unblamable == next->unblamable) { ent->num_lines += next->num_lines; ent->next = next->next; blame_origin_decref(next->suspect); @@@ -1232,7 -532,7 +1232,7 @@@ static int fill_blob_sha1_and_mode(stru { if (!is_null_oid(&origin->blob_oid)) return 0; - if (get_tree_entry(&origin->commit->object.oid, origin->path, &origin->blob_oid, &origin->mode)) + if (get_tree_entry(r, &origin->commit->object.oid, origin->path, &origin->blob_oid, &origin->mode)) goto error_out; if (oid_object_info(r, &origin->blob_oid, NULL) != OBJ_BLOB) goto error_out; @@@ -1430,14 -730,8 +1430,14 @@@ static void split_overlap(struct blame_ struct blame_origin *parent) { int chunk_end_lno; + int i; memset(split, 0, sizeof(struct blame_entry [3])); + for (i = 0; i < 3; i++) { + split[i].ignored = e->ignored; + split[i].unblamable = e->unblamable; + } + if (e->s_lno < tlno) { /* there is a pre-chunk part not blamed on parent */ split[0].suspect = blame_origin_incref(e->suspect); @@@ -1545,164 -839,6 +1545,164 @@@ static struct blame_entry *reverse_blam return tail; } +/* + * Splits a 
blame entry into two entries at 'len' lines. The original 'e' + * consists of len lines, i.e. [e->lno, e->lno + len), and the second part, + * which is returned, consists of the remainder: [e->lno + len, e->lno + + * e->num_lines). The caller needs to sort out the reference counting for the + * new entry's suspect. + */ +static struct blame_entry *split_blame_at(struct blame_entry *e, int len, + struct blame_origin *new_suspect) +{ + struct blame_entry *n = xcalloc(1, sizeof(struct blame_entry)); + + n->suspect = new_suspect; + n->ignored = e->ignored; + n->unblamable = e->unblamable; + n->lno = e->lno + len; + n->s_lno = e->s_lno + len; + n->num_lines = e->num_lines - len; + e->num_lines = len; + e->score = 0; + return n; +} + +struct blame_line_tracker { + int is_parent; + int s_lno; +}; + +static int are_lines_adjacent(struct blame_line_tracker *first, + struct blame_line_tracker *second) +{ + return first->is_parent == second->is_parent && + first->s_lno + 1 == second->s_lno; +} + +static int scan_parent_range(struct fingerprint *p_fps, + struct fingerprint *t_fps, int t_idx, + int from, int nr_lines) +{ + int sim, p_idx; + #define FINGERPRINT_FILE_THRESHOLD 10 + int best_sim_val = FINGERPRINT_FILE_THRESHOLD; + int best_sim_idx = -1; + + for (p_idx = from; p_idx < from + nr_lines; p_idx++) { + sim = fingerprint_similarity(&t_fps[t_idx], &p_fps[p_idx]); + if (sim < best_sim_val) + continue; + /* Break ties with the closest-to-target line number */ + if (sim == best_sim_val && best_sim_idx != -1 && + abs(best_sim_idx - t_idx) < abs(p_idx - t_idx)) + continue; + best_sim_val = sim; + best_sim_idx = p_idx; + } + return best_sim_idx; +} + +/* + * The first pass checks the blame entry (from the target) against the parent's + * diff chunk. If that fails for a line, the second pass tries to match that + * line to any part of parent file. That catches cases where a change was + * broken into two chunks by 'context.' + */ +static void guess_line_blames(struct blame_origin *parent, + struct blame_origin *target, + int tlno, int offset, int same, int parent_len, + struct blame_line_tracker *line_blames) +{ + int i, best_idx, target_idx; + int parent_slno = tlno + offset; + int *fuzzy_matches; + + fuzzy_matches = fuzzy_find_matching_lines(parent, target, + tlno, parent_slno, same, + parent_len); + for (i = 0; i < same - tlno; i++) { + target_idx = tlno + i; + if (fuzzy_matches && fuzzy_matches[i] >= 0) { + best_idx = fuzzy_matches[i]; + } else { + best_idx = scan_parent_range(parent->fingerprints, + target->fingerprints, + target_idx, 0, + parent->num_lines); + } + if (best_idx >= 0) { + line_blames[i].is_parent = 1; + line_blames[i].s_lno = best_idx; + } else { + line_blames[i].is_parent = 0; + line_blames[i].s_lno = target_idx; + } + } + free(fuzzy_matches); +} + +/* + * This decides which parts of a blame entry go to the parent (added to the + * ignoredp list) and which stay with the target (added to the diffp list). The + * actual decision was made in a separate heuristic function, and those answers + * for the lines in 'e' are in line_blames. This consumes e, essentially + * putting it on a list. + * + * Note that the blame entries on the ignoredp list are not necessarily sorted + * with respect to the parent's line numbers yet. 
+ */ +static void ignore_blame_entry(struct blame_entry *e, + struct blame_origin *parent, + struct blame_entry **diffp, + struct blame_entry **ignoredp, + struct blame_line_tracker *line_blames) +{ + int entry_len, nr_lines, i; + + /* + * We carve new entries off the front of e. Each entry comes from a + * contiguous chunk of lines: adjacent lines from the same origin + * (either the parent or the target). + */ + entry_len = 1; + nr_lines = e->num_lines; /* e changes in the loop */ + for (i = 0; i < nr_lines; i++) { + struct blame_entry *next = NULL; + + /* + * We are often adjacent to the next line - only split the blame + * entry when we have to. + */ + if (i + 1 < nr_lines) { + if (are_lines_adjacent(&line_blames[i], + &line_blames[i + 1])) { + entry_len++; + continue; + } + next = split_blame_at(e, entry_len, + blame_origin_incref(e->suspect)); + } + if (line_blames[i].is_parent) { + e->ignored = 1; + blame_origin_decref(e->suspect); + e->suspect = blame_origin_incref(parent); + e->s_lno = line_blames[i - entry_len + 1].s_lno; + e->next = *ignoredp; + *ignoredp = e; + } else { + e->unblamable = 1; + /* e->s_lno is already in the target's address space. */ + e->next = *diffp; + *diffp = e; + } + assert(e->num_lines == entry_len); + e = next; + entry_len = 1; + } + assert(!e); +} + /* * Process one hunk from the patch between the current suspect for * blame_entry e and its parent. This first blames any unfinished @@@ -1712,20 -848,13 +1712,20 @@@ * -C options may lead to overlapping/duplicate source line number * ranges, all we can rely on from sorting/merging is the order of the * first suspect line number. + * + * tlno: line number in the target where this chunk begins + * same: line number in the target where this chunk ends + * offset: add to tlno to get the chunk starting point in the parent + * parent_len: number of lines in the parent chunk */ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq, - int tlno, int offset, int same, - struct blame_origin *parent) + int tlno, int offset, int same, int parent_len, + struct blame_origin *parent, + struct blame_origin *target, int ignore_diffs) { struct blame_entry *e = **srcq; - struct blame_entry *samep = NULL, *diffp = NULL; + struct blame_entry *samep = NULL, *diffp = NULL, *ignoredp = NULL; + struct blame_line_tracker *line_blames = NULL; while (e && e->s_lno < tlno) { struct blame_entry *next = e->next; @@@ -1736,9 -865,14 +1736,9 @@@ */ if (e->s_lno + e->num_lines > tlno) { /* Move second half to a new record */ - int len = tlno - e->s_lno; - struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry)); - n->suspect = e->suspect; - n->lno = e->lno + len; - n->s_lno = e->s_lno + len; - n->num_lines = e->num_lines - len; - e->num_lines = len; - e->score = 0; + struct blame_entry *n; + + n = split_blame_at(e, tlno - e->s_lno, e->suspect); /* Push new record to diffp */ n->next = diffp; diffp = n; @@@ -1774,14 -908,6 +1774,14 @@@ */ samep = NULL; diffp = NULL; + + if (ignore_diffs && same - tlno > 0) { + line_blames = xcalloc(sizeof(struct blame_line_tracker), + same - tlno); + guess_line_blames(parent, target, tlno, offset, same, + parent_len, line_blames); + } + while (e && e->s_lno < same) { struct blame_entry *next = e->next; @@@ -1793,37 -919,22 +1793,37 @@@ * Move second half to a new record to be * processed by later chunks */ - int len = same - e->s_lno; - struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry)); - n->suspect = blame_origin_incref(e->suspect); - n->lno = e->lno + len; - 
n->s_lno = e->s_lno + len; - n->num_lines = e->num_lines - len; - e->num_lines = len; - e->score = 0; + struct blame_entry *n; + + n = split_blame_at(e, same - e->s_lno, + blame_origin_incref(e->suspect)); /* Push new record to samep */ n->next = samep; samep = n; } - e->next = diffp; - diffp = e; + if (ignore_diffs) { + ignore_blame_entry(e, parent, &diffp, &ignoredp, + line_blames + e->s_lno - tlno); + } else { + e->next = diffp; + diffp = e; + } e = next; } + free(line_blames); + if (ignoredp) { + /* + * Note ignoredp is not sorted yet, and thus neither is dstq. + * That list must be sorted before we queue_blames(). We defer + * sorting until after all diff hunks are processed, so that + * guess_line_blames() can pick *any* line in the parent. The + * slight drawback is that we end up sorting all blame entries + * passed to the parent, including those that are unrelated to + * changes made by the ignored commit. + */ + **dstq = reverse_blame(ignoredp, **dstq); + *dstq = &ignoredp->next; + } **srcq = reverse_blame(diffp, reverse_blame(samep, e)); /* Move across elements that are in the unblamable portion */ if (diffp) @@@ -1832,9 -943,7 +1832,9 @@@ struct blame_chunk_cb_data { struct blame_origin *parent; + struct blame_origin *target; long offset; + int ignore_diffs; struct blame_entry **dstq; struct blame_entry **srcq; }; @@@ -1847,8 -956,7 +1847,8 @@@ static int blame_chunk_cb(long start_a if (start_a - start_b != d->offset) die("internal error in blame::blame_chunk_cb"); blame_chunk(&d->dstq, &d->srcq, start_b, start_a - start_b, - start_b + count_b, d->parent); + start_b + count_b, count_a, d->parent, d->target, + d->ignore_diffs); d->offset = start_a + count_a - (start_b + count_b); return 0; } @@@ -1860,7 -968,7 +1860,7 @@@ */ static void pass_blame_to_parent(struct blame_scoreboard *sb, struct blame_origin *target, - struct blame_origin *parent) + struct blame_origin *parent, int ignore_diffs) { mmfile_t file_p, file_o; struct blame_chunk_cb_data d; @@@ -1870,15 -978,11 +1870,15 @@@ return; /* nothing remains for this target */ d.parent = parent; + d.target = target; d.offset = 0; + d.ignore_diffs = ignore_diffs; d.dstq = &newdest; d.srcq = &target->suspects; - fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob); - fill_origin_blob(&sb->revs->diffopt, target, &file_o, &sb->num_read_blob); + fill_origin_blob(&sb->revs->diffopt, parent, &file_p, + &sb->num_read_blob, ignore_diffs); + fill_origin_blob(&sb->revs->diffopt, target, &file_o, + &sb->num_read_blob, ignore_diffs); sb->num_get_patch++; if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts)) @@@ -1886,13 -990,8 +1886,13 @@@ oid_to_hex(&parent->commit->object.oid), oid_to_hex(&target->commit->object.oid)); /* The rest are the same as the parent */ - blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, parent); + blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, 0, + parent, target, 0); *d.dstq = NULL; + if (ignore_diffs) + newdest = llist_mergesort(newdest, get_next_blame, + set_next_blame, + compare_blame_suspect); queue_blames(sb, parent, newdest); return; @@@ -2089,8 -1188,7 +2089,8 @@@ static void find_move_in_parent(struct if (!unblamed) return; /* nothing remains for this target */ - fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob); + fill_origin_blob(&sb->revs->diffopt, parent, &file_p, + &sb->num_read_blob, 0); if (!file_p.ptr) return; @@@ -2219,8 -1317,7 +2219,8 @@@ static void find_copy_in_parent(struct norigin = get_origin(parent, 
p->one->path); oidcpy(&norigin->blob_oid, &p->one->oid); norigin->mode = p->one->mode; - fill_origin_blob(&sb->revs->diffopt, norigin, &file_p, &sb->num_read_blob); + fill_origin_blob(&sb->revs->diffopt, norigin, &file_p, + &sb->num_read_blob, 0); if (!file_p.ptr) continue; @@@ -2398,34 -1495,11 +2398,34 @@@ static void pass_blame(struct blame_sco blame_origin_incref(porigin); origin->previous = porigin; } - pass_blame_to_parent(sb, origin, porigin); + pass_blame_to_parent(sb, origin, porigin, 0); if (!origin->suspects) goto finish; } + /* + * Pass remaining suspects for ignored commits to their parents. + */ + if (oidset_contains(&sb->ignore_list, &commit->object.oid)) { + for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse); + i < num_sg && sg; + sg = sg->next, i++) { + struct blame_origin *porigin = sg_origin[i]; + + if (!porigin) + continue; + pass_blame_to_parent(sb, origin, porigin, 1); + /* + * Preemptively drop porigin so we can refresh the + * fingerprints if we use the parent again, which can + * occur if you ignore back-to-back commits. + */ + drop_origin_blob(porigin); + if (!origin->suspects) + goto finish; + } + } + /* * Optionally find moves in parents' files. */ @@@ -2566,14 -1640,37 +2566,14 @@@ void assign_blame(struct blame_scoreboa } } -static const char *get_next_line(const char *start, const char *end) -{ - const char *nl = memchr(start, '\n', end - start); - return nl ? nl + 1 : end; -} - /* * To allow quick access to the contents of nth line in the * final image, prepare an index in the scoreboard. */ static int prepare_lines(struct blame_scoreboard *sb) { - const char *buf = sb->final_buf; - unsigned long len = sb->final_buf_size; - const char *end = buf + len; - const char *p; - int *lineno; - int num = 0; - - for (p = buf; p < end; p = get_next_line(p, end)) - num++; - - ALLOC_ARRAY(sb->lineno, num + 1); - lineno = sb->lineno; - - for (p = buf; p < end; p = get_next_line(p, end)) - *lineno++ = p - buf; - - *lineno = len; - - sb->num_lines = num; + sb->num_lines = find_line_starts(&sb->lineno, sb->final_buf, + sb->final_buf_size); return sb->num_lines; } diff --combined builtin/pack-objects.c index 000dc4b872,a030c24a4a..267c562b1f --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@@ -606,12 -606,12 +606,12 @@@ static int mark_tagged(const char *path void *cb_data) { struct object_id peeled; - struct object_entry *entry = packlist_find(&to_pack, oid->hash, NULL); + struct object_entry *entry = packlist_find(&to_pack, oid, NULL); if (entry) entry->tagged = 1; if (!peel_ref(path, &peeled)) { - entry = packlist_find(&to_pack, peeled.hash, NULL); + entry = packlist_find(&to_pack, &peeled, NULL); if (entry) entry->tagged = 1; } @@@ -996,7 -996,7 +996,7 @@@ static int have_duplicate_entry(const s { struct object_entry *entry; - entry = packlist_find(&to_pack, oid->hash, index_pos); + entry = packlist_find(&to_pack, oid, index_pos); if (!entry) return 0; @@@ -1428,7 -1428,8 +1428,8 @@@ static void add_preferred_base(struct o if (window <= num_preferred_base++) return; - data = read_object_with_reference(oid, tree_type, &size, &tree_oid); + data = read_object_with_reference(the_repository, oid, + tree_type, &size, &tree_oid); if (!data) return; @@@ -1494,13 -1495,11 +1495,13 @@@ static int can_reuse_delta(const unsign if (!base_sha1) return 0; + oidread(&base_oid, base_sha1); + /* * First see if we're already sending the base (or it's explicitly in * our "excluded" list). 
*/ - base = packlist_find(&to_pack, base_sha1, NULL); + base = packlist_find(&to_pack, &base_oid, NULL); if (base) { if (!in_same_island(&delta->idx.oid, &base->idx.oid)) return 0; @@@ -1513,6 -1512,7 +1514,6 @@@ * even if it was buried too deep in history to make it into the * packing list. */ - oidread(&base_oid, base_sha1); if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, &base_oid)) { if (use_delta_islands) { if (!in_same_island(&delta->idx.oid, &base_oid)) @@@ -2572,7 -2572,7 +2573,7 @@@ static void add_tag_chain(const struct * it was included via bitmaps, we would not have parsed it * previously). */ - if (packlist_find(&to_pack, oid->hash, NULL)) + if (packlist_find(&to_pack, oid, NULL)) return; tag = lookup_tag(the_repository, oid); @@@ -2596,7 -2596,7 +2597,7 @@@ static int add_ref_tag(const char *path if (starts_with(path, "refs/tags/") && /* is a tag? */ !peel_ref(path, &peeled) && /* peelable? */ - packlist_find(&to_pack, peeled.hash, NULL)) /* object packed? */ + packlist_find(&to_pack, &peeled, NULL)) /* object packed? */ add_tag_chain(oid); return 0; } @@@ -2796,7 -2796,7 +2797,7 @@@ static void show_object(struct object * for (p = strchr(name, '/'); p; p = strchr(p + 1, '/')) depth++; - ent = packlist_find(&to_pack, obj->oid.hash, NULL); + ent = packlist_find(&to_pack, &obj->oid, NULL); if (ent && depth > oe_tree_depth(&to_pack, ent)) oe_set_tree_depth(&to_pack, ent, depth); } @@@ -2923,7 -2923,7 +2924,7 @@@ static void add_objects_in_unpacked_pac for (i = 0; i < p->num_objects; i++) { nth_packed_object_oid(&oid, p, i); - o = lookup_unknown_object(oid.hash); + o = lookup_unknown_object(&oid); if (!(o->flags & OBJECT_ADDED)) mark_in_pack_object(o, p, &in_pack); o->flags |= OBJECT_ADDED; @@@ -3027,7 -3027,7 +3028,7 @@@ static void loosen_unused_packed_object for (i = 0; i < p->num_objects; i++) { nth_packed_object_oid(&oid, p, i); - if (!packlist_find(&to_pack, oid.hash, NULL) && + if (!packlist_find(&to_pack, &oid, NULL) && !has_sha1_pack_kept_or_nonlocal(&oid) && !loosened_object_can_be_discarded(&oid, p->mtime)) if (force_object_loose(&oid, p->mtime)) @@@ -3135,7 -3135,7 +3136,7 @@@ static void get_object_list(int ac, con return; if (use_delta_islands) - load_delta_islands(the_repository); + load_delta_islands(the_repository, progress); if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); diff --combined builtin/rebase.c index 89fc4b8153,28490f5f88..95d34223e9 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@@ -738,30 -738,20 +738,30 @@@ static int finish_rebase(struct rebase_ { struct strbuf dir = STRBUF_INIT; const char *argv_gc_auto[] = { "gc", "--auto", NULL }; + int ret = 0; delete_ref(NULL, "REBASE_HEAD", NULL, REF_NO_DEREF); apply_autostash(opts); - close_all_packs(the_repository->objects); + close_object_store(the_repository->objects); /* * We ignore errors in 'gc --auto', since the * user should see them. 
*/ run_command_v_opt(argv_gc_auto, RUN_GIT_CMD); - strbuf_addstr(&dir, opts->state_dir); - remove_dir_recursively(&dir, 0); - strbuf_release(&dir); + if (opts->type == REBASE_INTERACTIVE) { + struct replay_opts replay = REPLAY_OPTS_INIT; - return 0; + replay.action = REPLAY_INTERACTIVE_REBASE; + ret = sequencer_remove_state(&replay); + } else { + strbuf_addstr(&dir, opts->state_dir); + if (remove_dir_recursively(&dir, 0)) + ret = error(_("could not remove '%s'"), + opts->state_dir); + strbuf_release(&dir); + } + + return ret; } static struct commit *peel_committish(const char *name) @@@ -850,13 -840,13 +850,13 @@@ static int reset_head(struct object_id goto leave_reset_head; } - if (!reset_hard && !fill_tree_descriptor(&desc[nr++], &head_oid)) { + if (!reset_hard && !fill_tree_descriptor(the_repository, &desc[nr++], &head_oid)) { ret = error(_("failed to find tree of %s"), oid_to_hex(&head_oid)); goto leave_reset_head; } - if (!fill_tree_descriptor(&desc[nr++], oid)) { + if (!fill_tree_descriptor(the_repository, &desc[nr++], oid)) { ret = error(_("failed to find tree of %s"), oid_to_hex(oid)); goto leave_reset_head; } @@@ -1389,7 -1379,6 +1389,7 @@@ int cmd_rebase(int argc, const char **a struct string_list strategy_options = STRING_LIST_INIT_NODUP; struct object_id squash_onto; char *squash_onto_name = NULL; + int reschedule_failed_exec = -1; struct option builtin_rebase_options[] = { OPT_STRING(0, "onto", &options.onto_name, N_("revision"), @@@ -1482,7 -1471,7 +1482,7 @@@ OPT_BOOL(0, "root", &options.root, N_("rebase all reachable commits up to the root(s)")), OPT_BOOL(0, "reschedule-failed-exec", - &options.reschedule_failed_exec, + &reschedule_failed_exec, N_("automatically re-schedule any `exec` that fails")), OPT_END(), }; @@@ -1611,7 -1600,7 +1611,7 @@@ if (reset_head(NULL, "reset", NULL, RESET_HEAD_HARD, NULL, NULL) < 0) die(_("could not discard worktree changes")); - remove_branch_state(the_repository); + remove_branch_state(the_repository, 0); if (read_basic_state(&options)) exit(1); goto run_rebase; @@@ -1631,24 -1620,16 +1631,24 @@@ NULL, NULL) < 0) die(_("could not move back to %s"), oid_to_hex(&options.orig_head)); - remove_branch_state(the_repository); - ret = finish_rebase(&options); + remove_branch_state(the_repository, 0); + ret = !!finish_rebase(&options); goto cleanup; } case ACTION_QUIT: { - strbuf_reset(&buf); - strbuf_addstr(&buf, options.state_dir); - ret = !!remove_dir_recursively(&buf, 0); - if (ret) - die(_("could not remove '%s'"), options.state_dir); + if (options.type == REBASE_INTERACTIVE) { + struct replay_opts replay = REPLAY_OPTS_INIT; + + replay.action = REPLAY_INTERACTIVE_REBASE; + ret = !!sequencer_remove_state(&replay); + } else { + strbuf_reset(&buf); + strbuf_addstr(&buf, options.state_dir); + ret = !!remove_dir_recursively(&buf, 0); + if (ret) + error(_("could not remove '%s'"), + options.state_dir); + } goto cleanup; } case ACTION_EDIT_TODO: @@@ -1797,11 -1778,8 +1797,11 @@@ break; } - if (options.reschedule_failed_exec && !is_interactive(&options)) - die(_("%s requires an interactive rebase"), "--reschedule-failed-exec"); + if (reschedule_failed_exec > 0 && !is_interactive(&options)) + die(_("--reschedule-failed-exec requires " + "--exec or --interactive")); + if (reschedule_failed_exec >= 0) + options.reschedule_failed_exec = reschedule_failed_exec; if (options.git_am_opts.argc) { /* all am options except -q are compatible only with --am */ @@@ -2163,7 -2141,6 +2163,7 @@@ run_rebase ret = !!run_specific_rebase(&options, action); cleanup: + 
strbuf_release(&buf); strbuf_release(&revisions); free(options.head_name); free(options.gpg_sign_opt); diff --combined builtin/reset.c index c2bb35a4b7,77c38f28c2..fdd572168b --- a/builtin/reset.c +++ b/builtin/reset.c @@@ -79,13 -79,13 +79,13 @@@ static int reset_index(const struct obj struct object_id head_oid; if (get_oid("HEAD", &head_oid)) return error(_("You do not have a valid HEAD.")); - if (!fill_tree_descriptor(desc + nr, &head_oid)) + if (!fill_tree_descriptor(the_repository, desc + nr, &head_oid)) return error(_("Failed to find tree of HEAD.")); nr++; opts.fn = twoway_merge; } - if (!fill_tree_descriptor(desc + nr, oid)) { + if (!fill_tree_descriptor(the_repository, desc + nr, oid)) { error(_("Failed to find tree of %s."), oid_to_hex(oid)); goto out; } @@@ -421,7 -421,7 +421,7 @@@ int cmd_reset(int argc, const char **ar print_new_head_line(lookup_commit_reference(the_repository, &oid)); } if (!pathspec.nr) - remove_branch_state(the_repository); + remove_branch_state(the_repository, 0); return update_ref_status; } diff --combined cache.h index 3167585cab,ddefda2bb6..b1da1ab08f --- a/cache.h +++ b/cache.h @@@ -43,6 -43,30 +43,6 @@@ int git_deflate_end_gently(git_zstream int git_deflate(git_zstream *, int flush); unsigned long git_deflate_bound(git_zstream *, unsigned long); -/* The length in bytes and in hex digits of an object name (SHA-1 value). */ -#define GIT_SHA1_RAWSZ 20 -#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ) -/* The block size of SHA-1. */ -#define GIT_SHA1_BLKSZ 64 - -/* The length in bytes and in hex digits of an object name (SHA-256 value). */ -#define GIT_SHA256_RAWSZ 32 -#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ) -/* The block size of SHA-256. */ -#define GIT_SHA256_BLKSZ 64 - -/* The length in byte and in hex digits of the largest possible hash value. */ -#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ -#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ -/* The largest possible block size for any supported hash. 
*/ -#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ - -struct object_id { - unsigned char hash[GIT_MAX_RAWSZ]; -}; - -#define the_hash_algo the_repository->hash_algo - #if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT) #define DTYPE(de) ((de)->d_type) #else @@@ -1476,7 -1500,8 +1476,8 @@@ int df_name_compare(const char *name1, int name_compare(const char *name1, size_t len1, const char *name2, size_t len2); int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2); - void *read_object_with_reference(const struct object_id *oid, + void *read_object_with_reference(struct repository *r, + const struct object_id *oid, const char *required_type, unsigned long *size, struct object_id *oid_ret); @@@ -1735,7 -1760,6 +1736,7 @@@ void setup_pager(void) int pager_in_use(void); extern int pager_use_color; int term_columns(void); +void term_clear_line(void); int decimal_width(uintmax_t); int check_pager_config(const char *cmd); void prepare_pager_args(struct child_process *, const char *pager); @@@ -1762,8 -1786,8 +1763,8 @@@ int add_files_to_cache(const char *pref extern int diff_auto_refresh_index; /* match-trees.c */ - void shift_tree(const struct object_id *, const struct object_id *, struct object_id *, int); - void shift_tree_by(const struct object_id *, const struct object_id *, struct object_id *, const char *); + void shift_tree(struct repository *, const struct object_id *, const struct object_id *, struct object_id *, int); + void shift_tree_by(struct repository *, const struct object_id *, const struct object_id *, struct object_id *, const char *); /* * whitespace rules. diff --combined fast-import.c index 6dfdd6801c,3970b50acc..b44d6a467e --- a/fast-import.c +++ b/fast-import.c @@@ -644,7 -644,7 +644,7 @@@ static struct tree_content *grow_tree_c struct tree_content *r = new_tree_content(t->entry_count + amt); r->entry_count = t->entry_count; r->delta_depth = t->delta_depth; - memcpy(r->entries,t->entries,t->entry_count*sizeof(t->entries[0])); + COPY_ARRAY(r->entries, t->entries, t->entry_count); release_tree_content(t); return r; } @@@ -2410,7 -2410,8 +2410,8 @@@ static void note_change_n(const char *p oidcpy(&commit_oid, &commit_oe->idx.oid); } else if (!get_oid(p, &commit_oid)) { unsigned long size; - char *buf = read_object_with_reference(&commit_oid, + char *buf = read_object_with_reference(the_repository, + &commit_oid, commit_type, &size, &commit_oid); if (!buf || size < the_hash_algo->hexsz + 6) @@@ -2482,7 -2483,8 +2483,8 @@@ static void parse_from_existing(struct unsigned long size; char *buf; - buf = read_object_with_reference(&b->oid, commit_type, &size, + buf = read_object_with_reference(the_repository, + &b->oid, commit_type, &size, &b->oid); parse_from_commit(b, buf, size); free(buf); @@@ -2560,7 -2562,8 +2562,8 @@@ static struct hash_list *parse_merge(un oidcpy(&n->oid, &oe->idx.oid); } else if (!get_oid(from, &n->oid)) { unsigned long size; - char *buf = read_object_with_reference(&n->oid, + char *buf = read_object_with_reference(the_repository, + &n->oid, commit_type, &size, &n->oid); if (!buf || size < the_hash_algo->hexsz + 6) diff --combined sequencer.c index 66126e020d,64428ac28f..34ebf8ed94 --- a/sequencer.c +++ b/sequencer.c @@@ -279,7 -279,7 +279,7 @@@ static const char *gpg_sign_opt_quoted( int sequencer_remove_state(struct replay_opts *opts) { struct strbuf buf = STRBUF_INIT; - int i; + int i, ret = 0; if (is_rebase_i(opts) && strbuf_read_file(&buf, rebase_path_refs_to_delete(), 0) > 0) { @@@ -288,10 -288,8 
+288,10 @@@ char *eol = strchr(p, '\n'); if (eol) *eol = '\0'; - if (delete_ref("(rebase -i) cleanup", p, NULL, 0) < 0) + if (delete_ref("(rebase -i) cleanup", p, NULL, 0) < 0) { warning(_("could not delete '%s'"), p); + ret = -1; + } if (!eol) break; p = eol + 1; @@@ -307,11 -305,10 +307,11 @@@ strbuf_reset(&buf); strbuf_addstr(&buf, get_dir(opts)); - remove_dir_recursively(&buf, 0); + if (remove_dir_recursively(&buf, 0)) + ret = error(_("could not remove '%s'"), buf.buf); strbuf_release(&buf); - return 0; + return ret; } static const char *action_name(const struct replay_opts *opts) @@@ -2079,18 -2076,6 +2079,18 @@@ const char *todo_item_get_arg(struct to return todo_list->buf.buf + item->arg_offset; } +static int is_command(enum todo_command command, const char **bol) +{ + const char *str = todo_command_info[command].str; + const char nick = todo_command_info[command].c; + const char *p = *bol + 1; + + return skip_prefix(*bol, str, bol) || + ((nick && **bol == nick) && + (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r' || !*p) && + (*bol = p)); +} + static int parse_insn_line(struct repository *r, struct todo_item *item, const char *buf, const char *bol, char *eol) { @@@ -2112,7 -2097,12 +2112,7 @@@ } for (i = 0; i < TODO_COMMENT; i++) - if (skip_prefix(bol, todo_command_info[i].str, &bol)) { - item->command = i; - break; - } else if ((bol + 1 == eol || bol[1] == ' ') && - *bol == todo_command_info[i].c) { - bol++; + if (is_command(i, &bol)) { item->command = i; break; } @@@ -2180,26 -2170,34 +2180,26 @@@ int sequencer_get_last_command(struct repository *r, enum replay_action *action) { - struct todo_item item; - char *eol; - const char *todo_file; + const char *todo_file, *bol; struct strbuf buf = STRBUF_INIT; - int ret = -1; + int ret = 0; todo_file = git_path_todo_file(); if (strbuf_read_file(&buf, todo_file, 0) < 0) { - if (errno == ENOENT) + if (errno == ENOENT || errno == ENOTDIR) return -1; else return error_errno("unable to open '%s'", todo_file); } - eol = strchrnul(buf.buf, '\n'); - if (buf.buf != eol && eol[-1] == '\r') - eol--; /* strip Carriage Return */ - if (parse_insn_line(r, &item, buf.buf, buf.buf, eol)) - goto fail; - if (item.command == TODO_PICK) + bol = buf.buf + strspn(buf.buf, " \t\r\n"); + if (is_command(TODO_PICK, &bol) && (*bol == ' ' || *bol == '\t')) *action = REPLAY_PICK; - else if (item.command == TODO_REVERT) + else if (is_command(TODO_REVERT, &bol) && + (*bol == ' ' || *bol == '\t')) *action = REPLAY_REVERT; else - goto fail; - - ret = 0; + ret = -1; - fail: strbuf_release(&buf); return ret; @@@ -2313,21 -2311,19 +2313,21 @@@ static int have_finished_the_last_pick( return ret; } -void sequencer_post_commit_cleanup(struct repository *r) +void sequencer_post_commit_cleanup(struct repository *r, int verbose) { struct replay_opts opts = REPLAY_OPTS_INIT; int need_cleanup = 0; if (file_exists(git_path_cherry_pick_head(r))) { - unlink(git_path_cherry_pick_head(r)); + if (!unlink(git_path_cherry_pick_head(r)) && verbose) + warning(_("cancelling a cherry picking in progress")); opts.action = REPLAY_PICK; need_cleanup = 1; } if (file_exists(git_path_revert_head(r))) { - unlink(git_path_revert_head(r)); + if (!unlink(git_path_revert_head(r)) && verbose) + warning(_("cancelling a revert in progress")); opts.action = REPLAY_REVERT; need_cleanup = 1; } @@@ -2654,41 -2650,15 +2654,41 @@@ static int walk_revs_populate_todo(stru return 0; } -static int create_seq_dir(void) +static int create_seq_dir(struct repository *r) { - if (file_exists(git_path_seq_dir())) { - 
error(_("a cherry-pick or revert is already in progress")); - advise(_("try \"git cherry-pick (--continue | --quit | --abort)\"")); + enum replay_action action; + const char *in_progress_error = NULL; + const char *in_progress_advice = NULL; + unsigned int advise_skip = file_exists(git_path_revert_head(r)) || + file_exists(git_path_cherry_pick_head(r)); + + if (!sequencer_get_last_command(r, &action)) { + switch (action) { + case REPLAY_REVERT: + in_progress_error = _("revert is already in progress"); + in_progress_advice = + _("try \"git revert (--continue | %s--abort | --quit)\""); + break; + case REPLAY_PICK: + in_progress_error = _("cherry-pick is already in progress"); + in_progress_advice = + _("try \"git cherry-pick (--continue | %s--abort | --quit)\""); + break; + default: + BUG("unexpected action in create_seq_dir"); + } + } + if (in_progress_error) { + error("%s", in_progress_error); + if (advice_sequencer_in_use) + advise(in_progress_advice, + advise_skip ? "--skip | " : ""); return -1; - } else if (mkdir(git_path_seq_dir(), 0777) < 0) + } + if (mkdir(git_path_seq_dir(), 0777) < 0) return error_errno(_("could not create sequencer directory '%s'"), git_path_seq_dir()); + return 0; } @@@ -2739,20 -2709,15 +2739,20 @@@ static int rollback_is_safe(void return oideq(&actual_head, &expected_head); } -static int reset_for_rollback(const struct object_id *oid) +static int reset_merge(const struct object_id *oid) { - const char *argv[4]; /* reset --merge + NULL */ + int ret; + struct argv_array argv = ARGV_ARRAY_INIT; - argv[0] = "reset"; - argv[1] = "--merge"; - argv[2] = oid_to_hex(oid); - argv[3] = NULL; - return run_command_v_opt(argv, RUN_GIT_CMD); + argv_array_pushl(&argv, "reset", "--merge", NULL); + + if (!is_null_oid(oid)) + argv_array_push(&argv, oid_to_hex(oid)); + + ret = run_command_v_opt(argv.argv, RUN_GIT_CMD); + argv_array_clear(&argv); + + return ret; } static int rollback_single_pick(struct repository *r) @@@ -2766,16 -2731,7 +2766,16 @@@ return error(_("cannot resolve HEAD")); if (is_null_oid(&head_oid)) return error(_("cannot abort from a branch yet to be born")); - return reset_for_rollback(&head_oid); + return reset_merge(&head_oid); +} + +static int skip_single_pick(void) +{ + struct object_id head; + + if (read_ref_full("HEAD", 0, &head, NULL)) + return error(_("cannot resolve HEAD")); + return reset_merge(&head); } int sequencer_rollback(struct repository *r, struct replay_opts *opts) @@@ -2818,7 -2774,7 +2818,7 @@@ warning(_("You seem to have moved HEAD. " "Not rewinding, check your HEAD!")); } else - if (reset_for_rollback(&oid)) + if (reset_merge(&oid)) goto fail; strbuf_release(&buf); return sequencer_remove_state(opts); @@@ -2827,70 -2783,6 +2827,70 @@@ fail return -1; } +int sequencer_skip(struct repository *r, struct replay_opts *opts) +{ + enum replay_action action = -1; + sequencer_get_last_command(r, &action); + + /* + * Check whether the subcommand requested to skip the commit is actually + * in progress and that it's safe to skip the commit. + * + * opts->action tells us which subcommand requested to skip the commit. + * If the corresponding .git/_HEAD exists, we know that the + * action is in progress and we can skip the commit. + * + * Otherwise we check that the last instruction was related to the + * particular subcommand we're trying to execute and barf if that's not + * the case. + * + * Finally we check that the rollback is "safe", i.e., has the HEAD + * moved? 
In this case, it doesn't make sense to "reset the merge" and + * "skip the commit" as the user already handled this by committing. But + * we'd not want to barf here, instead give advice on how to proceed. We + * only need to check that when .git/_HEAD doesn't exist because + * it gets removed when the user commits, so if it still exists we're + * sure the user can't have committed before. + */ + switch (opts->action) { + case REPLAY_REVERT: + if (!file_exists(git_path_revert_head(r))) { + if (action != REPLAY_REVERT) + return error(_("no revert in progress")); + if (!rollback_is_safe()) + goto give_advice; + } + break; + case REPLAY_PICK: + if (!file_exists(git_path_cherry_pick_head(r))) { + if (action != REPLAY_PICK) + return error(_("no cherry-pick in progress")); + if (!rollback_is_safe()) + goto give_advice; + } + break; + default: + BUG("unexpected action in sequencer_skip"); + } + + if (skip_single_pick()) + return error(_("failed to skip the commit")); + if (!is_directory(git_path_seq_dir())) + return 0; + + return sequencer_continue(r, opts); + +give_advice: + error(_("there is nothing to skip")); + + if (advice_resolve_conflict) { + advise(_("have you committed already?\n" + "try \"git %s --continue\""), + action == REPLAY_REVERT ? "revert" : "cherry-pick"); + } + return -1; +} + static int save_todo(struct todo_list *todo_list, struct replay_opts *opts) { struct lock_file todo_lock = LOCK_INIT; @@@ -3302,7 -3194,7 +3302,7 @@@ static int do_reset(struct repository * return error_resolve_conflict(_(action_name(opts))); } - if (!fill_tree_descriptor(&desc, &oid)) { + if (!fill_tree_descriptor(r, &desc, &oid)) { error(_("failed to find tree of %s"), oid_to_hex(&oid)); rollback_lock_file(&lock); free((void *)desc.buffer); @@@ -3841,14 -3733,11 +3841,14 @@@ static int pick_commits(struct reposito unlink(rebase_path_author_script()); unlink(rebase_path_stopped_sha()); unlink(rebase_path_amend()); - unlink(git_path_merge_head(the_repository)); + unlink(git_path_merge_head(r)); delete_ref(NULL, "REBASE_HEAD", NULL, REF_NO_DEREF); - if (item->command == TODO_BREAK) + if (item->command == TODO_BREAK) { + if (!opts->verbose) + term_clear_line(); return stopped_at_head(r); + } } if (item->command <= TODO_SQUASH) { if (is_rebase_i(opts)) @@@ -3870,14 -3759,11 +3870,14 @@@ } if (item->command == TODO_EDIT) { struct commit *commit = item->commit; - if (!res) + if (!res) { + if (!opts->verbose) + term_clear_line(); fprintf(stderr, _("Stopped at %s... 
%.*s\n"), short_commit_name(commit), item->arg_len, arg); + } return error_with_patch(r, commit, arg, item->arg_len, opts, res, !res); } @@@ -3915,8 -3801,6 +3915,8 @@@ int saved = *end_of_arg; struct stat st; + if (!opts->verbose) + term_clear_line(); *end_of_arg = '\0'; res = do_exec(r, arg); *end_of_arg = saved; @@@ -4075,13 -3959,10 +4075,13 @@@ cleanup_head_ref } apply_autostash(opts); - if (!opts->quiet) + if (!opts->quiet) { + if (!opts->verbose) + term_clear_line(); fprintf(stderr, "Successfully rebased and updated %s.\n", head_ref.buf); + } strbuf_release(&buf); strbuf_release(&head_ref); @@@ -4226,7 -4107,7 +4226,7 @@@ static int commit_staged_changes(struc opts, flags)) return error(_("could not commit staged changes.")); unlink(rebase_path_amend()); - unlink(git_path_merge_head(the_repository)); + unlink(git_path_merge_head(r)); if (final_fixup) { unlink(rebase_path_fixup_msg()); unlink(rebase_path_squash_msg()); @@@ -4361,7 -4242,7 +4361,7 @@@ int sequencer_pick_revisions(struct rep */ if (walk_revs_populate_todo(&todo_list, opts) || - create_seq_dir() < 0) + create_seq_dir(r) < 0) return -1; if (get_oid("HEAD", &oid) && (opts->action == REPLAY_REVERT)) return error(_("can't revert as initial commit")); diff --combined sha1-name.c index 49855ad24f,6069fe006b..2989e27b71 --- a/sha1-name.c +++ b/sha1-name.c @@@ -478,7 -478,7 +478,7 @@@ static enum get_oid_result get_short_oi * or migrated from loose to packed. */ if (status == MISSING_OBJECT) { - reprepare_packed_git(the_repository); + reprepare_packed_git(r); find_short_object_filename(&ds); find_short_packed_object(&ds); status = finish_object_disambiguation(&ds, oid); @@@ -801,7 -801,7 +801,7 @@@ static int get_oid_basic(struct reposit "because it will be ignored when you just specify 40-hex. These refs\n" "may be created by mistake. For example,\n" "\n" - " git checkout -b $br $(git rev-parse ...)\n" + " git switch -c $br $(git rev-parse ...)\n" "\n" "where \"$br\" is somehow empty and a 40-hex ref is created. Please\n" "examine these refs and maybe delete them. Turn this message off by\n" @@@ -1389,9 -1389,7 +1389,7 @@@ int repo_get_oid_mb(struct repository * two = lookup_commit_reference_gently(r, &oid_tmp, 0); if (!two) return -1; - if (r != the_repository) - BUG("sorry get_merge_bases() can't take struct repository yet"); - mbs = get_merge_bases(one, two); + mbs = repo_get_merge_bases(r, one, two); if (!mbs || mbs->next) st = -1; else { @@@ -1677,7 -1675,8 +1675,8 @@@ int repo_get_oid_blob(struct repositor } /* Must be called only when object_name:filename doesn't exist. */ - static void diagnose_invalid_oid_path(const char *prefix, + static void diagnose_invalid_oid_path(struct repository *r, + const char *prefix, const char *filename, const struct object_id *tree_oid, const char *object_name, @@@ -1695,7 -1694,7 +1694,7 @@@ if (is_missing_file_error(errno)) { char *fullname = xstrfmt("%s%s", prefix, filename); - if (!get_tree_entry(tree_oid, fullname, &oid, &mode)) { + if (!get_tree_entry(r, tree_oid, fullname, &oid, &mode)) { die("Path '%s' exists, but not '%s'.\n" "Did you mean '%.*s:%s' aka '%.*s:./%s'?", fullname, @@@ -1889,23 -1888,15 +1888,15 @@@ static enum get_oid_result get_oid_with new_filename = resolve_relative_path(repo, filename); if (new_filename) filename = new_filename; - /* - * NEEDSWORK: Eventually get_tree_entry*() should - * learn to take struct repository directly and we - * would not need to inject submodule odb to the - * in-core odb. 
-		 */
-		if (repo != the_repository)
-			add_to_alternates_memory(repo->objects->odb->path);
 		if (flags & GET_OID_FOLLOW_SYMLINKS) {
-			ret = get_tree_entry_follow_symlinks(&tree_oid,
+			ret = get_tree_entry_follow_symlinks(repo, &tree_oid,
 				filename, oid, &oc->symlink_path,
 				&oc->mode);
 		} else {
-			ret = get_tree_entry(&tree_oid, filename, oid,
+			ret = get_tree_entry(repo, &tree_oid, filename, oid,
 					     &oc->mode);
 			if (ret && only_to_die) {
-				diagnose_invalid_oid_path(prefix,
+				diagnose_invalid_oid_path(repo, prefix,
 							  filename,
 							  &tree_oid,
 							  name, len);
diff --combined unpack-trees.c
index dab713203e,cfe1c5ec6f..62276d4fef
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@@ -315,7 -315,7 +315,7 @@@ static struct progress *get_progress(st
 		total++;
 	}

-	return start_delayed_progress(_("Checking out files"), total);
+	return start_delayed_progress(_("Updating files"), total);
 }

 static void setup_collided_checkout_detection(struct checkout *state,
@@@ -840,7 -840,7 +840,7 @@@ static int traverse_trees_recursive(in
 			const struct object_id *oid = NULL;
 			if (dirmask & 1)
 				oid = &names[i].oid;
-			buf[nr_buf++] = fill_tree_descriptor(t + i, oid);
+			buf[nr_buf++] = fill_tree_descriptor(the_repository, t + i, oid);
 		}
 	}
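
A note on the sequencer.c hunks above: the new is_command() helper factors out the todo-command matching that parse_insn_line() used to open-code and that the rewritten sequencer_get_last_command() now reuses. A command token matches either its full name (matched as a prefix) or, when one exists, its one-letter nickname followed by whitespace or end of string. The standalone sketch below mirrors that matching logic; the cmd_info table, matches_command() and the sample input are illustrative stand-ins, not git's actual definitions.

#include <stdio.h>
#include <string.h>

/* Illustrative stand-in for git's todo_command_info[] table. */
struct cmd_info {
	const char *str;	/* full command name, e.g. "pick" */
	char nick;		/* one-letter nickname, or 0 if none */
};

/*
 * On a match, advance *bol past the command token and return nonzero.
 * The full name is matched as a plain prefix; the nickname must be
 * followed by whitespace or the end of the string, as in is_command().
 */
static int matches_command(const struct cmd_info *cmd, const char **bol)
{
	size_t len = strlen(cmd->str);
	const char *p = *bol;

	if (!strncmp(p, cmd->str, len)) {
		*bol = p + len;
		return 1;
	}
	if (cmd->nick && p[0] == cmd->nick &&
	    (p[1] == ' ' || p[1] == '\t' || p[1] == '\n' ||
	     p[1] == '\r' || !p[1])) {
		*bol = p + 1;
		return 1;
	}
	return 0;
}

int main(void)
{
	const struct cmd_info pick = { "pick", 'p' };
	const char *line = "p 1234abcd Subject line";

	if (matches_command(&pick, &line))
		printf("matched; remainder: \"%s\"\n", line);
	return 0;
}

This is also why sequencer_get_last_command() can now peek at the first word of the todo file directly instead of running the full parse_insn_line() machinery over it.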
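
The comment block added in sequencer_skip() describes a three-step safety check before a commit may be skipped. The sketch below condenses that decision flow; the stub predicates stand in for git's real file_exists() and rollback_is_safe() checks, and their return values here are arbitrary placeholders.

#include <stdio.h>

enum replay_action { REPLAY_REVERT, REPLAY_PICK };

/* Stubs; in git these consult .git/<ACTION>_HEAD, the sequencer todo
 * file, and the HEAD recorded before the pick started. */
static int action_head_exists(enum replay_action a) { return a == REPLAY_PICK; }
static int last_command_was(enum replay_action a) { return a == REPLAY_PICK; }
static int head_unmoved(void) { return 1; }

/* 0 = safe to skip, -1 = wrong subcommand, 1 = advise instead of reset. */
static int check_skip(enum replay_action requested)
{
	if (action_head_exists(requested))
		return 0;	/* conflict still recorded; skipping is safe */
	if (!last_command_was(requested))
		return -1;	/* "no cherry-pick/revert in progress" */
	if (!head_unmoved())
		return 1;	/* user already committed: advise, don't reset */
	return 0;
}

int main(void)
{
	printf("pick:   %d\n", check_skip(REPLAY_PICK));
	printf("revert: %d\n", check_skip(REPLAY_REVERT));
	return 0;
}

The actual skip is then just reset_merge() on the current HEAD (skip_single_pick() above), followed by sequencer_continue() when a multi-commit sequence is still in flight.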
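
Finally, the recurring one-line edits across sequencer.c, sha1-name.c and unpack-trees.c — get_tree_entry(r, ...), fill_tree_descriptor(r, ...), reprepare_packed_git(r), repo_get_merge_bases(r, ...) — are all instances of one refactoring pattern: replace the implicit the_repository global with a repository supplied by the caller, so the same code can operate on a submodule's in-core repository without workarounds like the add_to_alternates_memory() call deleted above. A minimal sketch of the pattern follows; the struct layout and function names are invented for illustration.

#include <stdio.h>

/* Toy stand-in for git's 'struct repository'. */
struct repository {
	const char *gitdir;
};

static struct repository main_repo = { ".git" };
static struct repository *the_repository = &main_repo;

/* Before: the function silently reaches for the global singleton. */
static void describe_global(const char *what)
{
	printf("%s (in %s)\n", what, the_repository->gitdir);
}

/* After: the caller chooses the repository; old call sites simply
 * pass the_repository explicitly, preserving behavior. */
static void describe(struct repository *r, const char *what)
{
	printf("%s (in %s)\n", what, r->gitdir);
}

int main(void)
{
	struct repository submodule = { "sub/.git" };

	describe_global("old style");
	describe(the_repository, "new style, main repository");
	describe(&submodule, "new style, submodule");
	return 0;
}

The unpack-trees.c hunk shows the transitional state of that conversion: fill_tree_descriptor() now takes a repository argument, and traverse_trees_recursive() passes the_repository explicitly at its call site.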