From: Junio C Hamano <gitster@pobox.com>
Date: Fri, 19 Jul 2019 18:30:20 +0000 (-0700)
Subject: Merge branch 'br/blame-ignore'
X-Git-Tag: v2.23.0-rc0~39
X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/209f0755934a0c9b448408f9b7c9849c15041ecc?hp=-c

Merge branch 'br/blame-ignore'

"git blame" learned to "ignore" commits in the history, whose
effects (as well as their presence) get ignored.

* br/blame-ignore:
t8014: remove unnecessary braces
blame: drop some unused function parameters
blame: add a test to cover blame_coalesce()
blame: use the fingerprint heuristic to match ignored lines
blame: add a fingerprint heuristic to match ignored lines
blame: optionally track line fingerprints during fill_blame_origin()
blame: add config options for the output of ignored or unblamable lines
blame: add the ability to ignore commits and their changes
blame: use a helper function in blame_chunk()
Move oidset_parse_file() to oidset.c
fsck: rename and touch up init_skiplist()
---

209f0755934a0c9b448408f9b7c9849c15041ecc
diff --combined blame.c
index 145eaf2faf,acb1bf7f7a..7f04580ad5
--- a/blame.c
+++ b/blame.c
@@@ -99,7 -99,7 +99,7 @@@ static void verify_working_tree_path(st
  	for (parents = work_tree->parents; parents; parents = parents->next) {
  		const struct object_id *commit_oid = &parents->item->object.oid;
  		struct object_id blob_oid;
 -		unsigned mode;
 +		unsigned short mode;
  
  		if (!get_tree_entry(commit_oid, path, &blob_oid, &mode) &&
  		    oid_object_info(r, &blob_oid, NULL) == OBJ_BLOB)
@@@ -188,7 -188,7 +188,7 @@@ static struct commit *fake_working_tree
  	unsigned mode;
  	struct strbuf msg = STRBUF_INIT;
  
 -	read_index(r->index);
 +	repo_read_index(r);
  	time(&now);
  	commit = alloc_commit_node(r);
  	commit->object.parsed = 1;
@@@ -204,8 -204,7 +204,8 @@@
  
  	origin = make_origin(commit, path);
  
 -	ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
 +	ident = fmt_ident("Not Committed Yet", "not.committed.yet",
 +			WANT_BLANK_IDENT, NULL, 0);
  	strbuf_addstr(&msg, "tree 0000000000000000000000000000000000000000\n");
  	for (parent = commit->parents; parent; parent = parent->next)
  		strbuf_addf(&msg, "parent %s\n",
@@@ -271,7 -270,7 +271,7 @@@
  	 * want to run "diff-index --cached".
  	 */
  	discard_index(r->index);
 -	read_index(r->index);
 +	repo_read_index(r);
  
  	len = strlen(path);
  	if (!mode) {
@@@ -311,12 -310,707 +311,707 @@@ static int diff_hunks(mmfile_t *file_a
  	return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
  }
  
+ static const char *get_next_line(const char *start, const char *end)
+ {
+ 	const char *nl = memchr(start, '\n', end - start);
+ 
+ 	return nl ? nl + 1 : end;
+ }
+ 
+ static int find_line_starts(int **line_starts, const char *buf,
+ 			    unsigned long len)
+ {
+ 	const char *end = buf + len;
+ 	const char *p;
+ 	int *lineno;
+ 	int num = 0;
+ 
+ 	for (p = buf; p < end; p = get_next_line(p, end))
+ 		num++;
+ 
+ 	ALLOC_ARRAY(*line_starts, num + 1);
+ 	lineno = *line_starts;
+ 
+ 	for (p = buf; p < end; p = get_next_line(p, end))
+ 		*lineno++ = p - buf;
+ 
+ 	*lineno = len;
+ 
+ 	return num;
+ }
+ 
+ struct fingerprint_entry;
+ 
+ /* A fingerprint is intended to loosely represent a string, such that two
+  * fingerprints can be quickly compared to give an indication of the similarity
+  * of the strings that they represent.
+  *
+  * A fingerprint is represented as a multiset of the lower-cased byte pairs in
+  * the string that it represents. Whitespace is added at each end of the
+  * string. Whitespace pairs are ignored. Whitespace is converted to '\0'.
+  * For example, the string "Darth   Radar" will be converted to the following
+  * fingerprint:
+  * {"\0d", "da", "da", "ar", "ar", "rt", "th", "h\0", "\0r", "ra", "ad", "r\0"}
+  *
+  * The similarity between two fingerprints is the size of the intersection of
+  * their multisets, including repeated elements. See fingerprint_similarity for
+  * examples.
+  *
+  * For ease of implementation, the fingerprint is implemented as a map
+  * of byte pairs to the count of that byte pair in the string, instead of
+  * allowing repeated elements in a set.
+  */
+ struct fingerprint {
+ 	struct hashmap map;
+ 	/* As we know the maximum number of entries in advance, it's
+ 	 * convenient to store the entries in a single array instead of having
+ 	 * the hashmap manage the memory.
+ 	 */
+ 	struct fingerprint_entry *entries;
+ };
+ 
+ /* A byte pair in a fingerprint. Stores the number of times the byte pair
+  * occurs in the string that the fingerprint represents.
+  */
+ struct fingerprint_entry {
+ 	/* The hashmap entry - the hash represents the byte pair in its
+ 	 * entirety so we don't need to store the byte pair separately.
+ 	 */
+ 	struct hashmap_entry entry;
+ 	/* The number of times the byte pair occurs in the string that the
+ 	 * fingerprint represents.
+ 	 */
+ 	int count;
+ };
+ 
+ /* See `struct fingerprint` for an explanation of what a fingerprint is.
+  * \param result the fingerprint of the string is stored here. This must be
+  * 		 freed later using free_fingerprint.
+  * \param line_begin the start of the string
+  * \param line_end the end of the string
+  */
+ static void get_fingerprint(struct fingerprint *result,
+ 			    const char *line_begin,
+ 			    const char *line_end)
+ {
+ 	unsigned int hash, c0 = 0, c1;
+ 	const char *p;
+ 	int max_map_entry_count = 1 + line_end - line_begin;
+ 	struct fingerprint_entry *entry = xcalloc(max_map_entry_count,
+ 		sizeof(struct fingerprint_entry));
+ 	struct fingerprint_entry *found_entry;
+ 
+ 	hashmap_init(&result->map, NULL, NULL, max_map_entry_count);
+ 	result->entries = entry;
+ 	for (p = line_begin; p <= line_end; ++p, c0 = c1) {
+ 		/* Always terminate the string with whitespace.
+ 		 * Normalise whitespace to 0, and normalise letters to
+ 		 * lower case. This won't work for multibyte characters but at
+ 		 * worst will match some unrelated characters.
+ 		 */
+ 		if ((p == line_end) || isspace(*p))
+ 			c1 = 0;
+ 		else
+ 			c1 = tolower(*p);
+ 		hash = c0 | (c1 << 8);
+ 		/* Ignore whitespace pairs */
+ 		if (hash == 0)
+ 			continue;
+ 		hashmap_entry_init(entry, hash);
+ 
+ 		found_entry = hashmap_get(&result->map, entry, NULL);
+ 		if (found_entry) {
+ 			found_entry->count += 1;
+ 		} else {
+ 			entry->count = 1;
+ 			hashmap_add(&result->map, entry);
+ 			++entry;
+ 		}
+ 	}
+ }
+ 
+ static void free_fingerprint(struct fingerprint *f)
+ {
+ 	hashmap_free(&f->map, 0);
+ 	free(f->entries);
+ }
+ 
+ /* Calculates the similarity between two fingerprints as the size of the
+  * intersection of their multisets, including repeated elements. See
+  * `struct fingerprint` for an explanation of the fingerprint representation.
+  * The similarity between "cat mat" and "father rather" is 2 because "at" is
+  * present twice in both strings while the similarity between "tim" and "mit"
+  * is 0.
+  */
+ static int fingerprint_similarity(struct fingerprint *a, struct fingerprint *b)
+ {
+ 	int intersection = 0;
+ 	struct hashmap_iter iter;
+ 	const struct fingerprint_entry *entry_a, *entry_b;
+ 
+ 	hashmap_iter_init(&b->map, &iter);
+ 
+ 	while ((entry_b = hashmap_iter_next(&iter))) {
+ 		if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) {
+ 			intersection += entry_a->count < entry_b->count ?
+ 					entry_a->count : entry_b->count;
+ 		}
+ 	}
+ 	return intersection;
+ }
+ 
+ /* Subtracts byte-pair elements in B from A, modifying A in place.
+  */
+ static void fingerprint_subtract(struct fingerprint *a, struct fingerprint *b)
+ {
+ 	struct hashmap_iter iter;
+ 	struct fingerprint_entry *entry_a;
+ 	const struct fingerprint_entry *entry_b;
+ 
+ 	hashmap_iter_init(&b->map, &iter);
+ 
+ 	while ((entry_b = hashmap_iter_next(&iter))) {
+ 		if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) {
+ 			if (entry_a->count <= entry_b->count)
+ 				hashmap_remove(&a->map, entry_b, NULL);
+ 			else
+ 				entry_a->count -= entry_b->count;
+ 		}
+ 	}
+ }
+ 
+ /* Calculate fingerprints for a series of lines.
+  * Puts the fingerprints in the fingerprints array, which must have been
+  * preallocated to allow storing line_count elements.
+  */
+ static void get_line_fingerprints(struct fingerprint *fingerprints,
+ 				  const char *content, const int *line_starts,
+ 				  long first_line, long line_count)
+ {
+ 	int i;
+ 	const char *linestart, *lineend;
+ 
+ 	line_starts += first_line;
+ 	for (i = 0; i < line_count; ++i) {
+ 		linestart = content + line_starts[i];
+ 		lineend = content + line_starts[i + 1];
+ 		get_fingerprint(fingerprints + i, linestart, lineend);
+ 	}
+ }
+ 
+ static void free_line_fingerprints(struct fingerprint *fingerprints,
+ 				   int nr_fingerprints)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < nr_fingerprints; i++)
+ 		free_fingerprint(&fingerprints[i]);
+ }
+ 
+ /* This contains the data necessary to linearly map a line number in one half
+  * of a diff chunk to the line in the other half of the diff chunk that is
+  * closest in terms of its position as a fraction of the length of the chunk.
+  */
+ struct line_number_mapping {
+ 	int destination_start, destination_length,
+ 		source_start, source_length;
+ };
+ 
+ /* Given a line number in one range, offset and scale it to map it onto the
+  * other range.
+  * Essentially this mapping is a simple linear equation but the calculation is
+  * more complicated to allow performing it with integer operations.
+  * Another complication is that if a line could map onto many lines in the
+  * destination range then we want to choose the line at the center of those
+  * possibilities.
+  * Example: if the chunk is 2 lines long in A and 10 lines long in B then the
+  * first 5 lines in B will map onto the first line in the A chunk, while the
+  * last 5 lines will all map onto the second line in the A chunk.
+  * Example: if the chunk is 10 lines long in A and 2 lines long in B then line
+  * 0 in B will map onto line 2 in A, and line 1 in B will map onto line 7 in A.
+  */
+ static int map_line_number(int line_number,
+ 	const struct line_number_mapping *mapping)
+ {
+ 	return ((line_number - mapping->source_start) * 2 + 1) *
+ 	       mapping->destination_length /
+ 	       (mapping->source_length * 2) +
+ 	       mapping->destination_start;
+ }
+ 
+ /* Get a pointer to the element storing the similarity between a line in A
+  * and a line in B.
+  *
+  * The similarities are stored in a 2-dimensional array. Each "row" in the
+  * array contains the similarities for a line in B. The similarities stored in
+  * a row are the similarities between the line in B and the nearby lines in A.
+  * To keep the length of each row the same, it is padded out with values of -1
+  * where the search range extends beyond the lines in A.
+  * For example, if max_search_distance_a is 2 and the two sides of a diff chunk
+  * look like this:
+  * a | m
+  * b | n
+  * c | o
+  * d | p
+  * e | q
+  * Then the similarity array will contain:
+  * [-1, -1, am, bm, cm,
+  *  -1, an, bn, cn, dn,
+  *  ao, bo, co, do, eo,
+  *  bp, cp, dp, ep, -1,
+  *  cq, dq, eq, -1, -1]
+  * Where similarities are denoted either by -1 for invalid, or the
+  * concatenation of the two lines in the diff being compared.
+  *
+  * \param similarities array of similarities between lines in A and B
+  * \param line_a the index of the line in A, in the same frame of reference as
+  *	closest_line_a.
+  * \param local_line_b the index of the line in B, relative to the first line
+  *		       in B that similarities represents.
+  * \param closest_line_a the index of the line in A that is deemed to be
+  *			 closest to local_line_b. This must be in the same
+  *			 frame of reference as line_a. This value defines
+  *			 where similarities is centered for the line in B.
+  * \param max_search_distance_a maximum distance in lines from the closest line
+  * 				in A for other lines in A for which
+  * 				similarities may be calculated.
+  */
+ static int *get_similarity(int *similarities,
+ 			   int line_a, int local_line_b,
+ 			   int closest_line_a, int max_search_distance_a)
+ {
+ 	assert(abs(line_a - closest_line_a) <=
+ 	       max_search_distance_a);
+ 	return similarities + line_a - closest_line_a +
+ 	       max_search_distance_a +
+ 	       local_line_b * (max_search_distance_a * 2 + 1);
+ }
+ 
+ #define CERTAIN_NOTHING_MATCHES -2
+ #define CERTAINTY_NOT_CALCULATED -1
+ 
+ /* Given a line in B, first calculate its similarities with nearby lines in A
+  * if not already calculated, then identify the most similar and second most
+  * similar lines. The "certainty" is calculated based on those two
+  * similarities.
+  *
+  * \param start_a the index of the first line of the chunk in A
+  * \param length_a the length in lines of the chunk in A
+  * \param local_line_b the index of the line in B, relative to the first line
+  * 		       in the chunk.
+  * \param fingerprints_a array of fingerprints for the chunk in A
+  * \param fingerprints_b array of fingerprints for the chunk in B
+  * \param similarities 2-dimensional array of similarities between lines in A
+  * 		       and B. See get_similarity() for more details.
+  * \param certainties array of values indicating how strongly a line in B is
+  * 		      matched with some line in A.
+  * \param second_best_result array of absolute indices in A for the second
+  * 			     closest match of a line in B.
+  * \param result array of absolute indices in A for the closest match of a line
+  * 		 in B.
+  * \param max_search_distance_a maximum distance in lines from the closest line
+  * 				in A for other lines in A for which
+  * 				similarities may be calculated.
+  * \param map_line_number_in_b_to_a parameter to map_line_number().
+  */
+ static void find_best_line_matches(
+ 	int start_a,
+ 	int length_a,
+ 	int start_b,
+ 	int local_line_b,
+ 	struct fingerprint *fingerprints_a,
+ 	struct fingerprint *fingerprints_b,
+ 	int *similarities,
+ 	int *certainties,
+ 	int *second_best_result,
+ 	int *result,
+ 	const int max_search_distance_a,
+ 	const struct line_number_mapping *map_line_number_in_b_to_a)
+ {
+ 
+ 	int i, search_start, search_end, closest_local_line_a, *similarity,
+ 		best_similarity = 0, second_best_similarity = 0,
+ 		best_similarity_index = 0, second_best_similarity_index = 0;
+ 
+ 	/* certainty has already been calculated so no need to redo the work */
+ 	if (certainties[local_line_b] != CERTAINTY_NOT_CALCULATED)
+ 		return;
+ 
+ 	closest_local_line_a = map_line_number(
+ 		local_line_b + start_b, map_line_number_in_b_to_a) - start_a;
+ 
+ 	search_start = closest_local_line_a - max_search_distance_a;
+ 	if (search_start < 0)
+ 		search_start = 0;
+ 
+ 	search_end = closest_local_line_a + max_search_distance_a + 1;
+ 	if (search_end > length_a)
+ 		search_end = length_a;
+ 
+ 	for (i = search_start; i < search_end; ++i) {
+ 		similarity = get_similarity(similarities,
+ 					    i, local_line_b,
+ 					    closest_local_line_a,
+ 					    max_search_distance_a);
+ 		if (*similarity == -1) {
+ 			/* This value will never exceed 10 but assert just in
+ 			 * case
+ 			 */
+ 			assert(abs(i - closest_local_line_a) < 1000);
+ 			/* scale the similarity by (1000 - distance from
+ 			 * closest line) to act as a tie break between lines
+ 			 * that otherwise are equally similar.
+ 			 */
+ 			*similarity = fingerprint_similarity(
+ 				fingerprints_b + local_line_b,
+ 				fingerprints_a + i) *
+ 				(1000 - abs(i - closest_local_line_a));
+ 		}
+ 		if (*similarity > best_similarity) {
+ 			second_best_similarity = best_similarity;
+ 			second_best_similarity_index = best_similarity_index;
+ 			best_similarity = *similarity;
+ 			best_similarity_index = i;
+ 		} else if (*similarity > second_best_similarity) {
+ 			second_best_similarity = *similarity;
+ 			second_best_similarity_index = i;
+ 		}
+ 	}
+ 
+ 	if (best_similarity == 0) {
+ 		/* this line definitely doesn't match with anything. Mark it
+ 		 * with this special value so it doesn't get invalidated and
+ 		 * won't be recalculated.
+ 		 */
+ 		certainties[local_line_b] = CERTAIN_NOTHING_MATCHES;
+ 		result[local_line_b] = -1;
+ 	} else {
+ 		/* Calculate the certainty with which this line matches.
+ 		 * If the line matches well with two lines then that reduces
+ 		 * the certainty. However we still want to prioritise matching
+ 		 * a line that matches very well with two lines over matching a
+ 		 * line that matches poorly with one line, hence doubling
+ 		 * best_similarity.
+ 		 * This means that if we have
+ 		 * line X that matches only one line with a score of 3,
+ 		 * line Y that matches two lines equally with a score of 5,
+ 		 * and line Z that matches only one line with a score or 2,
+ 		 * then the lines in order of certainty are X, Y, Z.
+ 		 */
+ 		certainties[local_line_b] = best_similarity * 2 -
+ 			second_best_similarity;
+ 
+ 		/* We keep both the best and second best results to allow us to
+ 		 * check at a later stage of the matching process whether the
+ 		 * result needs to be invalidated.
+ 		 */
+ 		result[local_line_b] = start_a + best_similarity_index;
+ 		second_best_result[local_line_b] =
+ 			start_a + second_best_similarity_index;
+ 	}
+ }
+ 
+ /*
+  * This finds the line that we can match with the most confidence, and
+  * uses it as a partition. It then calls itself on the lines on either side of
+  * that partition. In this way we avoid lines appearing out of order, and
+  * retain a sensible line ordering.
+  * \param start_a index of the first line in A with which lines in B may be
+  * 		  compared.
+  * \param start_b index of the first line in B for which matching should be
+  * 		  done.
+  * \param length_a number of lines in A with which lines in B may be compared.
+  * \param length_b number of lines in B for which matching should be done.
+  * \param fingerprints_a mutable array of fingerprints in A. The first element
+  * 			 corresponds to the line at start_a.
+  * \param fingerprints_b array of fingerprints in B. The first element
+  * 			 corresponds to the line at start_b.
+  * \param similarities 2-dimensional array of similarities between lines in A
+  * 		       and B. See get_similarity() for more details.
+  * \param certainties array of values indicating how strongly a line in B is
+  * 		      matched with some line in A.
+  * \param second_best_result array of absolute indices in A for the second
+  * 			     closest match of a line in B.
+  * \param result array of absolute indices in A for the closest match of a line
+  * 		 in B.
+  * \param max_search_distance_a maximum distance in lines from the closest line
+  * 			      in A for other lines in A for which
+  * 			      similarities may be calculated.
+  * \param max_search_distance_b an upper bound on the greatest possible
+  * 			      distance between lines in B such that they will
+  *                              both be compared with the same line in A
+  * 			      according to max_search_distance_a.
+  * \param map_line_number_in_b_to_a parameter to map_line_number().
+  */
+ static void fuzzy_find_matching_lines_recurse(
+ 	int start_a, int start_b,
+ 	int length_a, int length_b,
+ 	struct fingerprint *fingerprints_a,
+ 	struct fingerprint *fingerprints_b,
+ 	int *similarities,
+ 	int *certainties,
+ 	int *second_best_result,
+ 	int *result,
+ 	int max_search_distance_a,
+ 	int max_search_distance_b,
+ 	const struct line_number_mapping *map_line_number_in_b_to_a)
+ {
+ 	int i, invalidate_min, invalidate_max, offset_b,
+ 		second_half_start_a, second_half_start_b,
+ 		second_half_length_a, second_half_length_b,
+ 		most_certain_line_a, most_certain_local_line_b = -1,
+ 		most_certain_line_certainty = -1,
+ 		closest_local_line_a;
+ 
+ 	for (i = 0; i < length_b; ++i) {
+ 		find_best_line_matches(start_a,
+ 				       length_a,
+ 				       start_b,
+ 				       i,
+ 				       fingerprints_a,
+ 				       fingerprints_b,
+ 				       similarities,
+ 				       certainties,
+ 				       second_best_result,
+ 				       result,
+ 				       max_search_distance_a,
+ 				       map_line_number_in_b_to_a);
+ 
+ 		if (certainties[i] > most_certain_line_certainty) {
+ 			most_certain_line_certainty = certainties[i];
+ 			most_certain_local_line_b = i;
+ 		}
+ 	}
+ 
+ 	/* No matches. */
+ 	if (most_certain_local_line_b == -1)
+ 		return;
+ 
+ 	most_certain_line_a = result[most_certain_local_line_b];
+ 
+ 	/*
+ 	 * Subtract the most certain line's fingerprint in B from the matched
+ 	 * fingerprint in A. This means that other lines in B can't also match
+ 	 * the same parts of the line in A.
+ 	 */
+ 	fingerprint_subtract(fingerprints_a + most_certain_line_a - start_a,
+ 			     fingerprints_b + most_certain_local_line_b);
+ 
+ 	/* Invalidate results that may be affected by the choice of most
+ 	 * certain line.
+ 	 */
+ 	invalidate_min = most_certain_local_line_b - max_search_distance_b;
+ 	invalidate_max = most_certain_local_line_b + max_search_distance_b + 1;
+ 	if (invalidate_min < 0)
+ 		invalidate_min = 0;
+ 	if (invalidate_max > length_b)
+ 		invalidate_max = length_b;
+ 
+ 	/* As the fingerprint in A has changed, discard previously calculated
+ 	 * similarity values with that fingerprint.
+ 	 */
+ 	for (i = invalidate_min; i < invalidate_max; ++i) {
+ 		closest_local_line_a = map_line_number(
+ 			i + start_b, map_line_number_in_b_to_a) - start_a;
+ 
+ 		/* Check that the lines in A and B are close enough that there
+ 		 * is a similarity value for them.
+ 		 */
+ 		if (abs(most_certain_line_a - start_a - closest_local_line_a) >
+ 			max_search_distance_a) {
+ 			continue;
+ 		}
+ 
+ 		*get_similarity(similarities, most_certain_line_a - start_a,
+ 				i, closest_local_line_a,
+ 				max_search_distance_a) = -1;
+ 	}
+ 
+ 	/* More invalidating of results that may be affected by the choice of
+ 	 * most certain line.
+ 	 * Discard the matches for lines in B that are currently matched with a
+ 	 * line in A such that their ordering contradicts the ordering imposed
+ 	 * by the choice of most certain line.
+ 	 */
+ 	for (i = most_certain_local_line_b - 1; i >= invalidate_min; --i) {
+ 		/* In this loop we discard results for lines in B that are
+ 		 * before most-certain-line-B but are matched with a line in A
+ 		 * that is after most-certain-line-A.
+ 		 */
+ 		if (certainties[i] >= 0 &&
+ 		    (result[i] >= most_certain_line_a ||
+ 		     second_best_result[i] >= most_certain_line_a)) {
+ 			certainties[i] = CERTAINTY_NOT_CALCULATED;
+ 		}
+ 	}
+ 	for (i = most_certain_local_line_b + 1; i < invalidate_max; ++i) {
+ 		/* In this loop we discard results for lines in B that are
+ 		 * after most-certain-line-B but are matched with a line in A
+ 		 * that is before most-certain-line-A.
+ 		 */
+ 		if (certainties[i] >= 0 &&
+ 		    (result[i] <= most_certain_line_a ||
+ 		     second_best_result[i] <= most_certain_line_a)) {
+ 			certainties[i] = CERTAINTY_NOT_CALCULATED;
+ 		}
+ 	}
+ 
+ 	/* Repeat the matching process for lines before the most certain line.
+ 	 */
+ 	if (most_certain_local_line_b > 0) {
+ 		fuzzy_find_matching_lines_recurse(
+ 			start_a, start_b,
+ 			most_certain_line_a + 1 - start_a,
+ 			most_certain_local_line_b,
+ 			fingerprints_a, fingerprints_b, similarities,
+ 			certainties, second_best_result, result,
+ 			max_search_distance_a,
+ 			max_search_distance_b,
+ 			map_line_number_in_b_to_a);
+ 	}
+ 	/* Repeat the matching process for lines after the most certain line.
+ 	 */
+ 	if (most_certain_local_line_b + 1 < length_b) {
+ 		second_half_start_a = most_certain_line_a;
+ 		offset_b = most_certain_local_line_b + 1;
+ 		second_half_start_b = start_b + offset_b;
+ 		second_half_length_a =
+ 			length_a + start_a - second_half_start_a;
+ 		second_half_length_b =
+ 			length_b + start_b - second_half_start_b;
+ 		fuzzy_find_matching_lines_recurse(
+ 			second_half_start_a, second_half_start_b,
+ 			second_half_length_a, second_half_length_b,
+ 			fingerprints_a + second_half_start_a - start_a,
+ 			fingerprints_b + offset_b,
+ 			similarities +
+ 				offset_b * (max_search_distance_a * 2 + 1),
+ 			certainties + offset_b,
+ 			second_best_result + offset_b, result + offset_b,
+ 			max_search_distance_a,
+ 			max_search_distance_b,
+ 			map_line_number_in_b_to_a);
+ 	}
+ }
+ 
+ /* Find the lines in the parent line range that most closely match the lines in
+  * the target line range. This is accomplished by matching fingerprints in each
+  * blame_origin, and choosing the best matches that preserve the line ordering.
+  * See struct fingerprint for details of fingerprint matching, and
+  * fuzzy_find_matching_lines_recurse for details of preserving line ordering.
+  *
+  * The performance is believed to be O(n log n) in the typical case and O(n^2)
+  * in a pathological case, where n is the number of lines in the target range.
+  */
+ static int *fuzzy_find_matching_lines(struct blame_origin *parent,
+ 				      struct blame_origin *target,
+ 				      int tlno, int parent_slno, int same,
+ 				      int parent_len)
+ {
+ 	/* We use the terminology "A" for the left hand side of the diff AKA
+ 	 * parent, and "B" for the right hand side of the diff AKA target. */
+ 	int start_a = parent_slno;
+ 	int length_a = parent_len;
+ 	int start_b = tlno;
+ 	int length_b = same - tlno;
+ 
+ 	struct line_number_mapping map_line_number_in_b_to_a = {
+ 		start_a, length_a, start_b, length_b
+ 	};
+ 
+ 	struct fingerprint *fingerprints_a = parent->fingerprints;
+ 	struct fingerprint *fingerprints_b = target->fingerprints;
+ 
+ 	int i, *result, *second_best_result,
+ 		*certainties, *similarities, similarity_count;
+ 
+ 	/*
+ 	 * max_search_distance_a means that given a line in B, compare it to
+ 	 * the line in A that is closest to its position, and the lines in A
+ 	 * that are no greater than max_search_distance_a lines away from the
+ 	 * closest line in A.
+ 	 *
+ 	 * max_search_distance_b is an upper bound on the greatest possible
+ 	 * distance between lines in B such that they will both be compared
+ 	 * with the same line in A according to max_search_distance_a.
+ 	 */
+ 	int max_search_distance_a = 10, max_search_distance_b;
+ 
+ 	if (length_a <= 0)
+ 		return NULL;
+ 
+ 	if (max_search_distance_a >= length_a)
+ 		max_search_distance_a = length_a ? length_a - 1 : 0;
+ 
+ 	max_search_distance_b = ((2 * max_search_distance_a + 1) * length_b
+ 				 - 1) / length_a;
+ 
+ 	result = xcalloc(sizeof(int), length_b);
+ 	second_best_result = xcalloc(sizeof(int), length_b);
+ 	certainties = xcalloc(sizeof(int), length_b);
+ 
+ 	/* See get_similarity() for details of similarities. */
+ 	similarity_count = length_b * (max_search_distance_a * 2 + 1);
+ 	similarities = xcalloc(sizeof(int), similarity_count);
+ 
+ 	for (i = 0; i < length_b; ++i) {
+ 		result[i] = -1;
+ 		second_best_result[i] = -1;
+ 		certainties[i] = CERTAINTY_NOT_CALCULATED;
+ 	}
+ 
+ 	for (i = 0; i < similarity_count; ++i)
+ 		similarities[i] = -1;
+ 
+ 	fuzzy_find_matching_lines_recurse(start_a, start_b,
+ 					  length_a, length_b,
+ 					  fingerprints_a + start_a,
+ 					  fingerprints_b + start_b,
+ 					  similarities,
+ 					  certainties,
+ 					  second_best_result,
+ 					  result,
+ 					  max_search_distance_a,
+ 					  max_search_distance_b,
+ 					  &map_line_number_in_b_to_a);
+ 
+ 	free(similarities);
+ 	free(certainties);
+ 	free(second_best_result);
+ 
+ 	return result;
+ }
+ 
+ static void fill_origin_fingerprints(struct blame_origin *o)
+ {
+ 	int *line_starts;
+ 
+ 	if (o->fingerprints)
+ 		return;
+ 	o->num_lines = find_line_starts(&line_starts, o->file.ptr,
+ 					o->file.size);
+ 	o->fingerprints = xcalloc(sizeof(struct fingerprint), o->num_lines);
+ 	get_line_fingerprints(o->fingerprints, o->file.ptr, line_starts,
+ 			      0, o->num_lines);
+ 	free(line_starts);
+ }
+ 
+ static void drop_origin_fingerprints(struct blame_origin *o)
+ {
+ 	if (o->fingerprints) {
+ 		free_line_fingerprints(o->fingerprints, o->num_lines);
+ 		o->num_lines = 0;
+ 		FREE_AND_NULL(o->fingerprints);
+ 	}
+ }
+ 
  /*
   * Given an origin, prepare mmfile_t structure to be used by the
   * diff machinery
   */
  static void fill_origin_blob(struct diff_options *opt,
- 			     struct blame_origin *o, mmfile_t *file, int *num_read_blob)
+ 			     struct blame_origin *o, mmfile_t *file,
+ 			     int *num_read_blob, int fill_fingerprints)
  {
  	if (!o->file.ptr) {
  		enum object_type type;
@@@ -340,11 -1034,14 +1035,14 @@@
  	}
  	else
  		*file = o->file;
+ 	if (fill_fingerprints)
+ 		fill_origin_fingerprints(o);
  }
  
  static void drop_origin_blob(struct blame_origin *o)
  {
  	FREE_AND_NULL(o->file.ptr);
+ 	drop_origin_fingerprints(o);
  }
  
  /*
@@@ -480,7 -1177,9 +1178,9 @@@ void blame_coalesce(struct blame_scoreb
  
  	for (ent = sb->ent; ent && (next = ent->next); ent = next) {
  		if (ent->suspect == next->suspect &&
- 		    ent->s_lno + ent->num_lines == next->s_lno) {
+ 		    ent->s_lno + ent->num_lines == next->s_lno &&
+ 		    ent->ignored == next->ignored &&
+ 		    ent->unblamable == next->unblamable) {
  			ent->num_lines += next->num_lines;
  			ent->next = next->next;
  			blame_origin_decref(next->suspect);
@@@ -730,8 -1429,14 +1430,14 @@@ static void split_overlap(struct blame_
  			  struct blame_origin *parent)
  {
  	int chunk_end_lno;
+ 	int i;
  	memset(split, 0, sizeof(struct blame_entry [3]));
  
+ 	for (i = 0; i < 3; i++) {
+ 		split[i].ignored = e->ignored;
+ 		split[i].unblamable = e->unblamable;
+ 	}
+ 
  	if (e->s_lno < tlno) {
  		/* there is a pre-chunk part not blamed on parent */
  		split[0].suspect = blame_origin_incref(e->suspect);
@@@ -839,6 -1544,164 +1545,164 @@@ static struct blame_entry *reverse_blam
  	return tail;
  }
  
+ /*
+  * Splits a blame entry into two entries at 'len' lines.  The original 'e'
+  * consists of len lines, i.e. [e->lno, e->lno + len), and the second part,
+  * which is returned, consists of the remainder: [e->lno + len, e->lno +
+  * e->num_lines).  The caller needs to sort out the reference counting for the
+  * new entry's suspect.
+  */
+ static struct blame_entry *split_blame_at(struct blame_entry *e, int len,
+ 					  struct blame_origin *new_suspect)
+ {
+ 	struct blame_entry *n = xcalloc(1, sizeof(struct blame_entry));
+ 
+ 	n->suspect = new_suspect;
+ 	n->ignored = e->ignored;
+ 	n->unblamable = e->unblamable;
+ 	n->lno = e->lno + len;
+ 	n->s_lno = e->s_lno + len;
+ 	n->num_lines = e->num_lines - len;
+ 	e->num_lines = len;
+ 	e->score = 0;
+ 	return n;
+ }
+ 
+ struct blame_line_tracker {
+ 	int is_parent;
+ 	int s_lno;
+ };
+ 
+ static int are_lines_adjacent(struct blame_line_tracker *first,
+ 			      struct blame_line_tracker *second)
+ {
+ 	return first->is_parent == second->is_parent &&
+ 	       first->s_lno + 1 == second->s_lno;
+ }
+ 
+ static int scan_parent_range(struct fingerprint *p_fps,
+ 			     struct fingerprint *t_fps, int t_idx,
+ 			     int from, int nr_lines)
+ {
+ 	int sim, p_idx;
+ 	#define FINGERPRINT_FILE_THRESHOLD	10
+ 	int best_sim_val = FINGERPRINT_FILE_THRESHOLD;
+ 	int best_sim_idx = -1;
+ 
+ 	for (p_idx = from; p_idx < from + nr_lines; p_idx++) {
+ 		sim = fingerprint_similarity(&t_fps[t_idx], &p_fps[p_idx]);
+ 		if (sim < best_sim_val)
+ 			continue;
+ 		/* Break ties with the closest-to-target line number */
+ 		if (sim == best_sim_val && best_sim_idx != -1 &&
+ 		    abs(best_sim_idx - t_idx) < abs(p_idx - t_idx))
+ 			continue;
+ 		best_sim_val = sim;
+ 		best_sim_idx = p_idx;
+ 	}
+ 	return best_sim_idx;
+ }
+ 
+ /*
+  * The first pass checks the blame entry (from the target) against the parent's
+  * diff chunk.  If that fails for a line, the second pass tries to match that
+  * line to any part of parent file.  That catches cases where a change was
+  * broken into two chunks by 'context.'
+  */
+ static void guess_line_blames(struct blame_origin *parent,
+ 			      struct blame_origin *target,
+ 			      int tlno, int offset, int same, int parent_len,
+ 			      struct blame_line_tracker *line_blames)
+ {
+ 	int i, best_idx, target_idx;
+ 	int parent_slno = tlno + offset;
+ 	int *fuzzy_matches;
+ 
+ 	fuzzy_matches = fuzzy_find_matching_lines(parent, target,
+ 						  tlno, parent_slno, same,
+ 						  parent_len);
+ 	for (i = 0; i < same - tlno; i++) {
+ 		target_idx = tlno + i;
+ 		if (fuzzy_matches && fuzzy_matches[i] >= 0) {
+ 			best_idx = fuzzy_matches[i];
+ 		} else {
+ 			best_idx = scan_parent_range(parent->fingerprints,
+ 						     target->fingerprints,
+ 						     target_idx, 0,
+ 						     parent->num_lines);
+ 		}
+ 		if (best_idx >= 0) {
+ 			line_blames[i].is_parent = 1;
+ 			line_blames[i].s_lno = best_idx;
+ 		} else {
+ 			line_blames[i].is_parent = 0;
+ 			line_blames[i].s_lno = target_idx;
+ 		}
+ 	}
+ 	free(fuzzy_matches);
+ }
+ 
+ /*
+  * This decides which parts of a blame entry go to the parent (added to the
+  * ignoredp list) and which stay with the target (added to the diffp list).  The
+  * actual decision was made in a separate heuristic function, and those answers
+  * for the lines in 'e' are in line_blames.  This consumes e, essentially
+  * putting it on a list.
+  *
+  * Note that the blame entries on the ignoredp list are not necessarily sorted
+  * with respect to the parent's line numbers yet.
+  */
+ static void ignore_blame_entry(struct blame_entry *e,
+ 			       struct blame_origin *parent,
+ 			       struct blame_entry **diffp,
+ 			       struct blame_entry **ignoredp,
+ 			       struct blame_line_tracker *line_blames)
+ {
+ 	int entry_len, nr_lines, i;
+ 
+ 	/*
+ 	 * We carve new entries off the front of e.  Each entry comes from a
+ 	 * contiguous chunk of lines: adjacent lines from the same origin
+ 	 * (either the parent or the target).
+ 	 */
+ 	entry_len = 1;
+ 	nr_lines = e->num_lines;	/* e changes in the loop */
+ 	for (i = 0; i < nr_lines; i++) {
+ 		struct blame_entry *next = NULL;
+ 
+ 		/*
+ 		 * We are often adjacent to the next line - only split the blame
+ 		 * entry when we have to.
+ 		 */
+ 		if (i + 1 < nr_lines) {
+ 			if (are_lines_adjacent(&line_blames[i],
+ 					       &line_blames[i + 1])) {
+ 				entry_len++;
+ 				continue;
+ 			}
+ 			next = split_blame_at(e, entry_len,
+ 					      blame_origin_incref(e->suspect));
+ 		}
+ 		if (line_blames[i].is_parent) {
+ 			e->ignored = 1;
+ 			blame_origin_decref(e->suspect);
+ 			e->suspect = blame_origin_incref(parent);
+ 			e->s_lno = line_blames[i - entry_len + 1].s_lno;
+ 			e->next = *ignoredp;
+ 			*ignoredp = e;
+ 		} else {
+ 			e->unblamable = 1;
+ 			/* e->s_lno is already in the target's address space. */
+ 			e->next = *diffp;
+ 			*diffp = e;
+ 		}
+ 		assert(e->num_lines == entry_len);
+ 		e = next;
+ 		entry_len = 1;
+ 	}
+ 	assert(!e);
+ }
+ 
  /*
   * Process one hunk from the patch between the current suspect for
   * blame_entry e and its parent.  This first blames any unfinished
@@@ -848,13 -1711,20 +1712,20 @@@
   * -C options may lead to overlapping/duplicate source line number
   * ranges, all we can rely on from sorting/merging is the order of the
   * first suspect line number.
+  *
+  * tlno: line number in the target where this chunk begins
+  * same: line number in the target where this chunk ends
+  * offset: add to tlno to get the chunk starting point in the parent
+  * parent_len: number of lines in the parent chunk
   */
  static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
- 			int tlno, int offset, int same,
- 			struct blame_origin *parent)
+ 			int tlno, int offset, int same, int parent_len,
+ 			struct blame_origin *parent,
+ 			struct blame_origin *target, int ignore_diffs)
  {
  	struct blame_entry *e = **srcq;
- 	struct blame_entry *samep = NULL, *diffp = NULL;
+ 	struct blame_entry *samep = NULL, *diffp = NULL, *ignoredp = NULL;
+ 	struct blame_line_tracker *line_blames = NULL;
  
  	while (e && e->s_lno < tlno) {
  		struct blame_entry *next = e->next;
@@@ -865,14 -1735,9 +1736,9 @@@
  		 */
  		if (e->s_lno + e->num_lines > tlno) {
  			/* Move second half to a new record */
- 			int len = tlno - e->s_lno;
- 			struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
- 			n->suspect = e->suspect;
- 			n->lno = e->lno + len;
- 			n->s_lno = e->s_lno + len;
- 			n->num_lines = e->num_lines - len;
- 			e->num_lines = len;
- 			e->score = 0;
+ 			struct blame_entry *n;
+ 
+ 			n = split_blame_at(e, tlno - e->s_lno, e->suspect);
  			/* Push new record to diffp */
  			n->next = diffp;
  			diffp = n;
@@@ -908,6 -1773,14 +1774,14 @@@
  	 */
  	samep = NULL;
  	diffp = NULL;
+ 
+ 	if (ignore_diffs && same - tlno > 0) {
+ 		line_blames = xcalloc(sizeof(struct blame_line_tracker),
+ 				      same - tlno);
+ 		guess_line_blames(parent, target, tlno, offset, same,
+ 				  parent_len, line_blames);
+ 	}
+ 
  	while (e && e->s_lno < same) {
  		struct blame_entry *next = e->next;
  
@@@ -919,22 -1792,37 +1793,37 @@@
  			 * Move second half to a new record to be
  			 * processed by later chunks
  			 */
- 			int len = same - e->s_lno;
- 			struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
- 			n->suspect = blame_origin_incref(e->suspect);
- 			n->lno = e->lno + len;
- 			n->s_lno = e->s_lno + len;
- 			n->num_lines = e->num_lines - len;
- 			e->num_lines = len;
- 			e->score = 0;
+ 			struct blame_entry *n;
+ 
+ 			n = split_blame_at(e, same - e->s_lno,
+ 					   blame_origin_incref(e->suspect));
  			/* Push new record to samep */
  			n->next = samep;
  			samep = n;
  		}
- 		e->next = diffp;
- 		diffp = e;
+ 		if (ignore_diffs) {
+ 			ignore_blame_entry(e, parent, &diffp, &ignoredp,
+ 					   line_blames + e->s_lno - tlno);
+ 		} else {
+ 			e->next = diffp;
+ 			diffp = e;
+ 		}
  		e = next;
  	}
+ 	free(line_blames);
+ 	if (ignoredp) {
+ 		/*
+ 		 * Note ignoredp is not sorted yet, and thus neither is dstq.
+ 		 * That list must be sorted before we queue_blames().  We defer
+ 		 * sorting until after all diff hunks are processed, so that
+ 		 * guess_line_blames() can pick *any* line in the parent.  The
+ 		 * slight drawback is that we end up sorting all blame entries
+ 		 * passed to the parent, including those that are unrelated to
+ 		 * changes made by the ignored commit.
+ 		 */
+ 		**dstq = reverse_blame(ignoredp, **dstq);
+ 		*dstq = &ignoredp->next;
+ 	}
  	**srcq = reverse_blame(diffp, reverse_blame(samep, e));
  	/* Move across elements that are in the unblamable portion */
  	if (diffp)
@@@ -943,7 -1831,9 +1832,9 @@@
  
  struct blame_chunk_cb_data {
  	struct blame_origin *parent;
+ 	struct blame_origin *target;
  	long offset;
+ 	int ignore_diffs;
  	struct blame_entry **dstq;
  	struct blame_entry **srcq;
  };
@@@ -956,7 -1846,8 +1847,8 @@@ static int blame_chunk_cb(long start_a
  	if (start_a - start_b != d->offset)
  		die("internal error in blame::blame_chunk_cb");
  	blame_chunk(&d->dstq, &d->srcq, start_b, start_a - start_b,
- 		    start_b + count_b, d->parent);
+ 		    start_b + count_b, count_a, d->parent, d->target,
+ 		    d->ignore_diffs);
  	d->offset = start_a + count_a - (start_b + count_b);
  	return 0;
  }
@@@ -968,7 -1859,7 +1860,7 @@@
   */
  static void pass_blame_to_parent(struct blame_scoreboard *sb,
  				 struct blame_origin *target,
- 				 struct blame_origin *parent)
+ 				 struct blame_origin *parent, int ignore_diffs)
  {
  	mmfile_t file_p, file_o;
  	struct blame_chunk_cb_data d;
@@@ -978,11 -1869,15 +1870,15 @@@
  		return; /* nothing remains for this target */
  
  	d.parent = parent;
+ 	d.target = target;
  	d.offset = 0;
+ 	d.ignore_diffs = ignore_diffs;
  	d.dstq = &newdest; d.srcq = &target->suspects;
  
- 	fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob);
- 	fill_origin_blob(&sb->revs->diffopt, target, &file_o, &sb->num_read_blob);
+ 	fill_origin_blob(&sb->revs->diffopt, parent, &file_p,
+ 			 &sb->num_read_blob, ignore_diffs);
+ 	fill_origin_blob(&sb->revs->diffopt, target, &file_o,
+ 			 &sb->num_read_blob, ignore_diffs);
  	sb->num_get_patch++;
  
  	if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts))
@@@ -990,8 -1885,13 +1886,13 @@@
  		    oid_to_hex(&parent->commit->object.oid),
  		    oid_to_hex(&target->commit->object.oid));
  	/* The rest are the same as the parent */
- 	blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, parent);
+ 	blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, 0,
+ 		    parent, target, 0);
  	*d.dstq = NULL;
+ 	if (ignore_diffs)
+ 		newdest = llist_mergesort(newdest, get_next_blame,
+ 					  set_next_blame,
+ 					  compare_blame_suspect);
  	queue_blames(sb, parent, newdest);
  
  	return;
@@@ -1188,7 -2088,8 +2089,8 @@@ static void find_move_in_parent(struct 
  	if (!unblamed)
  		return; /* nothing remains for this target */
  
- 	fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob);
+ 	fill_origin_blob(&sb->revs->diffopt, parent, &file_p,
+ 			 &sb->num_read_blob, 0);
  	if (!file_p.ptr)
  		return;
  
@@@ -1317,7 -2218,8 +2219,8 @@@ static void find_copy_in_parent(struct 
  			norigin = get_origin(parent, p->one->path);
  			oidcpy(&norigin->blob_oid, &p->one->oid);
  			norigin->mode = p->one->mode;
- 			fill_origin_blob(&sb->revs->diffopt, norigin, &file_p, &sb->num_read_blob);
+ 			fill_origin_blob(&sb->revs->diffopt, norigin, &file_p,
+ 					 &sb->num_read_blob, 0);
  			if (!file_p.ptr)
  				continue;
  
@@@ -1495,11 -2397,34 +2398,34 @@@ static void pass_blame(struct blame_sco
  			blame_origin_incref(porigin);
  			origin->previous = porigin;
  		}
- 		pass_blame_to_parent(sb, origin, porigin);
+ 		pass_blame_to_parent(sb, origin, porigin, 0);
  		if (!origin->suspects)
  			goto finish;
  	}
  
+ 	/*
+ 	 * Pass remaining suspects for ignored commits to their parents.
+ 	 */
+ 	if (oidset_contains(&sb->ignore_list, &commit->object.oid)) {
+ 		for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
+ 		     i < num_sg && sg;
+ 		     sg = sg->next, i++) {
+ 			struct blame_origin *porigin = sg_origin[i];
+ 
+ 			if (!porigin)
+ 				continue;
+ 			pass_blame_to_parent(sb, origin, porigin, 1);
+ 			/*
+ 			 * Preemptively drop porigin so we can refresh the
+ 			 * fingerprints if we use the parent again, which can
+ 			 * occur if you ignore back-to-back commits.
+ 			 */
+ 			drop_origin_blob(porigin);
+ 			if (!origin->suspects)
+ 				goto finish;
+ 		}
+ 	}
+ 
  	/*
  	 * Optionally find moves in parents' files.
  	 */
@@@ -1562,8 -2487,7 +2488,8 @@@ finish
  	}
  	for (i = 0; i < num_sg; i++) {
  		if (sg_origin[i]) {
 -			drop_origin_blob(sg_origin[i]);
 +			if (!sg_origin[i]->suspects)
 +				drop_origin_blob(sg_origin[i]);
  			blame_origin_decref(sg_origin[i]);
  		}
  	}
@@@ -1640,37 -2564,14 +2566,14 @@@ void assign_blame(struct blame_scoreboa
  	}
  }
  
- static const char *get_next_line(const char *start, const char *end)
- {
- 	const char *nl = memchr(start, '\n', end - start);
- 	return nl ? nl + 1 : end;
- }
- 
  /*
   * To allow quick access to the contents of nth line in the
   * final image, prepare an index in the scoreboard.
   */
  static int prepare_lines(struct blame_scoreboard *sb)
  {
- 	const char *buf = sb->final_buf;
- 	unsigned long len = sb->final_buf_size;
- 	const char *end = buf + len;
- 	const char *p;
- 	int *lineno;
- 	int num = 0;
- 
- 	for (p = buf; p < end; p = get_next_line(p, end))
- 		num++;
- 
- 	ALLOC_ARRAY(sb->lineno, num + 1);
- 	lineno = sb->lineno;
- 
- 	for (p = buf; p < end; p = get_next_line(p, end))
- 		*lineno++ = p - buf;
- 
- 	*lineno = len;
- 
- 	sb->num_lines = num;
+ 	sb->num_lines = find_line_starts(&sb->lineno, sb->final_buf,
+ 					 sb->final_buf_size);
  	return sb->num_lines;
  }
  
diff --combined blame.h
index d62f80fa74,5dd877bb78..4a9e1270b0
--- a/blame.h
+++ b/blame.h
@@@ -51,8 -51,10 +51,10 @@@ struct blame_origin 
  	 */
  	struct blame_entry *suspects;
  	mmfile_t file;
+ 	int num_lines;
+ 	void *fingerprints;
  	struct object_id blob_oid;
 -	unsigned mode;
 +	unsigned short mode;
  	/* guilty gets set when shipping any suspects to the final
  	 * blame list instead of other commits
  	 */
@@@ -92,6 -94,8 +94,8 @@@ struct blame_entry 
  	 * scanning the lines over and over.
  	 */
  	unsigned score;
+ 	int ignored;
+ 	int unblamable;
  };
  
  /*
@@@ -117,6 -121,8 +121,8 @@@ struct blame_scoreboard 
  	/* linked list of blames */
  	struct blame_entry *ent;
  
+ 	struct oidset ignore_list;
+ 
  	/* look-up a line in the final buffer */
  	int num_lines;
  	int *lineno;
@@@ -177,6 -183,6 +183,6 @@@ struct blame_entry *blame_entry_prepend
  					long start, long end,
  					struct blame_origin *o);
  
 -extern struct blame_origin *get_blame_suspects(struct commit *commit);
 +struct blame_origin *get_blame_suspects(struct commit *commit);
  
  #endif /* BLAME_H */
diff --combined builtin/blame.c
index 50e3d4a265,bb321eb200..b6534d4dea
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@@ -27,7 -27,6 +27,7 @@@
  #include "object-store.h"
  #include "blame.h"
  #include "string-list.h"
 +#include "refs.h"
  
  static char blame_usage[] = N_("git blame [<options>] [<rev-opts>] [<rev>] [--] <file>");
  
@@@ -53,14 -52,17 +53,17 @@@ static int no_whole_file_rename
  static int show_progress;
  static char repeated_meta_color[COLOR_MAXLEN];
  static int coloring_mode;
+ static struct string_list ignore_revs_file_list = STRING_LIST_INIT_NODUP;
+ static int mark_unblamable_lines;
+ static int mark_ignored_lines;
  
  static struct date_mode blame_date_mode = { DATE_ISO8601 };
  static size_t blame_date_width;
  
  static struct string_list mailmap = STRING_LIST_INIT_NODUP;
  
 -#ifndef DEBUG
 -#define DEBUG 0
 +#ifndef DEBUG_BLAME
 +#define DEBUG_BLAME 0
  #endif
  
  static unsigned blame_move_score;
@@@ -480,6 -482,14 +483,14 @@@ static void emit_other(struct blame_sco
  			}
  		}
  
+ 		if (mark_unblamable_lines && ent->unblamable) {
+ 			length--;
+ 			putchar('*');
+ 		}
+ 		if (mark_ignored_lines && ent->ignored) {
+ 			length--;
+ 			putchar('?');
+ 		}
  		printf("%.*s", length, hex);
  		if (opt & OUTPUT_ANNOTATE_COMPAT) {
  			const char *name;
@@@ -696,6 -706,24 +707,24 @@@ static int git_blame_config(const char 
  		parse_date_format(value, &blame_date_mode);
  		return 0;
  	}
+ 	if (!strcmp(var, "blame.ignorerevsfile")) {
+ 		const char *str;
+ 		int ret;
+ 
+ 		ret = git_config_pathname(&str, var, value);
+ 		if (ret)
+ 			return ret;
+ 		string_list_insert(&ignore_revs_file_list, str);
+ 		return 0;
+ 	}
+ 	if (!strcmp(var, "blame.markunblamablelines")) {
+ 		mark_unblamable_lines = git_config_bool(var, value);
+ 		return 0;
+ 	}
+ 	if (!strcmp(var, "blame.markignoredlines")) {
+ 		mark_ignored_lines = git_config_bool(var, value);
+ 		return 0;
+ 	}
  	if (!strcmp(var, "color.blame.repeatedlines")) {
  		if (color_parse_mem(value, strlen(value), repeated_meta_color))
  			warning(_("invalid color '%s' in color.blame.repeatedLines"),
@@@ -775,6 -803,27 +804,27 @@@ static int is_a_rev(const char *name
  	return OBJ_NONE < oid_object_info(the_repository, &oid, NULL);
  }
  
+ static void build_ignorelist(struct blame_scoreboard *sb,
+ 			     struct string_list *ignore_revs_file_list,
+ 			     struct string_list *ignore_rev_list)
+ {
+ 	struct string_list_item *i;
+ 	struct object_id oid;
+ 
+ 	oidset_init(&sb->ignore_list, 0);
+ 	for_each_string_list_item(i, ignore_revs_file_list) {
+ 		if (!strcmp(i->string, ""))
+ 			oidset_clear(&sb->ignore_list);
+ 		else
+ 			oidset_parse_file(&sb->ignore_list, i->string);
+ 	}
+ 	for_each_string_list_item(i, ignore_rev_list) {
+ 		if (get_oid_committish(i->string, &oid))
+ 			die(_("cannot find revision %s to ignore"), i->string);
+ 		oidset_insert(&sb->ignore_list, &oid);
+ 	}
+ }
+ 
  int cmd_blame(int argc, const char **argv, const char *prefix)
  {
  	struct rev_info revs;
@@@ -786,6 -835,7 +836,7 @@@
  	struct progress_info pi = { NULL, 0 };
  
  	struct string_list range_list = STRING_LIST_INIT_NODUP;
+ 	struct string_list ignore_rev_list = STRING_LIST_INIT_NODUP;
  	int output_option = 0, opt = 0;
  	int show_stats = 0;
  	const char *revs_file = NULL;
@@@ -807,6 -857,8 +858,8 @@@
  		OPT_BIT('s', NULL, &output_option, N_("Suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
  		OPT_BIT('e', "show-email", &output_option, N_("Show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
  		OPT_BIT('w', NULL, &xdl_opts, N_("Ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
+ 		OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("Ignore <rev> when blaming")),
+ 		OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("Ignore revisions from <file>")),
  		OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE),
  		OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR),
  
@@@ -815,7 -867,7 +868,7 @@@
  		 * and are only included here to get included in the "-h"
  		 * output:
  		 */
 -		{ OPTION_LOWLEVEL_CALLBACK, 0, "indent-heuristic", NULL, NULL, N_("Use an experimental heuristic to improve diffs"), PARSE_OPT_NOARG, parse_opt_unknown_cb },
 +		{ OPTION_LOWLEVEL_CALLBACK, 0, "indent-heuristic", NULL, NULL, N_("Use an experimental heuristic to improve diffs"), PARSE_OPT_NOARG, NULL, 0, parse_opt_unknown_cb },
  
  		OPT_BIT(0, "minimal", &xdl_opts, N_("Spend extra cycles to find better match"), XDF_NEED_MINIMAL),
  		OPT_STRING('S', NULL, &revs_file, N_("file"), N_("Use revisions from <file> instead of calling git-rev-list")),
@@@ -926,10 -978,6 +979,10 @@@ parse_done
  		 */
  		blame_date_width = utf8_strwidth(_("4 years, 11 months ago")) + 1; /* add the null */
  		break;
 +	case DATE_HUMAN:
 +		/* If the year is shown, no time is shown */
 +		blame_date_width = sizeof("Thu Oct 19 16:00");
 +		break;
  	case DATE_NORMAL:
  		blame_date_width = sizeof("Thu Oct 19 16:00:04 2006 -0700");
  		break;
@@@ -994,24 -1042,15 +1047,27 @@@
  
  	revs.disable_stdin = 1;
  	setup_revisions(argc, argv, &revs, NULL);
 +	if (!revs.pending.nr && is_bare_repository()) {
 +		struct commit *head_commit;
 +		struct object_id head_oid;
 +
 +		if (!resolve_ref_unsafe("HEAD", RESOLVE_REF_READING,
 +					&head_oid, NULL) ||
 +		    !(head_commit = lookup_commit_reference_gently(revs.repo,
 +							     &head_oid, 1)))
 +			die("no such ref: HEAD");
 +
 +		add_pending_object(&revs, &head_commit->object, "HEAD");
 +	}
  
  	init_scoreboard(&sb);
  	sb.revs = &revs;
  	sb.contents_from = contents_from;
  	sb.reverse = reverse;
  	sb.repo = the_repository;
+ 	build_ignorelist(&sb, &ignore_revs_file_list, &ignore_rev_list);
+ 	string_list_clear(&ignore_revs_file_list, 0);
+ 	string_list_clear(&ignore_rev_list, 0);
  	setup_scoreboard(&sb, path, &o);
  	lno = sb.num_lines;
  
@@@ -1024,8 -1063,7 +1080,8 @@@
  		long bottom, top;
  		if (parse_range_arg(range_list.items[range_i].string,
  				    nth_line_cb, &sb, lno, anchor,
 -				    &bottom, &top, sb.path, &the_index))
 +				    &bottom, &top, sb.path,
 +				    the_repository->index))
  			usage(blame_usage);
  		if ((!lno && (top || bottom)) || lno < bottom)
  			die(Q_("file %s has only %lu line",
@@@ -1062,7 -1100,7 +1118,7 @@@
  	if (blame_copy_score)
  		sb.copy_score = blame_copy_score;
  
 -	sb.debug = DEBUG;
 +	sb.debug = DEBUG_BLAME;
  	sb.on_sanity_fail = &sanity_check_on_fail;
  
  	sb.show_root = show_root;
diff --combined fsck.c
index 117c4a978f,80b53e6f49..cdb7d8db03
--- a/fsck.c
+++ b/fsck.c
@@@ -181,41 -181,6 +181,6 @@@ static int fsck_msg_type(enum fsck_msg_
  	return msg_type;
  }
  
- static void init_skiplist(struct fsck_options *options, const char *path)
- {
- 	FILE *fp;
- 	struct strbuf sb = STRBUF_INIT;
- 	struct object_id oid;
- 
- 	fp = fopen(path, "r");
- 	if (!fp)
- 		die("Could not open skip list: %s", path);
- 	while (!strbuf_getline(&sb, fp)) {
- 		const char *p;
- 		const char *hash;
- 
- 		/*
- 		 * Allow trailing comments, leading whitespace
- 		 * (including before commits), and empty or whitespace
- 		 * only lines.
- 		 */
- 		hash = strchr(sb.buf, '#');
- 		if (hash)
- 			strbuf_setlen(&sb, hash - sb.buf);
- 		strbuf_trim(&sb);
- 		if (!sb.len)
- 			continue;
- 
- 		if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
- 			die("Invalid SHA-1: %s", sb.buf);
- 		oidset_insert(&options->skiplist, &oid);
- 	}
- 	if (ferror(fp))
- 		die_errno("Could not read '%s'", path);
- 	fclose(fp);
- 	strbuf_release(&sb);
- }
- 
  static int parse_msg_type(const char *str)
  {
  	if (!strcmp(str, "error"))
@@@ -284,7 -249,7 +249,7 @@@ void fsck_set_msg_types(struct fsck_opt
  		if (!strcmp(buf, "skiplist")) {
  			if (equal == len)
  				die("skiplist requires a path");
- 			init_skiplist(options, buf + equal + 1);
+ 			oidset_parse_file(&options->skiplist, buf + equal + 1);
  			buf += len + 1;
  			continue;
  		}
@@@ -410,14 -375,14 +375,14 @@@ static int fsck_walk_tree(struct tree *
  			continue;
  
  		if (S_ISDIR(entry.mode)) {
 -			obj = (struct object *)lookup_tree(the_repository, entry.oid);
 +			obj = (struct object *)lookup_tree(the_repository, &entry.oid);
  			if (name && obj)
  				put_object_name(options, obj, "%s%s/", name,
  					entry.path);
  			result = options->walk(obj, OBJ_TREE, data, options);
  		}
  		else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) {
 -			obj = (struct object *)lookup_blob(the_repository, entry.oid);
 +			obj = (struct object *)lookup_blob(the_repository, &entry.oid);
  			if (name && obj)
  				put_object_name(options, obj, "%s%s", name,
  					entry.path);
@@@ -604,7 -569,7 +569,7 @@@ static int fsck_tree(struct tree *item
  	o_name = NULL;
  
  	while (desc.size) {
 -		unsigned mode;
 +		unsigned short mode;
  		const char *name;
  		const struct object_id *oid;
  
@@@ -1092,7 -1057,7 +1057,7 @@@ int fsck_finish(struct fsck_options *op
  
  		blob = lookup_blob(the_repository, oid);
  		if (!blob) {
 -			struct object *obj = lookup_unknown_object(oid->hash);
 +			struct object *obj = lookup_unknown_object(oid);
  			ret |= report(options, obj,
  				      FSCK_MSG_GITMODULES_BLOB,
  				      "non-blob found at .gitmodules");
diff --combined oidset.c
index 8bdecb13de,584be63e52..f63ce818f6
--- a/oidset.c
+++ b/oidset.c
@@@ -5,33 -5,68 +5,68 @@@ void oidset_init(struct oidset *set, si
  {
  	memset(&set->set, 0, sizeof(set->set));
  	if (initial_size)
 -		kh_resize_oid(&set->set, initial_size);
 +		kh_resize_oid_set(&set->set, initial_size);
  }
  
  int oidset_contains(const struct oidset *set, const struct object_id *oid)
  {
 -	khiter_t pos = kh_get_oid(&set->set, *oid);
 +	khiter_t pos = kh_get_oid_set(&set->set, *oid);
  	return pos != kh_end(&set->set);
  }
  
  int oidset_insert(struct oidset *set, const struct object_id *oid)
  {
  	int added;
 -	kh_put_oid(&set->set, *oid, &added);
 +	kh_put_oid_set(&set->set, *oid, &added);
  	return !added;
  }
  
  int oidset_remove(struct oidset *set, const struct object_id *oid)
  {
 -	khiter_t pos = kh_get_oid(&set->set, *oid);
 +	khiter_t pos = kh_get_oid_set(&set->set, *oid);
  	if (pos == kh_end(&set->set))
  		return 0;
 -	kh_del_oid(&set->set, pos);
 +	kh_del_oid_set(&set->set, pos);
  	return 1;
  }
  
  void oidset_clear(struct oidset *set)
  {
 -	kh_release_oid(&set->set);
 +	kh_release_oid_set(&set->set);
  	oidset_init(set, 0);
  }
+ 
+ void oidset_parse_file(struct oidset *set, const char *path)
+ {
+ 	FILE *fp;
+ 	struct strbuf sb = STRBUF_INIT;
+ 	struct object_id oid;
+ 
+ 	fp = fopen(path, "r");
+ 	if (!fp)
+ 		die("could not open object name list: %s", path);
+ 	while (!strbuf_getline(&sb, fp)) {
+ 		const char *p;
+ 		const char *name;
+ 
+ 		/*
+ 		 * Allow trailing comments, leading whitespace
+ 		 * (including before commits), and empty or whitespace
+ 		 * only lines.
+ 		 */
+ 		name = strchr(sb.buf, '#');
+ 		if (name)
+ 			strbuf_setlen(&sb, name - sb.buf);
+ 		strbuf_trim(&sb);
+ 		if (!sb.len)
+ 			continue;
+ 
+ 		if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
+ 			die("invalid object name: %s", sb.buf);
+ 		oidset_insert(set, &oid);
+ 	}
+ 	if (ferror(fp))
+ 		die_errno("Could not read '%s'", path);
+ 	fclose(fp);
+ 	strbuf_release(&sb);
+ }
diff --combined oidset.h
index 505fad578b,c4807749df..5346563b0b
--- a/oidset.h
+++ b/oidset.h
@@@ -16,11 -16,23 +16,11 @@@
   *      table overhead.
   */
  
 -static inline unsigned int oid_hash(struct object_id oid)
 -{
 -	return sha1hash(oid.hash);
 -}
 -
 -static inline int oid_equal(struct object_id a, struct object_id b)
 -{
 -	return oideq(&a, &b);
 -}
 -
 -KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal)
 -
  /**
   * A single oidset; should be zero-initialized (or use OIDSET_INIT).
   */
  struct oidset {
 -	kh_oid_t set;
 +	kh_oid_set_t set;
  };
  
  #define OIDSET_INIT { { 0 } }
@@@ -61,8 -73,16 +61,16 @@@ int oidset_remove(struct oidset *set, c
   */
  void oidset_clear(struct oidset *set);
  
+ /**
+  * Add the contents of the file 'path' to an initialized oidset.  Each line is
+  * an unabbreviated object name.  Comments begin with '#', and trailing comments
+  * are allowed.  Leading whitespace and empty or white-space only lines are
+  * ignored.
+  */
+ void oidset_parse_file(struct oidset *set, const char *path);
+ 
  struct oidset_iter {
 -	kh_oid_t *set;
 +	kh_oid_set_t *set;
  	khiter_t iter;
  };