Merge branch 'br/blame-ignore'

author Junio C Hamano <gitster@pobox.com>
Fri, 19 Jul 2019 18:30:20 +0000 (11:30 -0700)

committer Junio C Hamano <gitster@pobox.com>
Fri, 19 Jul 2019 18:30:20 +0000 (11:30 -0700)
author: Junio C Hamano <gitster@pobox.com>
Fri, 19 Jul 2019 18:30:20 +0000 (11:30 -0700)
committer: Junio C Hamano <gitster@pobox.com>
Fri, 19 Jul 2019 18:30:20 +0000 (11:30 -0700)
diff --combined blame.c

index 145eaf2faf9cf56977da61572c93783ea702b0f9,acb1bf7f7a7e221977b01582c847d2206e62f224..7f04580ad57a67be099fb1c0d85340cbcdb1f70c
--- 1/blame.c
--- 2/blame.c
+++ b/blame.c
@@@ -99,7 -99,7 +99,7 @@@ static void verify_working_tree_path(st
         for (parents = work_tree->parents; parents; parents = parents->next) {
                 const struct object_id *commit_oid = &parents->item->object.oid;
                 struct object_id blob_oid;
- -              unsigned mode;
+ +              unsigned short mode;
   
                 if (!get_tree_entry(commit_oid, path, &blob_oid, &mode) &&
                     oid_object_info(r, &blob_oid, NULL) == OBJ_BLOB)
@@@ -188,7 -188,7 +188,7 @@@ static struct commit *fake_working_tree
         unsigned mode;
         struct strbuf msg = STRBUF_INIT;
   
- -      read_index(r->index);
+ +      repo_read_index(r);
         time(&now);
         commit = alloc_commit_node(r);
         commit->object.parsed = 1;
@@@ -204,8 -204,7 +204,8 @@@
   
         origin = make_origin(commit, path);
   
- -      ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
+ +      ident = fmt_ident("Not Committed Yet", "not.committed.yet",
+ +                      WANT_BLANK_IDENT, NULL, 0);
         strbuf_addstr(&msg, "tree 0000000000000000000000000000000000000000\n");
         for (parent = commit->parents; parent; parent = parent->next)
                 strbuf_addf(&msg, "parent %s\n",
@@@ -271,7 -270,7 +271,7 @@@
          * want to run "diff-index --cached".
          */
         discard_index(r->index);
- -      read_index(r->index);
+ +      repo_read_index(r);
   
         len = strlen(path);
         if (!mode) {
@@@ -311,12 -310,707 +311,707 @@@ static int diff_hunks(mmfile_t *file_a
         return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
   }
   
+ static const char *get_next_line(const char *start, const char *end)
+ {
+       const char *nl = memchr(start, '\n', end - start);
+ 
+       return nl ? nl + 1 : end;
+ }
+ 
+ static int find_line_starts(int **line_starts, const char *buf,
+                           unsigned long len)
+ {
+       const char *end = buf + len;
+       const char *p;
+       int *lineno;
+       int num = 0;
+ 
+       for (p = buf; p < end; p = get_next_line(p, end))
+               num++;
+ 
+       ALLOC_ARRAY(*line_starts, num + 1);
+       lineno = *line_starts;
+ 
+       for (p = buf; p < end; p = get_next_line(p, end))
+               *lineno++ = p - buf;
+ 
+       *lineno = len;
+ 
+       return num;
+ }
+ 
+ struct fingerprint_entry;
+ 
+ /* A fingerprint is intended to loosely represent a string, such that two
+  * fingerprints can be quickly compared to give an indication of the similarity
+  * of the strings that they represent.
+  *
+  * A fingerprint is represented as a multiset of the lower-cased byte pairs in
+  * the string that it represents. Whitespace is added at each end of the
+  * string. Whitespace pairs are ignored. Whitespace is converted to '\0'.
+  * For example, the string "Darth   Radar" will be converted to the following
+  * fingerprint:
+  * {"\0d", "da", "da", "ar", "ar", "rt", "th", "h\0", "\0r", "ra", "ad", "r\0"}
+  *
+  * The similarity between two fingerprints is the size of the intersection of
+  * their multisets, including repeated elements. See fingerprint_similarity for
+  * examples.
+  *
+  * For ease of implementation, the fingerprint is implemented as a map
+  * of byte pairs to the count of that byte pair in the string, instead of
+  * allowing repeated elements in a set.
+  */
+ struct fingerprint {
+       struct hashmap map;
+       /* As we know the maximum number of entries in advance, it's
+        * convenient to store the entries in a single array instead of having
+        * the hashmap manage the memory.
+        */
+       struct fingerprint_entry *entries;
+ };
+ 
+ /* A byte pair in a fingerprint. Stores the number of times the byte pair
+  * occurs in the string that the fingerprint represents.
+  */
+ struct fingerprint_entry {
+       /* The hashmap entry - the hash represents the byte pair in its
+        * entirety so we don't need to store the byte pair separately.
+        */
+       struct hashmap_entry entry;
+       /* The number of times the byte pair occurs in the string that the
+        * fingerprint represents.
+        */
+       int count;
+ };
+ 
+ /* See `struct fingerprint` for an explanation of what a fingerprint is.
+  * \param result the fingerprint of the string is stored here. This must be
+  *             freed later using free_fingerprint.
+  * \param line_begin the start of the string
+  * \param line_end the end of the string
+  */
+ static void get_fingerprint(struct fingerprint *result,
+                           const char *line_begin,
+                           const char *line_end)
+ {
+       unsigned int hash, c0 = 0, c1;
+       const char *p;
+       int max_map_entry_count = 1 + line_end - line_begin;
+       struct fingerprint_entry *entry = xcalloc(max_map_entry_count,
+               sizeof(struct fingerprint_entry));
+       struct fingerprint_entry *found_entry;
+ 
+       hashmap_init(&result->map, NULL, NULL, max_map_entry_count);
+       result->entries = entry;
+       for (p = line_begin; p <= line_end; ++p, c0 = c1) {
+               /* Always terminate the string with whitespace.
+                * Normalise whitespace to 0, and normalise letters to
+                * lower case. This won't work for multibyte characters but at
+                * worst will match some unrelated characters.
+                */
+               if ((p == line_end) || isspace(*p))
+                       c1 = 0;
+               else
+                       c1 = tolower(*p);
+               hash = c0 | (c1 << 8);
+               /* Ignore whitespace pairs */
+               if (hash == 0)
+                       continue;
+               hashmap_entry_init(entry, hash);
+ 
+               found_entry = hashmap_get(&result->map, entry, NULL);
+               if (found_entry) {
+                       found_entry->count += 1;
+               } else {
+                       entry->count = 1;
+                       hashmap_add(&result->map, entry);
+                       ++entry;
+               }
+       }
+ }
+ 
+ static void free_fingerprint(struct fingerprint *f)
+ {
+       hashmap_free(&f->map, 0);
+       free(f->entries);
+ }
+ 
+ /* Calculates the similarity between two fingerprints as the size of the
+  * intersection of their multisets, including repeated elements. See
+  * `struct fingerprint` for an explanation of the fingerprint representation.
+  * The similarity between "cat mat" and "father rather" is 2 because "at" is
+  * present twice in both strings while the similarity between "tim" and "mit"
+  * is 0.
+  */
+ static int fingerprint_similarity(struct fingerprint *a, struct fingerprint *b)
+ {
+       int intersection = 0;
+       struct hashmap_iter iter;
+       const struct fingerprint_entry *entry_a, *entry_b;
+ 
+       hashmap_iter_init(&b->map, &iter);
+ 
+       while ((entry_b = hashmap_iter_next(&iter))) {
+               if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) {
+                       intersection += entry_a->count < entry_b->count ?
+                                       entry_a->count : entry_b->count;
+               }
+       }
+       return intersection;
+ }
+ 
+ /* Subtracts byte-pair elements in B from A, modifying A in place.
+  */
+ static void fingerprint_subtract(struct fingerprint *a, struct fingerprint *b)
+ {
+       struct hashmap_iter iter;
+       struct fingerprint_entry *entry_a;
+       const struct fingerprint_entry *entry_b;
+ 
+       hashmap_iter_init(&b->map, &iter);
+ 
+       while ((entry_b = hashmap_iter_next(&iter))) {
+               if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) {
+                       if (entry_a->count <= entry_b->count)
+                               hashmap_remove(&a->map, entry_b, NULL);
+                       else
+                               entry_a->count -= entry_b->count;
+               }
+       }
+ }
+ 
+ /* Calculate fingerprints for a series of lines.
+  * Puts the fingerprints in the fingerprints array, which must have been
+  * preallocated to allow storing line_count elements.
+  */
+ static void get_line_fingerprints(struct fingerprint *fingerprints,
+                                 const char *content, const int *line_starts,
+                                 long first_line, long line_count)
+ {
+       int i;
+       const char *linestart, *lineend;
+ 
+       line_starts += first_line;
+       for (i = 0; i < line_count; ++i) {
+               linestart = content + line_starts[i];
+               lineend = content + line_starts[i + 1];
+               get_fingerprint(fingerprints + i, linestart, lineend);
+       }
+ }
+ 
+ static void free_line_fingerprints(struct fingerprint *fingerprints,
+                                  int nr_fingerprints)
+ {
+       int i;
+ 
+       for (i = 0; i < nr_fingerprints; i++)
+               free_fingerprint(&fingerprints[i]);
+ }
+ 
+ /* This contains the data necessary to linearly map a line number in one half
+  * of a diff chunk to the line in the other half of the diff chunk that is
+  * closest in terms of its position as a fraction of the length of the chunk.
+  */
+ struct line_number_mapping {
+       int destination_start, destination_length,
+               source_start, source_length;
+ };
+ 
+ /* Given a line number in one range, offset and scale it to map it onto the
+  * other range.
+  * Essentially this mapping is a simple linear equation but the calculation is
+  * more complicated to allow performing it with integer operations.
+  * Another complication is that if a line could map onto many lines in the
+  * destination range then we want to choose the line at the center of those
+  * possibilities.
+  * Example: if the chunk is 2 lines long in A and 10 lines long in B then the
+  * first 5 lines in B will map onto the first line in the A chunk, while the
+  * last 5 lines will all map onto the second line in the A chunk.
+  * Example: if the chunk is 10 lines long in A and 2 lines long in B then line
+  * 0 in B will map onto line 2 in A, and line 1 in B will map onto line 7 in A.
+  */
+ static int map_line_number(int line_number,
+       const struct line_number_mapping *mapping)
+ {
+       return ((line_number - mapping->source_start) * 2 + 1) *
+              mapping->destination_length /
+              (mapping->source_length * 2) +
+              mapping->destination_start;
+ }
+ 
+ /* Get a pointer to the element storing the similarity between a line in A
+  * and a line in B.
+  *
+  * The similarities are stored in a 2-dimensional array. Each "row" in the
+  * array contains the similarities for a line in B. The similarities stored in
+  * a row are the similarities between the line in B and the nearby lines in A.
+  * To keep the length of each row the same, it is padded out with values of -1
+  * where the search range extends beyond the lines in A.
+  * For example, if max_search_distance_a is 2 and the two sides of a diff chunk
+  * look like this:
+  * a | m
+  * b | n
+  * c | o
+  * d | p
+  * e | q
+  * Then the similarity array will contain:
+  * [-1, -1, am, bm, cm,
+  *  -1, an, bn, cn, dn,
+  *  ao, bo, co, do, eo,
+  *  bp, cp, dp, ep, -1,
+  *  cq, dq, eq, -1, -1]
+  * Where similarities are denoted either by -1 for invalid, or the
+  * concatenation of the two lines in the diff being compared.
+  *
+  * \param similarities array of similarities between lines in A and B
+  * \param line_a the index of the line in A, in the same frame of reference as
+  *    closest_line_a.
+  * \param local_line_b the index of the line in B, relative to the first line
+  *                   in B that similarities represents.
+  * \param closest_line_a the index of the line in A that is deemed to be
+  *                     closest to local_line_b. This must be in the same
+  *                     frame of reference as line_a. This value defines
+  *                     where similarities is centered for the line in B.
+  * \param max_search_distance_a maximum distance in lines from the closest line
+  *                            in A for other lines in A for which
+  *                            similarities may be calculated.
+  */
+ static int *get_similarity(int *similarities,
+                          int line_a, int local_line_b,
+                          int closest_line_a, int max_search_distance_a)
+ {
+       assert(abs(line_a - closest_line_a) <=
+              max_search_distance_a);
+       return similarities + line_a - closest_line_a +
+              max_search_distance_a +
+              local_line_b * (max_search_distance_a * 2 + 1);
+ }
+ 
+ #define CERTAIN_NOTHING_MATCHES -2
+ #define CERTAINTY_NOT_CALCULATED -1
+ 
+ /* Given a line in B, first calculate its similarities with nearby lines in A
+  * if not already calculated, then identify the most similar and second most
+  * similar lines. The "certainty" is calculated based on those two
+  * similarities.
+  *
+  * \param start_a the index of the first line of the chunk in A
+  * \param length_a the length in lines of the chunk in A
+  * \param local_line_b the index of the line in B, relative to the first line
+  *                   in the chunk.
+  * \param fingerprints_a array of fingerprints for the chunk in A
+  * \param fingerprints_b array of fingerprints for the chunk in B
+  * \param similarities 2-dimensional array of similarities between lines in A
+  *                   and B. See get_similarity() for more details.
+  * \param certainties array of values indicating how strongly a line in B is
+  *                  matched with some line in A.
+  * \param second_best_result array of absolute indices in A for the second
+  *                         closest match of a line in B.
+  * \param result array of absolute indices in A for the closest match of a line
+  *             in B.
+  * \param max_search_distance_a maximum distance in lines from the closest line
+  *                            in A for other lines in A for which
+  *                            similarities may be calculated.
+  * \param map_line_number_in_b_to_a parameter to map_line_number().
+  */
+ static void find_best_line_matches(
+       int start_a,
+       int length_a,
+       int start_b,
+       int local_line_b,
+       struct fingerprint *fingerprints_a,
+       struct fingerprint *fingerprints_b,
+       int *similarities,
+       int *certainties,
+       int *second_best_result,
+       int *result,
+       const int max_search_distance_a,
+       const struct line_number_mapping *map_line_number_in_b_to_a)
+ {
+ 
+       int i, search_start, search_end, closest_local_line_a, *similarity,
+               best_similarity = 0, second_best_similarity = 0,
+               best_similarity_index = 0, second_best_similarity_index = 0;
+ 
+       /* certainty has already been calculated so no need to redo the work */
+       if (certainties[local_line_b] != CERTAINTY_NOT_CALCULATED)
+               return;
+ 
+       closest_local_line_a = map_line_number(
+               local_line_b + start_b, map_line_number_in_b_to_a) - start_a;
+ 
+       search_start = closest_local_line_a - max_search_distance_a;
+       if (search_start < 0)
+               search_start = 0;
+ 
+       search_end = closest_local_line_a + max_search_distance_a + 1;
+       if (search_end > length_a)
+               search_end = length_a;
+ 
+       for (i = search_start; i < search_end; ++i) {
+               similarity = get_similarity(similarities,
+                                           i, local_line_b,
+                                           closest_local_line_a,
+                                           max_search_distance_a);
+               if (*similarity == -1) {
+                       /* This value will never exceed 10 but assert just in
+                        * case
+                        */
+                       assert(abs(i - closest_local_line_a) < 1000);
+                       /* scale the similarity by (1000 - distance from
+                        * closest line) to act as a tie break between lines
+                        * that otherwise are equally similar.
+                        */
+                       *similarity = fingerprint_similarity(
+                               fingerprints_b + local_line_b,
+                               fingerprints_a + i) *
+                               (1000 - abs(i - closest_local_line_a));
+               }
+               if (*similarity > best_similarity) {
+                       second_best_similarity = best_similarity;
+                       second_best_similarity_index = best_similarity_index;
+                       best_similarity = *similarity;
+                       best_similarity_index = i;
+               } else if (*similarity > second_best_similarity) {
+                       second_best_similarity = *similarity;
+                       second_best_similarity_index = i;
+               }
+       }
+ 
+       if (best_similarity == 0) {
+               /* this line definitely doesn't match with anything. Mark it
+                * with this special value so it doesn't get invalidated and
+                * won't be recalculated.
+                */
+               certainties[local_line_b] = CERTAIN_NOTHING_MATCHES;
+               result[local_line_b] = -1;
+       } else {
+               /* Calculate the certainty with which this line matches.
+                * If the line matches well with two lines then that reduces
+                * the certainty. However we still want to prioritise matching
+                * a line that matches very well with two lines over matching a
+                * line that matches poorly with one line, hence doubling
+                * best_similarity.
+                * This means that if we have
+                * line X that matches only one line with a score of 3,
+                * line Y that matches two lines equally with a score of 5,
+                * and line Z that matches only one line with a score or 2,
+                * then the lines in order of certainty are X, Y, Z.
+                */
+               certainties[local_line_b] = best_similarity * 2 -
+                       second_best_similarity;
+ 
+               /* We keep both the best and second best results to allow us to
+                * check at a later stage of the matching process whether the
+                * result needs to be invalidated.
+                */
+               result[local_line_b] = start_a + best_similarity_index;
+               second_best_result[local_line_b] =
+                       start_a + second_best_similarity_index;
+       }
+ }
+ 
+ /*
+  * This finds the line that we can match with the most confidence, and
+  * uses it as a partition. It then calls itself on the lines on either side of
+  * that partition. In this way we avoid lines appearing out of order, and
+  * retain a sensible line ordering.
+  * \param start_a index of the first line in A with which lines in B may be
+  *              compared.
+  * \param start_b index of the first line in B for which matching should be
+  *              done.
+  * \param length_a number of lines in A with which lines in B may be compared.
+  * \param length_b number of lines in B for which matching should be done.
+  * \param fingerprints_a mutable array of fingerprints in A. The first element
+  *                     corresponds to the line at start_a.
+  * \param fingerprints_b array of fingerprints in B. The first element
+  *                     corresponds to the line at start_b.
+  * \param similarities 2-dimensional array of similarities between lines in A
+  *                   and B. See get_similarity() for more details.
+  * \param certainties array of values indicating how strongly a line in B is
+  *                  matched with some line in A.
+  * \param second_best_result array of absolute indices in A for the second
+  *                         closest match of a line in B.
+  * \param result array of absolute indices in A for the closest match of a line
+  *             in B.
+  * \param max_search_distance_a maximum distance in lines from the closest line
+  *                          in A for other lines in A for which
+  *                          similarities may be calculated.
+  * \param max_search_distance_b an upper bound on the greatest possible
+  *                          distance between lines in B such that they will
+  *                              both be compared with the same line in A
+  *                          according to max_search_distance_a.
+  * \param map_line_number_in_b_to_a parameter to map_line_number().
+  */
+ static void fuzzy_find_matching_lines_recurse(
+       int start_a, int start_b,
+       int length_a, int length_b,
+       struct fingerprint *fingerprints_a,
+       struct fingerprint *fingerprints_b,
+       int *similarities,
+       int *certainties,
+       int *second_best_result,
+       int *result,
+       int max_search_distance_a,
+       int max_search_distance_b,
+       const struct line_number_mapping *map_line_number_in_b_to_a)
+ {
+       int i, invalidate_min, invalidate_max, offset_b,
+               second_half_start_a, second_half_start_b,
+               second_half_length_a, second_half_length_b,
+               most_certain_line_a, most_certain_local_line_b = -1,
+               most_certain_line_certainty = -1,
+               closest_local_line_a;
+ 
+       for (i = 0; i < length_b; ++i) {
+               find_best_line_matches(start_a,
+                                      length_a,
+                                      start_b,
+                                      i,
+                                      fingerprints_a,
+                                      fingerprints_b,
+                                      similarities,
+                                      certainties,
+                                      second_best_result,
+                                      result,
+                                      max_search_distance_a,
+                                      map_line_number_in_b_to_a);
+ 
+               if (certainties[i] > most_certain_line_certainty) {
+                       most_certain_line_certainty = certainties[i];
+                       most_certain_local_line_b = i;
+               }
+       }
+ 
+       /* No matches. */
+       if (most_certain_local_line_b == -1)
+               return;
+ 
+       most_certain_line_a = result[most_certain_local_line_b];
+ 
+       /*
+        * Subtract the most certain line's fingerprint in B from the matched
+        * fingerprint in A. This means that other lines in B can't also match
+        * the same parts of the line in A.
+        */
+       fingerprint_subtract(fingerprints_a + most_certain_line_a - start_a,
+                            fingerprints_b + most_certain_local_line_b);
+ 
+       /* Invalidate results that may be affected by the choice of most
+        * certain line.
+        */
+       invalidate_min = most_certain_local_line_b - max_search_distance_b;
+       invalidate_max = most_certain_local_line_b + max_search_distance_b + 1;
+       if (invalidate_min < 0)
+               invalidate_min = 0;
+       if (invalidate_max > length_b)
+               invalidate_max = length_b;
+ 
+       /* As the fingerprint in A has changed, discard previously calculated
+        * similarity values with that fingerprint.
+        */
+       for (i = invalidate_min; i < invalidate_max; ++i) {
+               closest_local_line_a = map_line_number(
+                       i + start_b, map_line_number_in_b_to_a) - start_a;
+ 
+               /* Check that the lines in A and B are close enough that there
+                * is a similarity value for them.
+                */
+               if (abs(most_certain_line_a - start_a - closest_local_line_a) >
+                       max_search_distance_a) {
+                       continue;
+               }
+ 
+               *get_similarity(similarities, most_certain_line_a - start_a,
+                               i, closest_local_line_a,
+                               max_search_distance_a) = -1;
+       }
+ 
+       /* More invalidating of results that may be affected by the choice of
+        * most certain line.
+        * Discard the matches for lines in B that are currently matched with a
+        * line in A such that their ordering contradicts the ordering imposed
+        * by the choice of most certain line.
+        */
+       for (i = most_certain_local_line_b - 1; i >= invalidate_min; --i) {
+               /* In this loop we discard results for lines in B that are
+                * before most-certain-line-B but are matched with a line in A
+                * that is after most-certain-line-A.
+                */
+               if (certainties[i] >= 0 &&
+                   (result[i] >= most_certain_line_a ||
+                    second_best_result[i] >= most_certain_line_a)) {
+                       certainties[i] = CERTAINTY_NOT_CALCULATED;
+               }
+       }
+       for (i = most_certain_local_line_b + 1; i < invalidate_max; ++i) {
+               /* In this loop we discard results for lines in B that are
+                * after most-certain-line-B but are matched with a line in A
+                * that is before most-certain-line-A.
+                */
+               if (certainties[i] >= 0 &&
+                   (result[i] <= most_certain_line_a ||
+                    second_best_result[i] <= most_certain_line_a)) {
+                       certainties[i] = CERTAINTY_NOT_CALCULATED;
+               }
+       }
+ 
+       /* Repeat the matching process for lines before the most certain line.
+        */
+       if (most_certain_local_line_b > 0) {
+               fuzzy_find_matching_lines_recurse(
+                       start_a, start_b,
+                       most_certain_line_a + 1 - start_a,
+                       most_certain_local_line_b,
+                       fingerprints_a, fingerprints_b, similarities,
+                       certainties, second_best_result, result,
+                       max_search_distance_a,
+                       max_search_distance_b,
+                       map_line_number_in_b_to_a);
+       }
+       /* Repeat the matching process for lines after the most certain line.
+        */
+       if (most_certain_local_line_b + 1 < length_b) {
+               second_half_start_a = most_certain_line_a;
+               offset_b = most_certain_local_line_b + 1;
+               second_half_start_b = start_b + offset_b;
+               second_half_length_a =
+                       length_a + start_a - second_half_start_a;
+               second_half_length_b =
+                       length_b + start_b - second_half_start_b;
+               fuzzy_find_matching_lines_recurse(
+                       second_half_start_a, second_half_start_b,
+                       second_half_length_a, second_half_length_b,
+                       fingerprints_a + second_half_start_a - start_a,
+                       fingerprints_b + offset_b,
+                       similarities +
+                               offset_b * (max_search_distance_a * 2 + 1),
+                       certainties + offset_b,
+                       second_best_result + offset_b, result + offset_b,
+                       max_search_distance_a,
+                       max_search_distance_b,
+                       map_line_number_in_b_to_a);
+       }
+ }
+ 
+ /* Find the lines in the parent line range that most closely match the lines in
+  * the target line range. This is accomplished by matching fingerprints in each
+  * blame_origin, and choosing the best matches that preserve the line ordering.
+  * See struct fingerprint for details of fingerprint matching, and
+  * fuzzy_find_matching_lines_recurse for details of preserving line ordering.
+  *
+  * The performance is believed to be O(n log n) in the typical case and O(n^2)
+  * in a pathological case, where n is the number of lines in the target range.
+  */
+ static int *fuzzy_find_matching_lines(struct blame_origin *parent,
+                                     struct blame_origin *target,
+                                     int tlno, int parent_slno, int same,
+                                     int parent_len)
+ {
+       /* We use the terminology "A" for the left hand side of the diff AKA
+        * parent, and "B" for the right hand side of the diff AKA target. */
+       int start_a = parent_slno;
+       int length_a = parent_len;
+       int start_b = tlno;
+       int length_b = same - tlno;
+ 
+       struct line_number_mapping map_line_number_in_b_to_a = {
+               start_a, length_a, start_b, length_b
+       };
+ 
+       struct fingerprint *fingerprints_a = parent->fingerprints;
+       struct fingerprint *fingerprints_b = target->fingerprints;
+ 
+       int i, *result, *second_best_result,
+               *certainties, *similarities, similarity_count;
+ 
+       /*
+        * max_search_distance_a means that given a line in B, compare it to
+        * the line in A that is closest to its position, and the lines in A
+        * that are no greater than max_search_distance_a lines away from the
+        * closest line in A.
+        *
+        * max_search_distance_b is an upper bound on the greatest possible
+        * distance between lines in B such that they will both be compared
+        * with the same line in A according to max_search_distance_a.
+        */
+       int max_search_distance_a = 10, max_search_distance_b;
+ 
+       if (length_a <= 0)
+               return NULL;
+ 
+       if (max_search_distance_a >= length_a)
+               max_search_distance_a = length_a ? length_a - 1 : 0;
+ 
+       max_search_distance_b = ((2 * max_search_distance_a + 1) * length_b
+                                - 1) / length_a;
+ 
+       result = xcalloc(sizeof(int), length_b);
+       second_best_result = xcalloc(sizeof(int), length_b);
+       certainties = xcalloc(sizeof(int), length_b);
+ 
+       /* See get_similarity() for details of similarities. */
+       similarity_count = length_b * (max_search_distance_a * 2 + 1);
+       similarities = xcalloc(sizeof(int), similarity_count);
+ 
+       for (i = 0; i < length_b; ++i) {
+               result[i] = -1;
+               second_best_result[i] = -1;
+               certainties[i] = CERTAINTY_NOT_CALCULATED;
+       }
+ 
+       for (i = 0; i < similarity_count; ++i)
+               similarities[i] = -1;
+ 
+       fuzzy_find_matching_lines_recurse(start_a, start_b,
+                                         length_a, length_b,
+                                         fingerprints_a + start_a,
+                                         fingerprints_b + start_b,
+                                         similarities,
+                                         certainties,
+                                         second_best_result,
+                                         result,
+                                         max_search_distance_a,
+                                         max_search_distance_b,
+                                         &map_line_number_in_b_to_a);
+ 
+       free(similarities);
+       free(certainties);
+       free(second_best_result);
+ 
+       return result;
+ }
+ 
+ static void fill_origin_fingerprints(struct blame_origin *o)
+ {
+       int *line_starts;
+ 
+       if (o->fingerprints)
+               return;
+       o->num_lines = find_line_starts(&line_starts, o->file.ptr,
+                                       o->file.size);
+       o->fingerprints = xcalloc(sizeof(struct fingerprint), o->num_lines);
+       get_line_fingerprints(o->fingerprints, o->file.ptr, line_starts,
+                             0, o->num_lines);
+       free(line_starts);
+ }
+ 
+ static void drop_origin_fingerprints(struct blame_origin *o)
+ {
+       if (o->fingerprints) {
+               free_line_fingerprints(o->fingerprints, o->num_lines);
+               o->num_lines = 0;
+               FREE_AND_NULL(o->fingerprints);
+       }
+ }
+ 
   /*
    * Given an origin, prepare mmfile_t structure to be used by the
    * diff machinery
    */
   static void fill_origin_blob(struct diff_options *opt,
-                            struct blame_origin *o, mmfile_t *file, int *num_read_blob)
+                            struct blame_origin *o, mmfile_t *file,
+                            int *num_read_blob, int fill_fingerprints)
   {
         if (!o->file.ptr) {
                 enum object_type type;
@@@ -340,11 -1034,14 +1035,14 @@@
         }
         else
                 *file = o->file;
+       if (fill_fingerprints)
+               fill_origin_fingerprints(o);
   }
   
   static void drop_origin_blob(struct blame_origin *o)
   {
         FREE_AND_NULL(o->file.ptr);
+       drop_origin_fingerprints(o);
   }
   
   /*
@@@ -480,7 -1177,9 +1178,9 @@@ void blame_coalesce(struct blame_scoreb
   
         for (ent = sb->ent; ent && (next = ent->next); ent = next) {
                 if (ent->suspect == next->suspect &&
-                   ent->s_lno + ent->num_lines == next->s_lno) {
+                   ent->s_lno + ent->num_lines == next->s_lno &&
+                   ent->ignored == next->ignored &&
+                   ent->unblamable == next->unblamable) {
                         ent->num_lines += next->num_lines;
                         ent->next = next->next;
                         blame_origin_decref(next->suspect);
@@@ -730,8 -1429,14 +1430,14 @@@ static void split_overlap(struct blame_
                           struct blame_origin *parent)
   {
         int chunk_end_lno;
+       int i;
         memset(split, 0, sizeof(struct blame_entry [3]));
   
+       for (i = 0; i < 3; i++) {
+               split[i].ignored = e->ignored;
+               split[i].unblamable = e->unblamable;
+       }
+ 
         if (e->s_lno < tlno) {
                 /* there is a pre-chunk part not blamed on parent */
                 split[0].suspect = blame_origin_incref(e->suspect);
@@@ -839,6 -1544,164 +1545,164 @@@ static struct blame_entry *reverse_blam
         return tail;
   }
   
+ /*
+  * Splits a blame entry into two entries at 'len' lines.  The original 'e'
+  * consists of len lines, i.e. [e->lno, e->lno + len), and the second part,
+  * which is returned, consists of the remainder: [e->lno + len, e->lno +
+  * e->num_lines).  The caller needs to sort out the reference counting for the
+  * new entry's suspect.
+  */
+ static struct blame_entry *split_blame_at(struct blame_entry *e, int len,
+                                         struct blame_origin *new_suspect)
+ {
+       struct blame_entry *n = xcalloc(1, sizeof(struct blame_entry));
+ 
+       n->suspect = new_suspect;
+       n->ignored = e->ignored;
+       n->unblamable = e->unblamable;
+       n->lno = e->lno + len;
+       n->s_lno = e->s_lno + len;
+       n->num_lines = e->num_lines - len;
+       e->num_lines = len;
+       e->score = 0;
+       return n;
+ }
+ 
+ struct blame_line_tracker {
+       int is_parent;
+       int s_lno;
+ };
+ 
+ static int are_lines_adjacent(struct blame_line_tracker *first,
+                             struct blame_line_tracker *second)
+ {
+       return first->is_parent == second->is_parent &&
+              first->s_lno + 1 == second->s_lno;
+ }
+ 
+ static int scan_parent_range(struct fingerprint *p_fps,
+                            struct fingerprint *t_fps, int t_idx,
+                            int from, int nr_lines)
+ {
+       int sim, p_idx;
+       #define FINGERPRINT_FILE_THRESHOLD      10
+       int best_sim_val = FINGERPRINT_FILE_THRESHOLD;
+       int best_sim_idx = -1;
+ 
+       for (p_idx = from; p_idx < from + nr_lines; p_idx++) {
+               sim = fingerprint_similarity(&t_fps[t_idx], &p_fps[p_idx]);
+               if (sim < best_sim_val)
+                       continue;
+               /* Break ties with the closest-to-target line number */
+               if (sim == best_sim_val && best_sim_idx != -1 &&
+                   abs(best_sim_idx - t_idx) < abs(p_idx - t_idx))
+                       continue;
+               best_sim_val = sim;
+               best_sim_idx = p_idx;
+       }
+       return best_sim_idx;
+ }
+ 
+ /*
+  * The first pass checks the blame entry (from the target) against the parent's
+  * diff chunk.  If that fails for a line, the second pass tries to match that
+  * line to any part of parent file.  That catches cases where a change was
+  * broken into two chunks by 'context.'
+  */
+ static void guess_line_blames(struct blame_origin *parent,
+                             struct blame_origin *target,
+                             int tlno, int offset, int same, int parent_len,
+                             struct blame_line_tracker *line_blames)
+ {
+       int i, best_idx, target_idx;
+       int parent_slno = tlno + offset;
+       int *fuzzy_matches;
+ 
+       fuzzy_matches = fuzzy_find_matching_lines(parent, target,
+                                                 tlno, parent_slno, same,
+                                                 parent_len);
+       for (i = 0; i < same - tlno; i++) {
+               target_idx = tlno + i;
+               if (fuzzy_matches && fuzzy_matches[i] >= 0) {
+                       best_idx = fuzzy_matches[i];
+               } else {
+                       best_idx = scan_parent_range(parent->fingerprints,
+                                                    target->fingerprints,
+                                                    target_idx, 0,
+                                                    parent->num_lines);
+               }
+               if (best_idx >= 0) {
+                       line_blames[i].is_parent = 1;
+                       line_blames[i].s_lno = best_idx;
+               } else {
+                       line_blames[i].is_parent = 0;
+                       line_blames[i].s_lno = target_idx;
+               }
+       }
+       free(fuzzy_matches);
+ }
+ 
+ /*
+  * This decides which parts of a blame entry go to the parent (added to the
+  * ignoredp list) and which stay with the target (added to the diffp list).  The
+  * actual decision was made in a separate heuristic function, and those answers
+  * for the lines in 'e' are in line_blames.  This consumes e, essentially
+  * putting it on a list.
+  *
+  * Note that the blame entries on the ignoredp list are not necessarily sorted
+  * with respect to the parent's line numbers yet.
+  */
+ static void ignore_blame_entry(struct blame_entry *e,
+                              struct blame_origin *parent,
+                              struct blame_entry **diffp,
+                              struct blame_entry **ignoredp,
+                              struct blame_line_tracker *line_blames)
+ {
+       int entry_len, nr_lines, i;
+ 
+       /*
+        * We carve new entries off the front of e.  Each entry comes from a
+        * contiguous chunk of lines: adjacent lines from the same origin
+        * (either the parent or the target).
+        */
+       entry_len = 1;
+       nr_lines = e->num_lines;        /* e changes in the loop */
+       for (i = 0; i < nr_lines; i++) {
+               struct blame_entry *next = NULL;
+ 
+               /*
+                * We are often adjacent to the next line - only split the blame
+                * entry when we have to.
+                */
+               if (i + 1 < nr_lines) {
+                       if (are_lines_adjacent(&line_blames[i],
+                                              &line_blames[i + 1])) {
+                               entry_len++;
+                               continue;
+                       }
+                       next = split_blame_at(e, entry_len,
+                                             blame_origin_incref(e->suspect));
+               }
+               if (line_blames[i].is_parent) {
+                       e->ignored = 1;
+                       blame_origin_decref(e->suspect);
+                       e->suspect = blame_origin_incref(parent);
+                       e->s_lno = line_blames[i - entry_len + 1].s_lno;
+                       e->next = *ignoredp;
+                       *ignoredp = e;
+               } else {
+                       e->unblamable = 1;
+                       /* e->s_lno is already in the target's address space. */
+                       e->next = *diffp;
+                       *diffp = e;
+               }
+               assert(e->num_lines == entry_len);
+               e = next;
+               entry_len = 1;
+       }
+       assert(!e);
+ }
+ 
   /*
    * Process one hunk from the patch between the current suspect for
    * blame_entry e and its parent.  This first blames any unfinished
@@@ -848,13 -1711,20 +1712,20 @@@
    * -C options may lead to overlapping/duplicate source line number
    * ranges, all we can rely on from sorting/merging is the order of the
    * first suspect line number.
+  *
+  * tlno: line number in the target where this chunk begins
+  * same: line number in the target where this chunk ends
+  * offset: add to tlno to get the chunk starting point in the parent
+  * parent_len: number of lines in the parent chunk
    */
   static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
-                       int tlno, int offset, int same,
-                       struct blame_origin *parent)
+                       int tlno, int offset, int same, int parent_len,
+                       struct blame_origin *parent,
+                       struct blame_origin *target, int ignore_diffs)
   {
         struct blame_entry *e = **srcq;
-       struct blame_entry *samep = NULL, *diffp = NULL;
+       struct blame_entry *samep = NULL, *diffp = NULL, *ignoredp = NULL;
+       struct blame_line_tracker *line_blames = NULL;
   
         while (e && e->s_lno < tlno) {
                 struct blame_entry *next = e->next;
@@@ -865,14 -1735,9 +1736,9 @@@
                  */
                 if (e->s_lno + e->num_lines > tlno) {
                         /* Move second half to a new record */
-                       int len = tlno - e->s_lno;
-                       struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
-                       n->suspect = e->suspect;
-                       n->lno = e->lno + len;
-                       n->s_lno = e->s_lno + len;
-                       n->num_lines = e->num_lines - len;
-                       e->num_lines = len;
-                       e->score = 0;
+                       struct blame_entry *n;
+ 
+                       n = split_blame_at(e, tlno - e->s_lno, e->suspect);
                         /* Push new record to diffp */
                         n->next = diffp;
                         diffp = n;
@@@ -908,6 -1773,14 +1774,14 @@@
          */
         samep = NULL;
         diffp = NULL;
+ 
+       if (ignore_diffs && same - tlno > 0) {
+               line_blames = xcalloc(sizeof(struct blame_line_tracker),
+                                     same - tlno);
+               guess_line_blames(parent, target, tlno, offset, same,
+                                 parent_len, line_blames);
+       }
+ 
         while (e && e->s_lno < same) {
                 struct blame_entry *next = e->next;
   
@@@ -919,22 -1792,37 +1793,37 @@@
                          * Move second half to a new record to be
                          * processed by later chunks
                          */
-                       int len = same - e->s_lno;
-                       struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
-                       n->suspect = blame_origin_incref(e->suspect);
-                       n->lno = e->lno + len;
-                       n->s_lno = e->s_lno + len;
-                       n->num_lines = e->num_lines - len;
-                       e->num_lines = len;
-                       e->score = 0;
+                       struct blame_entry *n;
+ 
+                       n = split_blame_at(e, same - e->s_lno,
+                                          blame_origin_incref(e->suspect));
                         /* Push new record to samep */
                         n->next = samep;
                         samep = n;
                 }
-               e->next = diffp;
-               diffp = e;
+               if (ignore_diffs) {
+                       ignore_blame_entry(e, parent, &diffp, &ignoredp,
+                                          line_blames + e->s_lno - tlno);
+               } else {
+                       e->next = diffp;
+                       diffp = e;
+               }
                 e = next;
         }
+       free(line_blames);
+       if (ignoredp) {
+               /*
+                * Note ignoredp is not sorted yet, and thus neither is dstq.
+                * That list must be sorted before we queue_blames().  We defer
+                * sorting until after all diff hunks are processed, so that
+                * guess_line_blames() can pick *any* line in the parent.  The
+                * slight drawback is that we end up sorting all blame entries
+                * passed to the parent, including those that are unrelated to
+                * changes made by the ignored commit.
+                */
+               **dstq = reverse_blame(ignoredp, **dstq);
+               *dstq = &ignoredp->next;
+       }
         **srcq = reverse_blame(diffp, reverse_blame(samep, e));
         /* Move across elements that are in the unblamable portion */
         if (diffp)
@@@ -943,7 -1831,9 +1832,9 @@@
   
   struct blame_chunk_cb_data {
         struct blame_origin *parent;
+       struct blame_origin *target;
         long offset;
+       int ignore_diffs;
         struct blame_entry **dstq;
         struct blame_entry **srcq;
   };
@@@ -956,7 -1846,8 +1847,8 @@@ static int blame_chunk_cb(long start_a
         if (start_a - start_b != d->offset)
                 die("internal error in blame::blame_chunk_cb");
         blame_chunk(&d->dstq, &d->srcq, start_b, start_a - start_b,
-                   start_b + count_b, d->parent);
+                   start_b + count_b, count_a, d->parent, d->target,
+                   d->ignore_diffs);
         d->offset = start_a + count_a - (start_b + count_b);
         return 0;
   }
@@@ -968,7 -1859,7 +1860,7 @@@
    */
   static void pass_blame_to_parent(struct blame_scoreboard *sb,
                                  struct blame_origin *target,
-                                struct blame_origin *parent)
+                                struct blame_origin *parent, int ignore_diffs)
   {
         mmfile_t file_p, file_o;
         struct blame_chunk_cb_data d;
@@@ -978,11 -1869,15 +1870,15 @@@
                 return; /* nothing remains for this target */
   
         d.parent = parent;
+       d.target = target;
         d.offset = 0;
+       d.ignore_diffs = ignore_diffs;
         d.dstq = &newdest; d.srcq = &target->suspects;
   
-       fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob);
-       fill_origin_blob(&sb->revs->diffopt, target, &file_o, &sb->num_read_blob);
+       fill_origin_blob(&sb->revs->diffopt, parent, &file_p,
+                        &sb->num_read_blob, ignore_diffs);
+       fill_origin_blob(&sb->revs->diffopt, target, &file_o,
+                        &sb->num_read_blob, ignore_diffs);
         sb->num_get_patch++;
   
         if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts))
@@@ -990,8 -1885,13 +1886,13 @@@
                     oid_to_hex(&parent->commit->object.oid),
                     oid_to_hex(&target->commit->object.oid));
         /* The rest are the same as the parent */
-       blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, parent);
+       blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, 0,
+                   parent, target, 0);
         *d.dstq = NULL;
+       if (ignore_diffs)
+               newdest = llist_mergesort(newdest, get_next_blame,
+                                         set_next_blame,
+                                         compare_blame_suspect);
         queue_blames(sb, parent, newdest);
   
         return;
@@@ -1188,7 -2088,8 +2089,8 @@@ static void find_move_in_parent(struct 
         if (!unblamed)
                 return; /* nothing remains for this target */
   
-       fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob);
+       fill_origin_blob(&sb->revs->diffopt, parent, &file_p,
+                        &sb->num_read_blob, 0);
         if (!file_p.ptr)
                 return;
   
@@@ -1317,7 -2218,8 +2219,8 @@@ static void find_copy_in_parent(struct 
                         norigin = get_origin(parent, p->one->path);
                         oidcpy(&norigin->blob_oid, &p->one->oid);
                         norigin->mode = p->one->mode;
-                       fill_origin_blob(&sb->revs->diffopt, norigin, &file_p, &sb->num_read_blob);
+                       fill_origin_blob(&sb->revs->diffopt, norigin, &file_p,
+                                        &sb->num_read_blob, 0);
                         if (!file_p.ptr)
                                 continue;
   
@@@ -1495,11 -2397,34 +2398,34 @@@ static void pass_blame(struct blame_sco
                         blame_origin_incref(porigin);
                         origin->previous = porigin;
                 }
-               pass_blame_to_parent(sb, origin, porigin);
+               pass_blame_to_parent(sb, origin, porigin, 0);
                 if (!origin->suspects)
                         goto finish;
         }
   
+       /*
+        * Pass remaining suspects for ignored commits to their parents.
+        */
+       if (oidset_contains(&sb->ignore_list, &commit->object.oid)) {
+               for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
+                    i < num_sg && sg;
+                    sg = sg->next, i++) {
+                       struct blame_origin *porigin = sg_origin[i];
+ 
+                       if (!porigin)
+                               continue;
+                       pass_blame_to_parent(sb, origin, porigin, 1);
+                       /*
+                        * Preemptively drop porigin so we can refresh the
+                        * fingerprints if we use the parent again, which can
+                        * occur if you ignore back-to-back commits.
+                        */
+                       drop_origin_blob(porigin);
+                       if (!origin->suspects)
+                               goto finish;
+               }
+       }
+ 
         /*
          * Optionally find moves in parents' files.
          */
@@@ -1562,8 -2487,7 +2488,8 @@@ finish
         }
         for (i = 0; i < num_sg; i++) {
                 if (sg_origin[i]) {
- -                      drop_origin_blob(sg_origin[i]);
+ +                      if (!sg_origin[i]->suspects)
+ +                              drop_origin_blob(sg_origin[i]);
                         blame_origin_decref(sg_origin[i]);
                 }
         }
@@@ -1640,37 -2564,14 +2566,14 @@@ void assign_blame(struct blame_scoreboa
         }
   }
   
- static const char *get_next_line(const char *start, const char *end)
- {
-       const char *nl = memchr(start, '\n', end - start);
-       return nl ? nl + 1 : end;
- }
- 
   /*
    * To allow quick access to the contents of nth line in the
    * final image, prepare an index in the scoreboard.
    */
   static int prepare_lines(struct blame_scoreboard *sb)
   {
-       const char *buf = sb->final_buf;
-       unsigned long len = sb->final_buf_size;
-       const char *end = buf + len;
-       const char *p;
-       int *lineno;
-       int num = 0;
- 
-       for (p = buf; p < end; p = get_next_line(p, end))
-               num++;
- 
-       ALLOC_ARRAY(sb->lineno, num + 1);
-       lineno = sb->lineno;
- 
-       for (p = buf; p < end; p = get_next_line(p, end))
-               *lineno++ = p - buf;
- 
-       *lineno = len;
- 
-       sb->num_lines = num;
+       sb->num_lines = find_line_starts(&sb->lineno, sb->final_buf,
+                                        sb->final_buf_size);
         return sb->num_lines;
   }
   
diff --combined blame.h

index d62f80fa74c44011f8cdaea7a4baec3d8cae03e6,5dd877bb78fc068510753f7e967dea4be049f66d..4a9e1270b036465c23fab5a0e536b9638ca3ce1b
--- 1/blame.h
--- 2/blame.h
+++ b/blame.h
@@@ -51,8 -51,10 +51,10 @@@ struct blame_origin 
          */
         struct blame_entry *suspects;
         mmfile_t file;
+       int num_lines;
+       void *fingerprints;
         struct object_id blob_oid;
- -      unsigned mode;
+ +      unsigned short mode;
         /* guilty gets set when shipping any suspects to the final
          * blame list instead of other commits
          */
@@@ -92,6 -94,8 +94,8 @@@ struct blame_entry 
          * scanning the lines over and over.
          */
         unsigned score;
+       int ignored;
+       int unblamable;
   };
   
   /*
@@@ -117,6 -121,8 +121,8 @@@ struct blame_scoreboard 
         /* linked list of blames */
         struct blame_entry *ent;
   
+       struct oidset ignore_list;
+ 
         /* look-up a line in the final buffer */
         int num_lines;
         int *lineno;
@@@ -177,6 -183,6 +183,6 @@@ struct blame_entry *blame_entry_prepend
                                         long start, long end,
                                         struct blame_origin *o);
   
- -extern struct blame_origin *get_blame_suspects(struct commit *commit);
+ +struct blame_origin *get_blame_suspects(struct commit *commit);
   
   #endif /* BLAME_H */
diff --combined builtin/blame.c

index 50e3d4a2656140b406975947d066283a2ef14460,bb321eb2000179ad924588080202a527a751d7f6..b6534d4dea9ad81a34eaf099f7cf9a0a1e56f410
--- 1/builtin/blame.c
--- 2/builtin/blame.c
+++ b/builtin/blame.c
@@@ -27,7 -27,6 +27,7 @@@
   #include "object-store.h"
   #include "blame.h"
   #include "string-list.h"
+ +#include "refs.h"
   
   static char blame_usage[] = N_("git blame [<options>] [<rev-opts>] [<rev>] [--] <file>");
   
@@@ -53,14 -52,17 +53,17 @@@ static int no_whole_file_rename
   static int show_progress;
   static char repeated_meta_color[COLOR_MAXLEN];
   static int coloring_mode;
+ static struct string_list ignore_revs_file_list = STRING_LIST_INIT_NODUP;
+ static int mark_unblamable_lines;
+ static int mark_ignored_lines;
   
   static struct date_mode blame_date_mode = { DATE_ISO8601 };
   static size_t blame_date_width;
   
   static struct string_list mailmap = STRING_LIST_INIT_NODUP;
   
- -#ifndef DEBUG
- -#define DEBUG 0
+ +#ifndef DEBUG_BLAME
+ +#define DEBUG_BLAME 0
   #endif
   
   static unsigned blame_move_score;
@@@ -480,6 -482,14 +483,14 @@@ static void emit_other(struct blame_sco
                         }
                 }
   
+               if (mark_unblamable_lines && ent->unblamable) {
+                       length--;
+                       putchar('*');
+               }
+               if (mark_ignored_lines && ent->ignored) {
+                       length--;
+                       putchar('?');
+               }
                 printf("%.*s", length, hex);
                 if (opt & OUTPUT_ANNOTATE_COMPAT) {
                         const char *name;
@@@ -696,6 -706,24 +707,24 @@@ static int git_blame_config(const char 
                 parse_date_format(value, &blame_date_mode);
                 return 0;
         }
+       if (!strcmp(var, "blame.ignorerevsfile")) {
+               const char *str;
+               int ret;
+ 
+               ret = git_config_pathname(&str, var, value);
+               if (ret)
+                       return ret;
+               string_list_insert(&ignore_revs_file_list, str);
+               return 0;
+       }
+       if (!strcmp(var, "blame.markunblamablelines")) {
+               mark_unblamable_lines = git_config_bool(var, value);
+               return 0;
+       }
+       if (!strcmp(var, "blame.markignoredlines")) {
+               mark_ignored_lines = git_config_bool(var, value);
+               return 0;
+       }
         if (!strcmp(var, "color.blame.repeatedlines")) {
                 if (color_parse_mem(value, strlen(value), repeated_meta_color))
                         warning(_("invalid color '%s' in color.blame.repeatedLines"),
@@@ -775,6 -803,27 +804,27 @@@ static int is_a_rev(const char *name
         return OBJ_NONE < oid_object_info(the_repository, &oid, NULL);
   }
   
+ static void build_ignorelist(struct blame_scoreboard *sb,
+                            struct string_list *ignore_revs_file_list,
+                            struct string_list *ignore_rev_list)
+ {
+       struct string_list_item *i;
+       struct object_id oid;
+ 
+       oidset_init(&sb->ignore_list, 0);
+       for_each_string_list_item(i, ignore_revs_file_list) {
+               if (!strcmp(i->string, ""))
+                       oidset_clear(&sb->ignore_list);
+               else
+                       oidset_parse_file(&sb->ignore_list, i->string);
+       }
+       for_each_string_list_item(i, ignore_rev_list) {
+               if (get_oid_committish(i->string, &oid))
+                       die(_("cannot find revision %s to ignore"), i->string);
+               oidset_insert(&sb->ignore_list, &oid);
+       }
+ }
+ 
   int cmd_blame(int argc, const char **argv, const char *prefix)
   {
         struct rev_info revs;
@@@ -786,6 -835,7 +836,7 @@@
         struct progress_info pi = { NULL, 0 };
   
         struct string_list range_list = STRING_LIST_INIT_NODUP;
+       struct string_list ignore_rev_list = STRING_LIST_INIT_NODUP;
         int output_option = 0, opt = 0;
         int show_stats = 0;
         const char *revs_file = NULL;
@@@ -807,6 -857,8 +858,8 @@@
                 OPT_BIT('s', NULL, &output_option, N_("Suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
                 OPT_BIT('e', "show-email", &output_option, N_("Show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
                 OPT_BIT('w', NULL, &xdl_opts, N_("Ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
+               OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("Ignore <rev> when blaming")),
+               OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("Ignore revisions from <file>")),
                 OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE),
                 OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR),
   
@@@ -815,7 -867,7 +868,7 @@@
                  * and are only included here to get included in the "-h"
                  * output:
                  */
- -              { OPTION_LOWLEVEL_CALLBACK, 0, "indent-heuristic", NULL, NULL, N_("Use an experimental heuristic to improve diffs"), PARSE_OPT_NOARG, parse_opt_unknown_cb },
+ +              { OPTION_LOWLEVEL_CALLBACK, 0, "indent-heuristic", NULL, NULL, N_("Use an experimental heuristic to improve diffs"), PARSE_OPT_NOARG, NULL, 0, parse_opt_unknown_cb },
   
                 OPT_BIT(0, "minimal", &xdl_opts, N_("Spend extra cycles to find better match"), XDF_NEED_MINIMAL),
                 OPT_STRING('S', NULL, &revs_file, N_("file"), N_("Use revisions from <file> instead of calling git-rev-list")),
@@@ -926,10 -978,6 +979,10 @@@ parse_done
                  */
                 blame_date_width = utf8_strwidth(_("4 years, 11 months ago")) + 1; /* add the null */
                 break;
+ +      case DATE_HUMAN:
+ +              /* If the year is shown, no time is shown */
+ +              blame_date_width = sizeof("Thu Oct 19 16:00");
+ +              break;
         case DATE_NORMAL:
                 blame_date_width = sizeof("Thu Oct 19 16:00:04 2006 -0700");
                 break;
@@@ -994,24 -1042,15 +1047,27 @@@
   
         revs.disable_stdin = 1;
         setup_revisions(argc, argv, &revs, NULL);
+ +      if (!revs.pending.nr && is_bare_repository()) {
+ +              struct commit *head_commit;
+ +              struct object_id head_oid;
+ +
+ +              if (!resolve_ref_unsafe("HEAD", RESOLVE_REF_READING,
+ +                                      &head_oid, NULL) ||
+ +                  !(head_commit = lookup_commit_reference_gently(revs.repo,
+ +                                                           &head_oid, 1)))
+ +                      die("no such ref: HEAD");
+ +
+ +              add_pending_object(&revs, &head_commit->object, "HEAD");
+ +      }
   
         init_scoreboard(&sb);
         sb.revs = &revs;
         sb.contents_from = contents_from;
         sb.reverse = reverse;
         sb.repo = the_repository;
+       build_ignorelist(&sb, &ignore_revs_file_list, &ignore_rev_list);
+       string_list_clear(&ignore_revs_file_list, 0);
+       string_list_clear(&ignore_rev_list, 0);
         setup_scoreboard(&sb, path, &o);
         lno = sb.num_lines;
   
@@@ -1024,8 -1063,7 +1080,8 @@@
                 long bottom, top;
                 if (parse_range_arg(range_list.items[range_i].string,
                                     nth_line_cb, &sb, lno, anchor,
- -                                  &bottom, &top, sb.path, &the_index))
+ +                                  &bottom, &top, sb.path,
+ +                                  the_repository->index))
                         usage(blame_usage);
                 if ((!lno && (top || bottom)) || lno < bottom)
                         die(Q_("file %s has only %lu line",
@@@ -1062,7 -1100,7 +1118,7 @@@
         if (blame_copy_score)
                 sb.copy_score = blame_copy_score;
   
- -      sb.debug = DEBUG;
+ +      sb.debug = DEBUG_BLAME;
         sb.on_sanity_fail = &sanity_check_on_fail;
   
         sb.show_root = show_root;
diff --combined fsck.c

index 117c4a978f410047b6ca4b939ca6c294d6163dfc,80b53e6f49681312fdc85bf91ed45a5620cc5a27..cdb7d8db03017e36811e2bf3803ff0a4ca032a3f
--- 1/fsck.c
--- 2/fsck.c
+++ b/fsck.c
@@@ -181,41 -181,6 +181,6 @@@ static int fsck_msg_type(enum fsck_msg_
         return msg_type;
   }
   
- static void init_skiplist(struct fsck_options *options, const char *path)
- {
-       FILE *fp;
-       struct strbuf sb = STRBUF_INIT;
-       struct object_id oid;
- 
-       fp = fopen(path, "r");
-       if (!fp)
-               die("Could not open skip list: %s", path);
-       while (!strbuf_getline(&sb, fp)) {
-               const char *p;
-               const char *hash;
- 
-               /*
-                * Allow trailing comments, leading whitespace
-                * (including before commits), and empty or whitespace
-                * only lines.
-                */
-               hash = strchr(sb.buf, '#');
-               if (hash)
-                       strbuf_setlen(&sb, hash - sb.buf);
-               strbuf_trim(&sb);
-               if (!sb.len)
-                       continue;
- 
-               if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
-                       die("Invalid SHA-1: %s", sb.buf);
-               oidset_insert(&options->skiplist, &oid);
-       }
-       if (ferror(fp))
-               die_errno("Could not read '%s'", path);
-       fclose(fp);
-       strbuf_release(&sb);
- }
- 
   static int parse_msg_type(const char *str)
   {
         if (!strcmp(str, "error"))
@@@ -284,7 -249,7 +249,7 @@@ void fsck_set_msg_types(struct fsck_opt
                 if (!strcmp(buf, "skiplist")) {
                         if (equal == len)
                                 die("skiplist requires a path");
-                       init_skiplist(options, buf + equal + 1);
+                       oidset_parse_file(&options->skiplist, buf + equal + 1);
                         buf += len + 1;
                         continue;
                 }
@@@ -410,14 -375,14 +375,14 @@@ static int fsck_walk_tree(struct tree *
                         continue;
   
                 if (S_ISDIR(entry.mode)) {
- -                      obj = (struct object *)lookup_tree(the_repository, entry.oid);
+ +                      obj = (struct object *)lookup_tree(the_repository, &entry.oid);
                         if (name && obj)
                                 put_object_name(options, obj, "%s%s/", name,
                                         entry.path);
                         result = options->walk(obj, OBJ_TREE, data, options);
                 }
                 else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) {
- -                      obj = (struct object *)lookup_blob(the_repository, entry.oid);
+ +                      obj = (struct object *)lookup_blob(the_repository, &entry.oid);
                         if (name && obj)
                                 put_object_name(options, obj, "%s%s", name,
                                         entry.path);
@@@ -604,7 -569,7 +569,7 @@@ static int fsck_tree(struct tree *item
         o_name = NULL;
   
         while (desc.size) {
- -              unsigned mode;
+ +              unsigned short mode;
                 const char *name;
                 const struct object_id *oid;
   
@@@ -1092,7 -1057,7 +1057,7 @@@ int fsck_finish(struct fsck_options *op
   
                 blob = lookup_blob(the_repository, oid);
                 if (!blob) {
- -                      struct object *obj = lookup_unknown_object(oid->hash);
+ +                      struct object *obj = lookup_unknown_object(oid);
                         ret |= report(options, obj,
                                       FSCK_MSG_GITMODULES_BLOB,
                                       "non-blob found at .gitmodules");
diff --combined oidset.c

index 8bdecb13de1e0d0f24bfccb27dd46cdf64b27cff,584be63e520ae8d959aef6c6cb2bfa5f5a8995fa..f63ce818f67766378485ab16075dd11e87c00ca0
--- 1/oidset.c
--- 2/oidset.c
+++ b/oidset.c
@@@ -5,33 -5,68 +5,68 @@@ void oidset_init(struct oidset *set, si
   {
         memset(&set->set, 0, sizeof(set->set));
         if (initial_size)
- -              kh_resize_oid(&set->set, initial_size);
+ +              kh_resize_oid_set(&set->set, initial_size);
   }
   
   int oidset_contains(const struct oidset *set, const struct object_id *oid)
   {
- -      khiter_t pos = kh_get_oid(&set->set, *oid);
+ +      khiter_t pos = kh_get_oid_set(&set->set, *oid);
         return pos != kh_end(&set->set);
   }
   
   int oidset_insert(struct oidset *set, const struct object_id *oid)
   {
         int added;
- -      kh_put_oid(&set->set, *oid, &added);
+ +      kh_put_oid_set(&set->set, *oid, &added);
         return !added;
   }
   
   int oidset_remove(struct oidset *set, const struct object_id *oid)
   {
- -      khiter_t pos = kh_get_oid(&set->set, *oid);
+ +      khiter_t pos = kh_get_oid_set(&set->set, *oid);
         if (pos == kh_end(&set->set))
                 return 0;
- -      kh_del_oid(&set->set, pos);
+ +      kh_del_oid_set(&set->set, pos);
         return 1;
   }
   
   void oidset_clear(struct oidset *set)
   {
- -      kh_release_oid(&set->set);
+ +      kh_release_oid_set(&set->set);
         oidset_init(set, 0);
   }
+ 
+ void oidset_parse_file(struct oidset *set, const char *path)
+ {
+       FILE *fp;
+       struct strbuf sb = STRBUF_INIT;
+       struct object_id oid;
+ 
+       fp = fopen(path, "r");
+       if (!fp)
+               die("could not open object name list: %s", path);
+       while (!strbuf_getline(&sb, fp)) {
+               const char *p;
+               const char *name;
+ 
+               /*
+                * Allow trailing comments, leading whitespace
+                * (including before commits), and empty or whitespace
+                * only lines.
+                */
+               name = strchr(sb.buf, '#');
+               if (name)
+                       strbuf_setlen(&sb, name - sb.buf);
+               strbuf_trim(&sb);
+               if (!sb.len)
+                       continue;
+ 
+               if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
+                       die("invalid object name: %s", sb.buf);
+               oidset_insert(set, &oid);
+       }
+       if (ferror(fp))
+               die_errno("Could not read '%s'", path);
+       fclose(fp);
+       strbuf_release(&sb);
+ }
diff --combined oidset.h

index 505fad578bec1691fde9f28342d4c476c60dd50c,c4807749df8d169f03d506beaf183254fc397b48..5346563b0bccb602b8de745d1308793e8995a5e5
--- 1/oidset.h
--- 2/oidset.h
+++ b/oidset.h
@@@ -16,11 -16,23 +16,11 @@@
    *      table overhead.
    */
   
- -static inline unsigned int oid_hash(struct object_id oid)
- -{
- -      return sha1hash(oid.hash);
- -}
- -
- -static inline int oid_equal(struct object_id a, struct object_id b)
- -{
- -      return oideq(&a, &b);
- -}
- -
- -KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal)
- -
   /**
    * A single oidset; should be zero-initialized (or use OIDSET_INIT).
    */
   struct oidset {
- -      kh_oid_t set;
+ +      kh_oid_set_t set;
   };
   
   #define OIDSET_INIT { { 0 } }
@@@ -61,8 -73,16 +61,16 @@@ int oidset_remove(struct oidset *set, c
    */
   void oidset_clear(struct oidset *set);
   
+ /**
+  * Add the contents of the file 'path' to an initialized oidset.  Each line is
+  * an unabbreviated object name.  Comments begin with '#', and trailing comments
+  * are allowed.  Leading whitespace and empty or white-space only lines are
+  * ignored.
+  */
+ void oidset_parse_file(struct oidset *set, const char *path);
+ 
   struct oidset_iter {
- -      kh_oid_t *set;
+ +      kh_oid_set_t *set;
         khiter_t iter;
   };
author	Junio C Hamano <gitster@pobox.com>
author	Fri, 19 Jul 2019 18:30:20 +0000 (11:30 -0700)
committer	Junio C Hamano <gitster@pobox.com>
committer	Fri, 19 Jul 2019 18:30:20 +0000 (11:30 -0700)
		1	2
blame.c	patch \|	diff1 \|	diff2 \|	blob \| history
blame.h	patch \|	diff1 \|	diff2 \|	blob \| history
builtin/blame.c	patch \|	diff1 \|	diff2 \|	blob \| history
fsck.c	patch \|	diff1 \|	diff2 \|	blob \| history
oidset.c	patch \|	diff1 \|	diff2 \|	blob \| history
oidset.h	patch \|	diff1 \|	diff2 \|	blob \| history