contrib / diff-highlight / DiffHighlight.pmon commit Merge branch 'sg/line-log-tree-diff-optim' (de67293)
   1package DiffHighlight;
   2
   3use 5.008;
   4use warnings FATAL => 'all';
   5use strict;
   6
   7# Use the correct value for both UNIX and Windows (/dev/null vs nul)
   8use File::Spec;
   9
  10my $NULL = File::Spec->devnull();
  11
  12# Highlight by reversing foreground and background. You could do
  13# other things like bold or underline if you prefer.
  14my @OLD_HIGHLIGHT = (
  15        color_config('color.diff-highlight.oldnormal'),
  16        color_config('color.diff-highlight.oldhighlight', "\x1b[7m"),
  17        color_config('color.diff-highlight.oldreset', "\x1b[27m")
  18);
  19my @NEW_HIGHLIGHT = (
  20        color_config('color.diff-highlight.newnormal', $OLD_HIGHLIGHT[0]),
  21        color_config('color.diff-highlight.newhighlight', $OLD_HIGHLIGHT[1]),
  22        color_config('color.diff-highlight.newreset', $OLD_HIGHLIGHT[2])
  23);
  24
  25my $RESET = "\x1b[m";
  26my $COLOR = qr/\x1b\[[0-9;]*m/;
  27my $BORING = qr/$COLOR|\s/;
  28
  29my @removed;
  30my @added;
  31my $in_hunk;
  32my $graph_indent = 0;
  33
  34our $line_cb = sub { print @_ };
  35our $flush_cb = sub { local $| = 1 };
  36
  37# Count the visible width of a string, excluding any terminal color sequences.
  38sub visible_width {
  39        local $_ = shift;
  40        my $ret = 0;
  41        while (length) {
  42                if (s/^$COLOR//) {
  43                        # skip colors
  44                } elsif (s/^.//) {
  45                        $ret++;
  46                }
  47        }
  48        return $ret;
  49}
  50
  51# Return a substring of $str, omitting $len visible characters from the
  52# beginning, where terminal color sequences do not count as visible.
  53sub visible_substr {
  54        my ($str, $len) = @_;
  55        while ($len > 0) {
  56                if ($str =~ s/^$COLOR//) {
  57                        next
  58                }
  59                $str =~ s/^.//;
  60                $len--;
  61        }
  62        return $str;
  63}
  64
  65sub handle_line {
  66        my $orig = shift;
  67        local $_ = $orig;
  68
  69        # match a graph line that begins a commit
  70        if (/^(?:$COLOR?\|$COLOR?[ ])* # zero or more leading "|" with space
  71                 $COLOR?\*$COLOR?[ ]   # a "*" with its trailing space
  72              (?:$COLOR?\|$COLOR?[ ])* # zero or more trailing "|"
  73                                 [ ]*  # trailing whitespace for merges
  74            /x) {
  75                my $graph_prefix = $&;
  76
  77                # We must flush before setting graph indent, since the
  78                # new commit may be indented differently from what we
  79                # queued.
  80                flush();
  81                $graph_indent = visible_width($graph_prefix);
  82
  83        } elsif ($graph_indent) {
  84                if (length($_) < $graph_indent) {
  85                        $graph_indent = 0;
  86                } else {
  87                        $_ = visible_substr($_, $graph_indent);
  88                }
  89        }
  90
  91        if (!$in_hunk) {
  92                $line_cb->($orig);
  93                $in_hunk = /^$COLOR*\@\@ /;
  94        }
  95        elsif (/^$COLOR*-/) {
  96                push @removed, $orig;
  97        }
  98        elsif (/^$COLOR*\+/) {
  99                push @added, $orig;
 100        }
 101        else {
 102                flush();
 103                $line_cb->($orig);
 104                $in_hunk = /^$COLOR*[\@ ]/;
 105        }
 106
 107        # Most of the time there is enough output to keep things streaming,
 108        # but for something like "git log -Sfoo", you can get one early
 109        # commit and then many seconds of nothing. We want to show
 110        # that one commit as soon as possible.
 111        #
 112        # Since we can receive arbitrary input, there's no optimal
 113        # place to flush. Flushing on a blank line is a heuristic that
 114        # happens to match git-log output.
 115        if (!length) {
 116                $flush_cb->();
 117        }
 118}
 119
 120sub flush {
 121        # Flush any queued hunk (this can happen when there is no trailing
 122        # context in the final diff of the input).
 123        show_hunk(\@removed, \@added);
 124        @removed = ();
 125        @added = ();
 126}
 127
 128sub highlight_stdin {
 129        while (<STDIN>) {
 130                handle_line($_);
 131        }
 132        flush();
 133}
 134
 135# Ideally we would feed the default as a human-readable color to
 136# git-config as the fallback value. But diff-highlight does
 137# not otherwise depend on git at all, and there are reports
 138# of it being used in other settings. Let's handle our own
 139# fallback, which means we will work even if git can't be run.
 140sub color_config {
 141        my ($key, $default) = @_;
 142        my $s = `git config --get-color $key 2>$NULL`;
 143        return length($s) ? $s : $default;
 144}
 145
 146sub show_hunk {
 147        my ($a, $b) = @_;
 148
 149        # If one side is empty, then there is nothing to compare or highlight.
 150        if (!@$a || !@$b) {
 151                $line_cb->(@$a, @$b);
 152                return;
 153        }
 154
 155        # If we have mismatched numbers of lines on each side, we could try to
 156        # be clever and match up similar lines. But for now we are simple and
 157        # stupid, and only handle multi-line hunks that remove and add the same
 158        # number of lines.
 159        if (@$a != @$b) {
 160                $line_cb->(@$a, @$b);
 161                return;
 162        }
 163
 164        my @queue;
 165        for (my $i = 0; $i < @$a; $i++) {
 166                my ($rm, $add) = highlight_pair($a->[$i], $b->[$i]);
 167                $line_cb->($rm);
 168                push @queue, $add;
 169        }
 170        $line_cb->(@queue);
 171}
 172
 173sub highlight_pair {
 174        my @a = split_line(shift);
 175        my @b = split_line(shift);
 176
 177        # Find common prefix, taking care to skip any ansi
 178        # color codes.
 179        my $seen_plusminus;
 180        my ($pa, $pb) = (0, 0);
 181        while ($pa < @a && $pb < @b) {
 182                if ($a[$pa] =~ /$COLOR/) {
 183                        $pa++;
 184                }
 185                elsif ($b[$pb] =~ /$COLOR/) {
 186                        $pb++;
 187                }
 188                elsif ($a[$pa] eq $b[$pb]) {
 189                        $pa++;
 190                        $pb++;
 191                }
 192                elsif (!$seen_plusminus && $a[$pa] eq '-' && $b[$pb] eq '+') {
 193                        $seen_plusminus = 1;
 194                        $pa++;
 195                        $pb++;
 196                }
 197                else {
 198                        last;
 199                }
 200        }
 201
 202        # Find common suffix, ignoring colors.
 203        my ($sa, $sb) = ($#a, $#b);
 204        while ($sa >= $pa && $sb >= $pb) {
 205                if ($a[$sa] =~ /$COLOR/) {
 206                        $sa--;
 207                }
 208                elsif ($b[$sb] =~ /$COLOR/) {
 209                        $sb--;
 210                }
 211                elsif ($a[$sa] eq $b[$sb]) {
 212                        $sa--;
 213                        $sb--;
 214                }
 215                else {
 216                        last;
 217                }
 218        }
 219
 220        if (is_pair_interesting(\@a, $pa, $sa, \@b, $pb, $sb)) {
 221                return highlight_line(\@a, $pa, $sa, \@OLD_HIGHLIGHT),
 222                       highlight_line(\@b, $pb, $sb, \@NEW_HIGHLIGHT);
 223        }
 224        else {
 225                return join('', @a),
 226                       join('', @b);
 227        }
 228}
 229
 230# we split either by $COLOR or by character. This has the side effect of
 231# leaving in graph cruft. It works because the graph cruft does not contain "-"
 232# or "+"
 233sub split_line {
 234        local $_ = shift;
 235        return utf8::decode($_) ?
 236                map { utf8::encode($_); $_ }
 237                        map { /$COLOR/ ? $_ : (split //) }
 238                        split /($COLOR+)/ :
 239                map { /$COLOR/ ? $_ : (split //) }
 240                split /($COLOR+)/;
 241}
 242
 243sub highlight_line {
 244        my ($line, $prefix, $suffix, $theme) = @_;
 245
 246        my $start = join('', @{$line}[0..($prefix-1)]);
 247        my $mid = join('', @{$line}[$prefix..$suffix]);
 248        my $end = join('', @{$line}[($suffix+1)..$#$line]);
 249
 250        # If we have a "normal" color specified, then take over the whole line.
 251        # Otherwise, we try to just manipulate the highlighted bits.
 252        if (defined $theme->[0]) {
 253                s/$COLOR//g for ($start, $mid, $end);
 254                chomp $end;
 255                return join('',
 256                        $theme->[0], $start, $RESET,
 257                        $theme->[1], $mid, $RESET,
 258                        $theme->[0], $end, $RESET,
 259                        "\n"
 260                );
 261        } else {
 262                return join('',
 263                        $start,
 264                        $theme->[1], $mid, $theme->[2],
 265                        $end
 266                );
 267        }
 268}
 269
 270# Pairs are interesting to highlight only if we are going to end up
 271# highlighting a subset (i.e., not the whole line). Otherwise, the highlighting
 272# is just useless noise. We can detect this by finding either a matching prefix
 273# or suffix (disregarding boring bits like whitespace and colorization).
 274sub is_pair_interesting {
 275        my ($a, $pa, $sa, $b, $pb, $sb) = @_;
 276        my $prefix_a = join('', @$a[0..($pa-1)]);
 277        my $prefix_b = join('', @$b[0..($pb-1)]);
 278        my $suffix_a = join('', @$a[($sa+1)..$#$a]);
 279        my $suffix_b = join('', @$b[($sb+1)..$#$b]);
 280
 281        return visible_substr($prefix_a, $graph_indent) !~ /^$COLOR*-$BORING*$/ ||
 282               visible_substr($prefix_b, $graph_indent) !~ /^$COLOR*\+$BORING*$/ ||
 283               $suffix_a !~ /^$BORING*$/ ||
 284               $suffix_b !~ /^$BORING*$/;
 285}