contrib / diff-highlight / diff-highlight on commit "diff-highlight: do not split multibyte characters" (8d00662)
   1#!/usr/bin/perl
   2
   3use 5.008;
   4use warnings FATAL => 'all';
   5use strict;
   6
   7# Highlight by reversing foreground and background. You could do
   8# other things like bold or underline if you prefer.
   9my @OLD_HIGHLIGHT = (
  10        color_config('color.diff-highlight.oldnormal'),
  11        color_config('color.diff-highlight.oldhighlight', "\x1b[7m"),
  12        color_config('color.diff-highlight.oldreset', "\x1b[27m")
  13);
  14my @NEW_HIGHLIGHT = (
  15        color_config('color.diff-highlight.newnormal', $OLD_HIGHLIGHT[0]),
  16        color_config('color.diff-highlight.newhighlight', $OLD_HIGHLIGHT[1]),
  17        color_config('color.diff-highlight.newreset', $OLD_HIGHLIGHT[2])
  18);
  19
  20my $RESET = "\x1b[m";
  21my $COLOR = qr/\x1b\[[0-9;]*m/;
  22my $BORING = qr/$COLOR|\s/;
  23
  24my @removed;
  25my @added;
  26my $in_hunk;
  27
  28while (<>) {
  29        if (!$in_hunk) {
  30                print;
  31                $in_hunk = /^$COLOR*\@/;
  32        }
  33        elsif (/^$COLOR*-/) {
  34                push @removed, $_;
  35        }
  36        elsif (/^$COLOR*\+/) {
  37                push @added, $_;
  38        }
  39        else {
  40                show_hunk(\@removed, \@added);
  41                @removed = ();
  42                @added = ();
  43
  44                print;
  45                $in_hunk = /^$COLOR*[\@ ]/;
  46        }
  47
  48        # Most of the time there is enough output to keep things streaming,
  49        # but for something like "git log -Sfoo", you can get one early
  50        # commit and then many seconds of nothing. We want to show
  51        # that one commit as soon as possible.
  52        #
  53        # Since we can receive arbitrary input, there's no optimal
  54        # place to flush. Flushing on a blank line is a heuristic that
  55        # happens to match git-log output.
  56        if (!length) {
  57                local $| = 1;
  58        }
  59}
  60
  61# Flush any queued hunk (this can happen when there is no trailing context in
  62# the final diff of the input).
  63show_hunk(\@removed, \@added);
  64
  65exit 0;
  66
  67# Ideally we would feed the default as a human-readable color to
  68# git-config as the fallback value. But diff-highlight does
  69# not otherwise depend on git at all, and there are reports
  70# of it being used in other settings. Let's handle our own
  71# fallback, which means we will work even if git can't be run.
  72sub color_config {
  73        my ($key, $default) = @_;
  74        my $s = `git config --get-color $key 2>/dev/null`;
  75        return length($s) ? $s : $default;
  76}
  77
  78sub show_hunk {
  79        my ($a, $b) = @_;
  80
  81        # If one side is empty, then there is nothing to compare or highlight.
  82        if (!@$a || !@$b) {
  83                print @$a, @$b;
  84                return;
  85        }
  86
  87        # If we have mismatched numbers of lines on each side, we could try to
  88        # be clever and match up similar lines. But for now we are simple and
  89        # stupid, and only handle multi-line hunks that remove and add the same
  90        # number of lines.
  91        if (@$a != @$b) {
  92                print @$a, @$b;
  93                return;
  94        }
  95
  96        my @queue;
  97        for (my $i = 0; $i < @$a; $i++) {
  98                my ($rm, $add) = highlight_pair($a->[$i], $b->[$i]);
  99                print $rm;
 100                push @queue, $add;
 101        }
 102        print @queue;
 103}
 104
 105sub highlight_pair {
 106        my @a = split_line(shift);
 107        my @b = split_line(shift);
 108
 109        # Find common prefix, taking care to skip any ansi
 110        # color codes.
 111        my $seen_plusminus;
 112        my ($pa, $pb) = (0, 0);
 113        while ($pa < @a && $pb < @b) {
 114                if ($a[$pa] =~ /$COLOR/) {
 115                        $pa++;
 116                }
 117                elsif ($b[$pb] =~ /$COLOR/) {
 118                        $pb++;
 119                }
 120                elsif ($a[$pa] eq $b[$pb]) {
 121                        $pa++;
 122                        $pb++;
 123                }
 124                elsif (!$seen_plusminus && $a[$pa] eq '-' && $b[$pb] eq '+') {
 125                        $seen_plusminus = 1;
 126                        $pa++;
 127                        $pb++;
 128                }
 129                else {
 130                        last;
 131                }
 132        }
 133
 134        # Find common suffix, ignoring colors.
 135        my ($sa, $sb) = ($#a, $#b);
 136        while ($sa >= $pa && $sb >= $pb) {
 137                if ($a[$sa] =~ /$COLOR/) {
 138                        $sa--;
 139                }
 140                elsif ($b[$sb] =~ /$COLOR/) {
 141                        $sb--;
 142                }
 143                elsif ($a[$sa] eq $b[$sb]) {
 144                        $sa--;
 145                        $sb--;
 146                }
 147                else {
 148                        last;
 149                }
 150        }
 151
 152        if (is_pair_interesting(\@a, $pa, $sa, \@b, $pb, $sb)) {
 153                return highlight_line(\@a, $pa, $sa, \@OLD_HIGHLIGHT),
 154                       highlight_line(\@b, $pb, $sb, \@NEW_HIGHLIGHT);
 155        }
 156        else {
 157                return join('', @a),
 158                       join('', @b);
 159        }
 160}
 161
 162sub split_line {
 163        local $_ = shift;
 164        return utf8::decode($_) ?
 165                map { utf8::encode($_); $_ }
 166                        map { /$COLOR/ ? $_ : (split //) }
 167                        split /($COLOR+)/ :
 168                map { /$COLOR/ ? $_ : (split //) }
 169                split /($COLOR+)/;
 170}
 171
 172sub highlight_line {
 173        my ($line, $prefix, $suffix, $theme) = @_;
 174
 175        my $start = join('', @{$line}[0..($prefix-1)]);
 176        my $mid = join('', @{$line}[$prefix..$suffix]);
 177        my $end = join('', @{$line}[($suffix+1)..$#$line]);
 178
 179        # If we have a "normal" color specified, then take over the whole line.
 180        # Otherwise, we try to just manipulate the highlighted bits.
 181        if (defined $theme->[0]) {
 182                s/$COLOR//g for ($start, $mid, $end);
 183                chomp $end;
 184                return join('',
 185                        $theme->[0], $start, $RESET,
 186                        $theme->[1], $mid, $RESET,
 187                        $theme->[0], $end, $RESET,
 188                        "\n"
 189                );
 190        } else {
 191                return join('',
 192                        $start,
 193                        $theme->[1], $mid, $theme->[2],
 194                        $end
 195                );
 196        }
 197}
 198
 199# Pairs are interesting to highlight only if we are going to end up
 200# highlighting a subset (i.e., not the whole line). Otherwise, the highlighting
 201# is just useless noise. We can detect this by finding either a matching prefix
 202# or suffix (disregarding boring bits like whitespace and colorization).
 203sub is_pair_interesting {
 204        my ($a, $pa, $sa, $b, $pb, $sb) = @_;
 205        my $prefix_a = join('', @$a[0..($pa-1)]);
 206        my $prefix_b = join('', @$b[0..($pb-1)]);
 207        my $suffix_a = join('', @$a[($sa+1)..$#$a]);
 208        my $suffix_b = join('', @$b[($sb+1)..$#$b]);
 209
 210        return $prefix_a !~ /^$COLOR*-$BORING*$/ ||
 211               $prefix_b !~ /^$COLOR*\+$BORING*$/ ||
 212               $suffix_a !~ /^$BORING*$/ ||
 213               $suffix_b !~ /^$BORING*$/;
 214}