Merge branch 'jn/maint-gitweb-utf8-fix'
authorJunio C Hamano <gitster@pobox.com>
Thu, 22 Dec 2011 23:30:12 +0000 (15:30 -0800)
committerJunio C Hamano <gitster@pobox.com>
Thu, 22 Dec 2011 23:30:12 +0000 (15:30 -0800)
* jn/maint-gitweb-utf8-fix:
gitweb: Fix fallback mode of to_utf8 subroutine
gitweb: Output valid utf8 in git_blame_common('data')
gitweb: esc_html() site name for title in OPML
gitweb: Call to_utf8() on input string in chop_and_escape_str()

1  2 
gitweb/gitweb.perl
diff --combined gitweb/gitweb.perl
index f80f2594cb2e498fa788d38f5b06c88e2fef95ca,874023a33e9162b99bfa5b0e4038b102c129f6f1..fc41b07bcb4a7493d255150b3a3fa75ca7412352
@@@ -759,7 -759,6 +759,7 @@@ our @cgi_param_mapping = 
        extra_options => "opt",
        search_use_regexp => "sr",
        ctag => "by_tag",
 +      diff_style => "ds",
        # this must be last entry (for manipulation from JavaScript)
        javascript => "js"
  );
@@@ -1443,8 -1442,8 +1443,8 @@@ sub validate_refname 
  sub to_utf8 {
        my $str = shift;
        return undef unless defined $str;
-       if (utf8::valid($str)) {
-               utf8::decode($str);
+       if (utf8::is_utf8($str) || utf8::decode($str)) {
                return $str;
        } else {
                return decode($fallback_encoding, $str, Encode::FB_DEFAULT);
@@@ -1696,6 -1695,7 +1696,7 @@@ sub chop_and_escape_str 
        my ($str) = @_;
  
        my $chopped = chop_str(@_);
+       $str = to_utf8($str);
        if ($chopped eq $str) {
                return esc_html($chopped);
        } else {
@@@ -2226,119 -2226,93 +2227,119 @@@ sub format_diff_cc_simplified 
        return $result;
  }
  
 -# format patch (diff) line (not to be used for diff headers)
 -sub format_diff_line {
 -      my $line = shift;
 -      my ($from, $to) = @_;
 -      my $diff_class = "";
 -
 -      chomp $line;
 +sub diff_line_class {
 +      my ($line, $from, $to) = @_;
  
 +      # ordinary diff
 +      my $num_sign = 1;
 +      # combined diff
        if ($from && $to && ref($from->{'href'}) eq "ARRAY") {
 -              # combined diff
 -              my $prefix = substr($line, 0, scalar @{$from->{'href'}});
 -              if ($line =~ m/^\@{3}/) {
 -                      $diff_class = " chunk_header";
 -              } elsif ($line =~ m/^\\/) {
 -                      $diff_class = " incomplete";
 -              } elsif ($prefix =~ tr/+/+/) {
 -                      $diff_class = " add";
 -              } elsif ($prefix =~ tr/-/-/) {
 -                      $diff_class = " rem";
 -              }
 -      } else {
 -              # assume ordinary diff
 -              my $char = substr($line, 0, 1);
 -              if ($char eq '+') {
 -                      $diff_class = " add";
 -              } elsif ($char eq '-') {
 -                      $diff_class = " rem";
 -              } elsif ($char eq '@') {
 -                      $diff_class = " chunk_header";
 -              } elsif ($char eq "\\") {
 -                      $diff_class = " incomplete";
 -              }
 +              $num_sign = scalar @{$from->{'href'}};
 +      }
 +
 +      my @diff_line_classifier = (
 +              { regexp => qr/^\@\@{$num_sign} /, class => "chunk_header"},
 +              { regexp => qr/^\\/,               class => "incomplete"  },
 +              { regexp => qr/^ {$num_sign}/,     class => "ctx" },
 +              # classifier for context must come before classifier add/rem,
 +              # or we would have to use more complicated regexp, for example
 +              # qr/(?= {0,$m}\+)[+ ]{$num_sign}/, where $m = $num_sign - 1;
 +              { regexp => qr/^[+ ]{$num_sign}/,   class => "add" },
 +              { regexp => qr/^[- ]{$num_sign}/,   class => "rem" },
 +      );
 +      for my $clsfy (@diff_line_classifier) {
 +              return $clsfy->{'class'}
 +                      if ($line =~ $clsfy->{'regexp'});
        }
 -      $line = untabify($line);
 -      if ($from && $to && $line =~ m/^\@{2} /) {
 -              my ($from_text, $from_start, $from_lines, $to_text, $to_start, $to_lines, $section) =
 -                      $line =~ m/^\@{2} (-(\d+)(?:,(\d+))?) (\+(\d+)(?:,(\d+))?) \@{2}(.*)$/;
  
 -              $from_lines = 0 unless defined $from_lines;
 -              $to_lines   = 0 unless defined $to_lines;
 +      # fallback
 +      return "";
 +}
  
 -              if ($from->{'href'}) {
 -                      $from_text = $cgi->a({-href=>"$from->{'href'}#l$from_start",
 -                                           -class=>"list"}, $from_text);
 -              }
 -              if ($to->{'href'}) {
 -                      $to_text   = $cgi->a({-href=>"$to->{'href'}#l$to_start",
 -                                           -class=>"list"}, $to_text);
 -              }
 -              $line = "<span class=\"chunk_info\">@@ $from_text $to_text @@</span>" .
 -                      "<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
 -              return "<div class=\"diff$diff_class\">$line</div>\n";
 -      } elsif ($from && $to && $line =~ m/^\@{3}/) {
 -              my ($prefix, $ranges, $section) = $line =~ m/^(\@+) (.*?) \@+(.*)$/;
 -              my (@from_text, @from_start, @from_nlines, $to_text, $to_start, $to_nlines);
 +# assumes that $from and $to are defined and correctly filled,
 +# and that $line holds a line of chunk header for unified diff
 +sub format_unidiff_chunk_header {
 +      my ($line, $from, $to) = @_;
  
 -              @from_text = split(' ', $ranges);
 -              for (my $i = 0; $i < @from_text; ++$i) {
 -                      ($from_start[$i], $from_nlines[$i]) =
 -                              (split(',', substr($from_text[$i], 1)), 0);
 -              }
 +      my ($from_text, $from_start, $from_lines, $to_text, $to_start, $to_lines, $section) =
 +              $line =~ m/^\@{2} (-(\d+)(?:,(\d+))?) (\+(\d+)(?:,(\d+))?) \@{2}(.*)$/;
  
 -              $to_text   = pop @from_text;
 -              $to_start  = pop @from_start;
 -              $to_nlines = pop @from_nlines;
 +      $from_lines = 0 unless defined $from_lines;
 +      $to_lines   = 0 unless defined $to_lines;
  
 -              $line = "<span class=\"chunk_info\">$prefix ";
 -              for (my $i = 0; $i < @from_text; ++$i) {
 -                      if ($from->{'href'}[$i]) {
 -                              $line .= $cgi->a({-href=>"$from->{'href'}[$i]#l$from_start[$i]",
 -                                                -class=>"list"}, $from_text[$i]);
 -                      } else {
 -                              $line .= $from_text[$i];
 -                      }
 -                      $line .= " ";
 -              }
 -              if ($to->{'href'}) {
 -                      $line .= $cgi->a({-href=>"$to->{'href'}#l$to_start",
 -                                        -class=>"list"}, $to_text);
 +      if ($from->{'href'}) {
 +              $from_text = $cgi->a({-href=>"$from->{'href'}#l$from_start",
 +                                   -class=>"list"}, $from_text);
 +      }
 +      if ($to->{'href'}) {
 +              $to_text   = $cgi->a({-href=>"$to->{'href'}#l$to_start",
 +                                   -class=>"list"}, $to_text);
 +      }
 +      $line = "<span class=\"chunk_info\">@@ $from_text $to_text @@</span>" .
 +              "<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
 +      return $line;
 +}
 +
 +# assumes that $from and $to are defined and correctly filled,
 +# and that $line holds a line of chunk header for combined diff
 +sub format_cc_diff_chunk_header {
 +      my ($line, $from, $to) = @_;
 +
 +      my ($prefix, $ranges, $section) = $line =~ m/^(\@+) (.*?) \@+(.*)$/;
 +      my (@from_text, @from_start, @from_nlines, $to_text, $to_start, $to_nlines);
 +
 +      @from_text = split(' ', $ranges);
 +      for (my $i = 0; $i < @from_text; ++$i) {
 +              ($from_start[$i], $from_nlines[$i]) =
 +                      (split(',', substr($from_text[$i], 1)), 0);
 +      }
 +
 +      $to_text   = pop @from_text;
 +      $to_start  = pop @from_start;
 +      $to_nlines = pop @from_nlines;
 +
 +      $line = "<span class=\"chunk_info\">$prefix ";
 +      for (my $i = 0; $i < @from_text; ++$i) {
 +              if ($from->{'href'}[$i]) {
 +                      $line .= $cgi->a({-href=>"$from->{'href'}[$i]#l$from_start[$i]",
 +                                        -class=>"list"}, $from_text[$i]);
                } else {
 -                      $line .= $to_text;
 +                      $line .= $from_text[$i];
                }
 -              $line .= " $prefix</span>" .
 -                       "<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
 -              return "<div class=\"diff$diff_class\">$line</div>\n";
 +              $line .= " ";
        }
 -      return "<div class=\"diff$diff_class\">" . esc_html($line, -nbsp=>1) . "</div>\n";
 +      if ($to->{'href'}) {
 +              $line .= $cgi->a({-href=>"$to->{'href'}#l$to_start",
 +                                -class=>"list"}, $to_text);
 +      } else {
 +              $line .= $to_text;
 +      }
 +      $line .= " $prefix</span>" .
 +               "<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
 +      return $line;
 +}
 +
 +# process patch (diff) line (not to be used for diff headers),
 +# returning class and HTML-formatted (but not wrapped) line
 +sub process_diff_line {
 +      my $line = shift;
 +      my ($from, $to) = @_;
 +
 +      my $diff_class = diff_line_class($line, $from, $to);
 +
 +      chomp $line;
 +      $line = untabify($line);
 +
 +      if ($from && $to && $line =~ m/^\@{2} /) {
 +              $line = format_unidiff_chunk_header($line, $from, $to);
 +              return $diff_class, $line;
 +
 +      } elsif ($from && $to && $line =~ m/^\@{3}/) {
 +              $line = format_cc_diff_chunk_header($line, $from, $to);
 +              return $diff_class, $line;
 +
 +      }
 +      return $diff_class, esc_html($line, -nbsp=>1);
  }
  
  # Generates undef or something like "_snapshot_" or "snapshot (_tbz2_ _zip_)",
@@@ -4860,97 -4834,8 +4861,97 @@@ sub git_difftree_body 
        print "</table>\n";
  }
  
 +sub print_sidebyside_diff_chunk {
 +      my @chunk = @_;
 +      my (@ctx, @rem, @add);
 +
 +      return unless @chunk;
 +
 +      # incomplete last line might be among removed or added lines,
 +      # or both, or among context lines: find which
 +      for (my $i = 1; $i < @chunk; $i++) {
 +              if ($chunk[$i][0] eq 'incomplete') {
 +                      $chunk[$i][0] = $chunk[$i-1][0];
 +              }
 +      }
 +
 +      # guardian
 +      push @chunk, ["", ""];
 +
 +      foreach my $line_info (@chunk) {
 +              my ($class, $line) = @$line_info;
 +
 +              # print chunk headers
 +              if ($class && $class eq 'chunk_header') {
 +                      print $line;
 +                      next;
 +              }
 +
 +              ## print from accumulator when type of class of lines change
 +              # empty contents block on start rem/add block, or end of chunk
 +              if (@ctx && (!$class || $class eq 'rem' || $class eq 'add')) {
 +                      print join '',
 +                              '<div class="chunk_block ctx">',
 +                                      '<div class="old">',
 +                                      @ctx,
 +                                      '</div>',
 +                                      '<div class="new">',
 +                                      @ctx,
 +                                      '</div>',
 +                              '</div>';
 +                      @ctx = ();
 +              }
 +              # empty add/rem block on start context block, or end of chunk
 +              if ((@rem || @add) && (!$class || $class eq 'ctx')) {
 +                      if (!@add) {
 +                              # pure removal
 +                              print join '',
 +                                      '<div class="chunk_block rem">',
 +                                              '<div class="old">',
 +                                              @rem,
 +                                              '</div>',
 +                                      '</div>';
 +                      } elsif (!@rem) {
 +                              # pure addition
 +                              print join '',
 +                                      '<div class="chunk_block add">',
 +                                              '<div class="new">',
 +                                              @add,
 +                                              '</div>',
 +                                      '</div>';
 +                      } else {
 +                              # assume that it is change
 +                              print join '',
 +                                      '<div class="chunk_block chg">',
 +                                              '<div class="old">',
 +                                              @rem,
 +                                              '</div>',
 +                                              '<div class="new">',
 +                                              @add,
 +                                              '</div>',
 +                                      '</div>';
 +                      }
 +                      @rem = @add = ();
 +              }
 +
 +              ## adding lines to accumulator
 +              # guardian value
 +              last unless $line;
 +              # rem, add or change
 +              if ($class eq 'rem') {
 +                      push @rem, $line;
 +              } elsif ($class eq 'add') {
 +                      push @add, $line;
 +              }
 +              # context line
 +              if ($class eq 'ctx') {
 +                      push @ctx, $line;
 +              }
 +      }
 +}
 +
  sub git_patchset_body {
 -      my ($fd, $difftree, $hash, @hash_parents) = @_;
 +      my ($fd, $diff_style, $difftree, $hash, @hash_parents) = @_;
        my ($hash_parent) = $hash_parents[0];
  
        my $is_combined = (@hash_parents > 1);
        my $diffinfo;
        my $to_name;
        my (%from, %to);
 +      my @chunk; # for side-by-side diff
  
        print "<div class=\"patchset\">\n";
  
  
                        next PATCH if ($patch_line =~ m/^diff /);
  
 -                      print format_diff_line($patch_line, \%from, \%to);
 +                      my ($class, $line) = process_diff_line($patch_line, \%from, \%to);
 +                      my $diff_classes = "diff";
 +                      $diff_classes .= " $class" if ($class);
 +                      $line = "<div class=\"$diff_classes\">$line</div>\n";
 +
 +                      if ($diff_style eq 'sidebyside' && !$is_combined) {
 +                              if ($class eq 'chunk_header') {
 +                                      print_sidebyside_diff_chunk(@chunk);
 +                                      @chunk = ( [ $class, $line ] );
 +                              } else {
 +                                      push @chunk, [ $class, $line ];
 +                              }
 +                      } else {
 +                              # default 'inline' style and unknown styles
 +                              print $line;
 +                      }
                }
  
        } continue {
 +              if (@chunk) {
 +                      print_sidebyside_diff_chunk(@chunk);
 +                      @chunk = ();
 +              }
                print "</div>\n"; # class="patch"
        }
  
@@@ -6243,7 -6108,9 +6244,9 @@@ sub git_blame_common 
                        -type=>"text/plain", -charset => "utf-8",
                        -status=> "200 OK");
                local $| = 1; # output autoflush
-               print while <$fd>;
+               while (my $line = <$fd>) {
+                       print to_utf8($line);
+               }
                close $fd
                        or print "ERROR $!\n";
  
@@@ -7085,7 -6952,6 +7088,7 @@@ sub git_object 
  
  sub git_blobdiff {
        my $format = shift || 'html';
 +      my $diff_style = $input_params{'diff_style'} || 'inline';
  
        my $fd;
        my @difftree;
                my $formats_nav =
                        $cgi->a({-href => href(action=>"blobdiff_plain", -replay=>1)},
                                "raw");
 +              $formats_nav .= diff_style_nav($diff_style);
                git_header_html(undef, $expires);
                if (defined $hash_base && (my %co = parse_commit($hash_base))) {
                        git_print_page_nav('','', $hash_base,$co{'tree'},$hash_base, $formats_nav);
        if ($format eq 'html') {
                print "<div class=\"page_body\">\n";
  
 -              git_patchset_body($fd, [ \%diffinfo ], $hash_base, $hash_parent_base);
 +              git_patchset_body($fd, $diff_style,
 +                                [ \%diffinfo ], $hash_base, $hash_parent_base);
                close $fd;
  
                print "</div>\n"; # class="page_body"
@@@ -7222,31 -7086,9 +7225,31 @@@ sub git_blobdiff_plain 
        git_blobdiff('plain');
  }
  
 +# assumes that it is added as later part of already existing navigation,
 +# so it returns "| foo | bar" rather than just "foo | bar"
 +sub diff_style_nav {
 +      my ($diff_style, $is_combined) = @_;
 +      $diff_style ||= 'inline';
 +
 +      return "" if ($is_combined);
 +
 +      my @styles = (inline => 'inline', 'sidebyside' => 'side by side');
 +      my %styles = @styles;
 +      @styles =
 +              @styles[ map { $_ * 2 } 0..$#styles/2 ];
 +
 +      return join '',
 +              map { " | ".$_ }
 +              map {
 +                      $_ eq $diff_style ? $styles{$_} :
 +                      $cgi->a({-href => href(-replay=>1, diff_style => $_)}, $styles{$_})
 +              } @styles;
 +}
 +
  sub git_commitdiff {
        my %params = @_;
        my $format = $params{-format} || 'html';
 +      my $diff_style = $input_params{'diff_style'} || 'inline';
  
        my ($patch_max) = gitweb_get_feature('patches');
        if ($format eq 'patch') {
                                $cgi->a({-href => href(action=>"patch", -replay=>1)},
                                        "patch");
                }
 +              $formats_nav .= diff_style_nav($diff_style, @{$co{'parents'}} > 1);
  
                if (defined $hash_parent &&
                    $hash_parent ne '-c' && $hash_parent ne '--cc') {
                                }
                        }
                        $formats_nav .= ': ' .
 -                              $cgi->a({-href => href(action=>"commitdiff",
 -                                                     hash=>$hash_parent)},
 +                              $cgi->a({-href => href(-replay=>1,
 +                                                     hash=>$hash_parent, hash_base=>undef)},
                                        esc_html($hash_parent_short)) .
                                ')';
                } elsif (!$co{'parent'}) {
                        # single parent commit
                        $formats_nav .=
                                ' (parent: ' .
 -                              $cgi->a({-href => href(action=>"commitdiff",
 -                                                     hash=>$co{'parent'})},
 +                              $cgi->a({-href => href(-replay=>1,
 +                                                     hash=>$co{'parent'}, hash_base=>undef)},
                                        esc_html(substr($co{'parent'}, 0, 7))) .
                                ')';
                } else {
                        # merge commit
                        if ($hash_parent eq '--cc') {
                                $formats_nav .= ' | ' .
 -                                      $cgi->a({-href => href(action=>"commitdiff",
 +                                      $cgi->a({-href => href(-replay=>1,
                                                               hash=>$hash, hash_parent=>'-c')},
                                                'combined');
                        } else { # $hash_parent eq '-c'
                                $formats_nav .= ' | ' .
 -                                      $cgi->a({-href => href(action=>"commitdiff",
 +                                      $cgi->a({-href => href(-replay=>1,
                                                               hash=>$hash, hash_parent=>'--cc')},
                                                'compact');
                        }
                        $formats_nav .=
                                ' (merge: ' .
                                join(' ', map {
 -                                      $cgi->a({-href => href(action=>"commitdiff",
 -                                                             hash=>$_)},
 +                                      $cgi->a({-href => href(-replay=>1,
 +                                                             hash=>$_, hash_base=>undef)},
                                                esc_html(substr($_, 0, 7)));
                                } @{$co{'parents'}} ) .
                                ')';
                                  $use_parents ? @{$co{'parents'}} : $hash_parent);
                print "<br/>\n";
  
 -              git_patchset_body($fd, \@difftree, $hash,
 +              git_patchset_body($fd, $diff_style,
 +                                \@difftree, $hash,
                                  $use_parents ? @{$co{'parents'}} : $hash_parent);
                close $fd;
                print "</div>\n"; # class="page_body"
@@@ -7862,11 -7702,12 +7865,12 @@@ sub git_opml 
                -charset => 'utf-8',
                -content_disposition => 'inline; filename="opml.xml"');
  
+       my $title = esc_html($site_name);
        print <<XML;
  <?xml version="1.0" encoding="utf-8"?>
  <opml version="1.0">
  <head>
-   <title>$site_name OPML Export</title>
+   <title>$title OPML Export</title>
  </head>
  <body>
  <outline text="git RSS feeds">