gitweb: use highlight's shebang detection
author Ian Kelling <>
Sat, 24 Sep 2016 22:32:58 +0000 (15:32 -0700)
committer Junio C Hamano <>
Sun, 25 Sep 2016 23:39:11 +0000 (16:39 -0700)
The "highlight" binary can, in some cases, determine the language type
by the means of file contents, for example the shebang in the first line
for some scripting languages. Make use of this autodetection for files
whose syntax is not known by gitweb. In that case, pass the blob
contents to "highlight --force"; the parameter is needed to make it
always generate HTML output (which includes HTML-escaping).

Although we now run highlight on files which do not end up highlighted,
performance is virtually unaffected because when we call highlight, it
is used for escaping HTML. In the case that highlight is used, gitweb
calls sanitize() instead of esc_html(), and the latter is significantly
slower (it does more, being roughly a superset of sanitize()). Simple
benchmark comparing performance of 'blob' view of files without syntax
highlighting in gitweb before and after this change indicates ±1%
difference in request time for all file types. Benchmark was performed
on local instance on Debian, using Apache/2.4.23 web server and CGI.

Document the feature and improve the syntax highlighting documentation;
add a test to ensure gitweb doesn't crash when language detection is used.

Signed-off-by: Ian Kelling <>
Acked-by: Jakub Narębski <>
Signed-off-by: Junio C Hamano <>
index a79e35024623904145c7ac2031f8416bac6e6e23..e6320891b118cc4ddf58293660f8877ac02232ad 100644 (file)
@@ -246,13 +246,20 @@ $highlight_bin::
        Note that 'highlight' feature must be set for gitweb to actually
        use syntax highlighting.
-*NOTE*: if you want to add support for new file type (supported by
-"highlight" but not used by gitweb), you need to modify `%highlight_ext`
-or `%highlight_basename`, depending on whether you detect type of file
-based on extension (for example "sh") or on its basename (for example
-"Makefile").  The keys of these hashes are extension and basename,
-respectively, and value for given key is name of syntax to be passed via
-`--syntax <syntax>` to highlighter.
+*NOTE*: for a file to be highlighted, its syntax type must be detected
+and that syntax must be supported by "highlight".  The default syntax
+detection is minimal, and there are many supported syntax types with no
+detection by default.  There are three options for adding syntax
+detection.  The first and second priorities are `%highlight_basename` and
+`%highlight_ext`, which detect based on basename (the full filename, for
+example "Makefile") and extension (for example "sh").  The keys of these
+hashes are the basename and extension, respectively, and the value for a
+given key is the name of the syntax to be passed via `--syntax <syntax>`
+to "highlight".  The last priority is the "highlight" configuration of
+`Shebang` regular expressions to detect the language based on the first
+line in the file (for example, matching the line "#!/bin/bash").  See
+the highlight documentation and the default config at
+/etc/highlight/filetypes.conf for more details.
 For example if repositories you are hosting use "phtml" extension for
 PHP files, and you want to have correct syntax-highlighting for those
index 6cb4280e49c8aee60a8496ec7e4d0c7f187c133d..44094f41d580bd4259bdb2bdb680b6dff4fd5340 100755 (executable)
@@ -3931,15 +3931,16 @@ sub guess_file_syntax {
 # or return original FD if no highlighting
 sub run_highlighter {
        my ($fd, $highlight, $syntax) = @_;
-       return $fd unless ($highlight && defined $syntax);
+       return $fd unless ($highlight);
        close $fd;
+       my $syntax_arg = (defined $syntax) ? "--syntax $syntax" : "--force";
        open $fd, quote_command(git_cmd(), "cat-file", "blob", $hash)." | ".
                  quote_command($^X, '-CO', '-MEncode=decode,FB_DEFAULT', '-pse',
                    '$_ = decode($fe, $_, FB_DEFAULT) if !utf8::decode($_);',
                    '--', "-fe=$fallback_encoding")." | ".
-                 " --replace-tabs=8 --fragment --syntax $syntax |"
+                 " --replace-tabs=8 --fragment $syntax_arg |"
                or die_error(500, "Couldn't open file or run syntax highlighter");
        return $fd;
@@ -7063,8 +7064,7 @@ sub git_blob {
        my $highlight = gitweb_check_feature('highlight');
        my $syntax = guess_file_syntax($highlight, $file_name);
-       $fd = run_highlighter($fd, $highlight, $syntax)
-               if $syntax;
+       $fd = run_highlighter($fd, $highlight, $syntax);
        git_header_html(undef, $expires);
        my $formats_nav = '';
@@ -7117,7 +7117,7 @@ sub git_blob {
                        $line = untabify($line);
                        printf qq!<div class="pre"><a id="l%i" href="%s#l%i" class="linenr">%4i</a> %s</div>\n!,
                               $nr, esc_attr(href(-replay => 1)), $nr, $nr,
-                              $syntax ? sanitize($line) : esc_html($line, -nbsp=>1);
+                              $highlight ? sanitize($line) : esc_html($line, -nbsp=>1);
        close $fd
index e94b2f147a72b216a1c97d278bb9c822aed7392a..6d06ed96cbc37ee0d19173a4c5379e021c1e5414 100755 (executable)
@@ -709,6 +709,14 @@ test_expect_success HIGHLIGHT \
         git commit -m "Add" &&
         gitweb_run "p=.git;a=blob;"'
+test_expect_success HIGHLIGHT \
+       'syntax highlighting (highlighter language autodetection)' \
+       'git config gitweb.highlight yes &&
+        echo "#!/usr/bin/perl" > test &&
+        git add test &&
+        git commit -m "Add test" &&
+        gitweb_run "p=.git;a=blob;f=test"'
 # ----------------------------------------------------------------------
 # forks of projects