# if we're called with PATH_INFO, we have to strip that
# from the URL to find our real URL
-if (my $path_info = $ENV{"PATH_INFO"}) {
+# we make $path_info global because it's also used later on
+my $path_info = $ENV{"PATH_INFO"};
+if ($path_info) {
$my_url =~ s,\Q$path_info\E$,,;
$my_uri =~ s,\Q$path_info\E$,,;
}
# ======================================================================
# input validation and dispatch
-our $action = $cgi->param('a');
+
+# input parameters can be collected from a variety of sources (presently, CGI
+# and PATH_INFO), so we define an %input_params hash that collects them all
+# together during validation: this allows subsequent uses (e.g. href()) to be
+# agnostic of the parameter origin
+
+my %input_params = ();
+
+# input parameters are stored with the long parameter name as key. This will
+# also be used in the href subroutine to convert parameters to their CGI
+# equivalent, and since the href() usage is the most frequent one, we store
+# the name -> CGI key mapping here, instead of the reverse.
+#
+# XXX: Warning: If you touch this, check the search form for updating,
+# too.
+
+my @cgi_param_mapping = (
+ project => "p",
+ action => "a",
+ file_name => "f",
+ file_parent => "fp",
+ hash => "h",
+ hash_parent => "hp",
+ hash_base => "hb",
+ hash_parent_base => "hpb",
+ page => "pg",
+ order => "o",
+ searchtext => "s",
+ searchtype => "st",
+ snapshot_format => "sf",
+ extra_options => "opt",
+ search_use_regexp => "sr",
+);
+my %cgi_param_mapping = @cgi_param_mapping;
+
+# we will also need to know the possible actions, for validation
+my %actions = (
+ "blame" => \&git_blame,
+ "blobdiff" => \&git_blobdiff,
+ "blobdiff_plain" => \&git_blobdiff_plain,
+ "blob" => \&git_blob,
+ "blob_plain" => \&git_blob_plain,
+ "commitdiff" => \&git_commitdiff,
+ "commitdiff_plain" => \&git_commitdiff_plain,
+ "commit" => \&git_commit,
+ "forks" => \&git_forks,
+ "heads" => \&git_heads,
+ "history" => \&git_history,
+ "log" => \&git_log,
+ "rss" => \&git_rss,
+ "atom" => \&git_atom,
+ "search" => \&git_search,
+ "search_help" => \&git_search_help,
+ "shortlog" => \&git_shortlog,
+ "summary" => \&git_summary,
+ "tag" => \&git_tag,
+ "tags" => \&git_tags,
+ "tree" => \&git_tree,
+ "snapshot" => \&git_snapshot,
+ "object" => \&git_object,
+ # those below don't need $project
+ "opml" => \&git_opml,
+ "project_list" => \&git_project_list,
+ "project_index" => \&git_project_index,
+);
+
+# finally, we have the hash of allowed extra_options for the commands that
+# allow them
+my %allowed_options = (
+ "--no-merges" => [ qw(rss atom log shortlog history) ],
+);
+
+# fill %input_params with the CGI parameters. All values except for 'opt'
+# should be single values, but opt can be an array. We should probably
+# build an array of parameters that can be multi-valued, but since for the time
+# being it's only this one, we just single it out
+while (my ($name, $symbol) = each %cgi_param_mapping) {
+ if ($symbol eq 'opt') {
+ $input_params{$name} = [ $cgi->param($symbol) ];
+ } else {
+ $input_params{$name} = $cgi->param($symbol);
+ }
+}
+
+# now read PATH_INFO and update the parameter list for missing parameters
+sub evaluate_path_info {
+ return if defined $input_params{'project'};
+ return if !$path_info;
+ $path_info =~ s,^/+,,;
+ return if !$path_info;
+
+ # find which part of PATH_INFO is project
+ my $project = $path_info;
+ $project =~ s,/+$,,;
+ while ($project && !check_head_link("$projectroot/$project")) {
+ $project =~ s,/*[^/]*$,,;
+ }
+ return unless $project;
+ $input_params{'project'} = $project;
+
+ # do not change any parameters if an action is given using the query string
+ return if $input_params{'action'};
+ $path_info =~ s,^\Q$project\E/*,,;
+
+ # next, check if we have an action
+ my $action = $path_info;
+ $action =~ s,/.*$,,;
+ if (exists $actions{$action}) {
+ $path_info =~ s,^$action/*,,;
+ $input_params{'action'} = $action;
+ }
+
+ # list of actions that want hash_base instead of hash, but can have no
+ # pathname (f) parameter
+ my @wants_base = (
+ 'tree',
+ 'history',
+ );
+
+ # we want to catch
+ # [$hash_parent_base[:$file_parent]..]$hash_parent[:$file_name]
+ my ($parentrefname, $parentpathname, $refname, $pathname) =
+ ($path_info =~ /^(?:(.+?)(?::(.+))?\.\.)?(.+?)(?::(.+))?$/);
+
+ # first, analyze the 'current' part
+ if (defined $pathname) {
+ # we got "branch:filename" or "branch:dir/"
+ # we could use git_get_type(branch:pathname), but:
+ # - it needs $git_dir
+ # - it does a git() call
+ # - the convention of terminating directories with a slash
+ # makes it superfluous
+ # - embedding the action in the PATH_INFO would make it even
+ # more superfluous
+ $pathname =~ s,^/+,,;
+ if (!$pathname || substr($pathname, -1) eq "/") {
+ $input_params{'action'} ||= "tree";
+ $pathname =~ s,/$,,;
+ } else {
+ # the default action depends on whether we had parent info
+ # or not
+ if ($parentrefname) {
+ $input_params{'action'} ||= "blobdiff_plain";
+ } else {
+ $input_params{'action'} ||= "blob_plain";
+ }
+ }
+ $input_params{'hash_base'} ||= $refname;
+ $input_params{'file_name'} ||= $pathname;
+ } elsif (defined $refname) {
+ # we got "branch". In this case we have to choose if we have to
+ # set hash or hash_base.
+ #
+ # Most of the actions without a pathname only want hash to be
+ # set, except for the ones specified in @wants_base that want
+ # hash_base instead. It should also be noted that hand-crafted
+ # links having 'history' as an action and no pathname or hash
+ # set will fail, but that happens regardless of PATH_INFO.
+ $input_params{'action'} ||= "shortlog";
+ if (grep { $_ eq $input_params{'action'} } @wants_base) {
+ $input_params{'hash_base'} ||= $refname;
+ } else {
+ $input_params{'hash'} ||= $refname;
+ }
+ }
+
+ # next, handle the 'parent' part, if present
+ if (defined $parentrefname) {
+ # a missing pathspec defaults to the 'current' filename, allowing e.g.
+ # someproject/blobdiff/oldrev..newrev:/filename
+ if ($parentpathname) {
+ $parentpathname =~ s,^/+,,;
+ $parentpathname =~ s,/$,,;
+ $input_params{'file_parent'} ||= $parentpathname;
+ } else {
+ $input_params{'file_parent'} ||= $input_params{'file_name'};
+ }
+ # we assume that hash_parent_base is wanted if a path was specified,
+ # or if the action wants hash_base instead of hash
+ if (defined $input_params{'file_parent'} ||
+ grep { $_ eq $input_params{'action'} } @wants_base) {
+ $input_params{'hash_parent_base'} ||= $parentrefname;
+ } else {
+ $input_params{'hash_parent'} ||= $parentrefname;
+ }
+ }
+}
+evaluate_path_info();
+
+our $action = $input_params{'action'};
if (defined $action) {
- if ($action =~ m/[^0-9a-zA-Z\.\-_]/) {
+ if (!validate_action($action)) {
die_error(400, "Invalid action parameter");
}
}
# parameters which are pathnames
-our $project = $cgi->param('p');
+our $project = $input_params{'project'};
if (defined $project) {
- if (!validate_pathname($project) ||
- !(-d "$projectroot/$project") ||
- !check_head_link("$projectroot/$project") ||
- ($export_ok && !(-e "$projectroot/$project/$export_ok")) ||
- ($strict_export && !project_in_list($project))) {
+ if (!validate_project($project)) {
undef $project;
die_error(404, "No such project");
}
}
-our $file_name = $cgi->param('f');
+our $file_name = $input_params{'file_name'};
if (defined $file_name) {
if (!validate_pathname($file_name)) {
die_error(400, "Invalid file parameter");
}
}
-our $file_parent = $cgi->param('fp');
+our $file_parent = $input_params{'file_parent'};
if (defined $file_parent) {
if (!validate_pathname($file_parent)) {
die_error(400, "Invalid file parent parameter");
}
# parameters which are refnames
-our $hash = $cgi->param('h');
+our $hash = $input_params{'hash'};
if (defined $hash) {
if (!validate_refname($hash)) {
die_error(400, "Invalid hash parameter");
}
}
-our $hash_parent = $cgi->param('hp');
+our $hash_parent = $input_params{'hash_parent'};
if (defined $hash_parent) {
if (!validate_refname($hash_parent)) {
die_error(400, "Invalid hash parent parameter");
}
}
-our $hash_base = $cgi->param('hb');
+our $hash_base = $input_params{'hash_base'};
if (defined $hash_base) {
if (!validate_refname($hash_base)) {
die_error(400, "Invalid hash base parameter");
}
}
-my %allowed_options = (
- "--no-merges" => [ qw(rss atom log shortlog history) ],
-);
-
-our @extra_options = $cgi->param('opt');
-if (defined @extra_options) {
- foreach my $opt (@extra_options) {
- if (not exists $allowed_options{$opt}) {
- die_error(400, "Invalid option parameter");
- }
- if (not grep(/^$action$/, @{$allowed_options{$opt}})) {
- die_error(400, "Invalid option parameter for this action");
- }
+our @extra_options = @{$input_params{'extra_options'}};
+# @extra_options is always defined, since it can only be (currently) set from
+# CGI, and $cgi->param() returns the empty array in array context if the param
+# is not set
+foreach my $opt (@extra_options) {
+ if (not exists $allowed_options{$opt}) {
+ die_error(400, "Invalid option parameter");
+ }
+ if (not grep(/^$action$/, @{$allowed_options{$opt}})) {
+ die_error(400, "Invalid option parameter for this action");
}
}
-our $hash_parent_base = $cgi->param('hpb');
+our $hash_parent_base = $input_params{'hash_parent_base'};
if (defined $hash_parent_base) {
if (!validate_refname($hash_parent_base)) {
die_error(400, "Invalid hash parent base parameter");
}
# other parameters
-our $page = $cgi->param('pg');
+our $page = $input_params{'page'};
if (defined $page) {
if ($page =~ m/[^0-9]/) {
die_error(400, "Invalid page parameter");
}
}
-our $searchtype = $cgi->param('st');
+our $searchtype = $input_params{'searchtype'};
if (defined $searchtype) {
if ($searchtype =~ m/[^a-z]/) {
die_error(400, "Invalid searchtype parameter");
}
}
-our $search_use_regexp = $cgi->param('sr');
+our $search_use_regexp = $input_params{'search_use_regexp'};
-our $searchtext = $cgi->param('s');
+our $searchtext = $input_params{'searchtext'};
our $search_regexp;
if (defined $searchtext) {
if (length($searchtext) < 2) {
$search_regexp = $search_use_regexp ? $searchtext : quotemeta $searchtext;
}
-# now read PATH_INFO and use it as alternative to parameters
-sub evaluate_path_info {
- return if defined $project;
- my $path_info = $ENV{"PATH_INFO"};
- return if !$path_info;
- $path_info =~ s,^/+,,;
- return if !$path_info;
- # find which part of PATH_INFO is project
- $project = $path_info;
- $project =~ s,/+$,,;
- while ($project && !check_head_link("$projectroot/$project")) {
- $project =~ s,/*[^/]*$,,;
- }
- # validate project
- $project = validate_pathname($project);
- if (!$project ||
- ($export_ok && !-e "$projectroot/$project/$export_ok") ||
- ($strict_export && !project_in_list($project))) {
- undef $project;
- return;
- }
- # do not change any parameters if an action is given using the query string
- return if $action;
- $path_info =~ s,^\Q$project\E/*,,;
- my ($refname, $pathname) = split(/:/, $path_info, 2);
- if (defined $pathname) {
- # we got "project.git/branch:filename" or "project.git/branch:dir/"
- # we could use git_get_type(branch:pathname), but it needs $git_dir
- $pathname =~ s,^/+,,;
- if (!$pathname || substr($pathname, -1) eq "/") {
- $action ||= "tree";
- $pathname =~ s,/$,,;
- } else {
- $action ||= "blob_plain";
- }
- $hash_base ||= validate_refname($refname);
- $file_name ||= validate_pathname($pathname);
- } elsif (defined $refname) {
- # we got "project.git/branch"
- $action ||= "shortlog";
- $hash ||= validate_refname($refname);
- }
-}
-evaluate_path_info();
-
# path to the current git repository
our $git_dir;
$git_dir = "$projectroot/$project" if $project;
# dispatch
-my %actions = (
- "blame" => \&git_blame,
- "blobdiff" => \&git_blobdiff,
- "blobdiff_plain" => \&git_blobdiff_plain,
- "blob" => \&git_blob,
- "blob_plain" => \&git_blob_plain,
- "commitdiff" => \&git_commitdiff,
- "commitdiff_plain" => \&git_commitdiff_plain,
- "commit" => \&git_commit,
- "forks" => \&git_forks,
- "heads" => \&git_heads,
- "history" => \&git_history,
- "log" => \&git_log,
- "rss" => \&git_rss,
- "atom" => \&git_atom,
- "search" => \&git_search,
- "search_help" => \&git_search_help,
- "shortlog" => \&git_shortlog,
- "summary" => \&git_summary,
- "tag" => \&git_tag,
- "tags" => \&git_tags,
- "tree" => \&git_tree,
- "snapshot" => \&git_snapshot,
- "object" => \&git_object,
- # those below don't need $project
- "opml" => \&git_opml,
- "project_list" => \&git_project_list,
- "project_index" => \&git_project_index,
-);
-
if (!defined $action) {
if (defined $hash) {
$action = git_get_type($hash);
# default is to use -absolute url() i.e. $my_uri
my $href = $params{-full} ? $my_url : $my_uri;
- # XXX: Warning: If you touch this, check the search form for updating,
- # too.
-
- my @mapping = (
- project => "p",
- action => "a",
- file_name => "f",
- file_parent => "fp",
- hash => "h",
- hash_parent => "hp",
- hash_base => "hb",
- hash_parent_base => "hpb",
- page => "pg",
- order => "o",
- searchtext => "s",
- searchtype => "st",
- snapshot_format => "sf",
- extra_options => "opt",
- search_use_regexp => "sr",
- );
- my %mapping = @mapping;
-
$params{'project'} = $project unless exists $params{'project'};
if ($params{-replay}) {
- while (my ($name, $symbol) = each %mapping) {
+ while (my ($name, $symbol) = each %cgi_param_mapping) {
if (!exists $params{$name}) {
- # to allow for multivalued params we use arrayref form
- $params{$name} = [ $cgi->param($symbol) ];
+ $params{$name} = $input_params{$name};
}
}
}
my ($use_pathinfo) = gitweb_check_feature('pathinfo');
if ($use_pathinfo) {
- # use PATH_INFO for project name
+ # try to put as many parameters as possible in PATH_INFO:
+ # - project name
+ # - action
+ # - hash or hash_base:/filename
+
+ # When the script is the root DirectoryIndex for the domain,
+ # $href here would be something like http://gitweb.example.com/
+ # Thus, we strip any trailing / from $href, to spare us double
+ # slashes in the final URL
+ $href =~ s,/$,,;
+
+ # Then add the project name, if present
$href .= "/".esc_url($params{'project'}) if defined $params{'project'};
delete $params{'project'};
- # Summary just uses the project path URL
- if (defined $params{'action'} && $params{'action'} eq 'summary') {
+ # Summary just uses the project path URL, any other action is
+ # added to the URL
+ if (defined $params{'action'}) {
+ $href .= "/".esc_url($params{'action'}) unless $params{'action'} eq 'summary';
delete $params{'action'};
}
+
+ # Finally, we put either hash_base:/file_name or hash
+ if (defined $params{'hash_base'}) {
+ $href .= "/".esc_url($params{'hash_base'});
+ if (defined $params{'file_name'}) {
+ $href .= ":/".esc_url($params{'file_name'});
+ delete $params{'file_name'};
+ }
+ delete $params{'hash'};
+ delete $params{'hash_base'};
+ } elsif (defined $params{'hash'}) {
+ $href .= "/".esc_url($params{'hash'});
+ delete $params{'hash'};
+ }
}
# now encode the parameters explicitly
my @result = ();
- for (my $i = 0; $i < @mapping; $i += 2) {
- my ($name, $symbol) = ($mapping[$i], $mapping[$i+1]);
+ for (my $i = 0; $i < @cgi_param_mapping; $i += 2) {
+ my ($name, $symbol) = ($cgi_param_mapping[$i], $cgi_param_mapping[$i+1]);
if (defined $params{$name}) {
if (ref($params{$name}) eq "ARRAY") {
foreach my $par (@{$params{$name}}) {
## ======================================================================
## validation, quoting/unquoting and escaping
+sub validate_action {
+ my $input = shift || return undef;
+ return undef unless exists $actions{$input};
+ return $input;
+}
+
+sub validate_project {
+ my $input = shift || return undef;
+ if (!validate_pathname($input) ||
+ !(-d "$projectroot/$input") ||
+ !check_head_link("$projectroot/$input") ||
+ ($export_ok && !(-e "$projectroot/$input/$export_ok")) ||
+ ($strict_export && !project_in_list($input))) {
+ return undef;
+ } else {
+ return $input;
+ }
+}
+
sub validate_pathname {
my $input = shift || return undef;
## actions
sub git_project_list {
- my $order = $cgi->param('o');
+ my $order = $input_params{'order'};
if (defined $order && $order !~ m/none|project|descr|owner|age/) {
die_error(400, "Unknown order parameter");
}
}
sub git_forks {
- my $order = $cgi->param('o');
+ my $order = $input_params{'order'};
if (defined $order && $order !~ m/none|project|descr|owner|age/) {
die_error(400, "Unknown order parameter");
}
my @supported_fmts = gitweb_check_feature('snapshot');
@supported_fmts = filter_snapshot_fmts(@supported_fmts);
- my $format = $cgi->param('sf');
+ my $format = $input_params{'snapshot_format'};
if (!@supported_fmts) {
die_error(403, "Snapshots not allowed");
}