optimize verify-pack a bit
[gitweb.git] / git-cvsserver.perl
index 674892b8163263e85dad28b935f4a448dabd857a..920bbe15a3bbc7768d47a2adbc84aa81264e2226 100755 (executable)
@@ -502,7 +502,7 @@ sub req_add
                 print $state->{CVSROOT} . "/$state->{module}/$filename\n";
 
                 # this is an "entries" line
-                my $kopts = kopts_from_path($filepart);
+                my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
                 $log->debug("/$filepart/1.$meta->{revision}//$kopts/");
                 print "/$filepart/1.$meta->{revision}//$kopts/\n";
                 # permissions
@@ -533,9 +533,26 @@ sub req_add
 
         print "Checked-in $dirpart\n";
         print "$filename\n";
-        my $kopts = kopts_from_path($filepart);
+        my $kopts = kopts_from_path($filename,"file",
+                        $state->{entries}{$filename}{modified_filename});
         print "/$filepart/0//$kopts/\n";
 
+        my $requestedKopts = $state->{opt}{k};
+        if(defined($requestedKopts))
+        {
+            $requestedKopts = "-k$requestedKopts";
+        }
+        else
+        {
+            $requestedKopts = "";
+        }
+        if( $kopts ne $requestedKopts )
+        {
+            $log->warn("Ignoring requested -k='$requestedKopts'"
+                        . " for '$filename'; detected -k='$kopts' instead");
+            #TODO: Also have option to send warning to user?
+        }
+
         $addcount++;
     }
 
@@ -615,7 +632,7 @@ sub req_remove
 
         print "Checked-in $dirpart\n";
         print "$filename\n";
-        my $kopts = kopts_from_path($filepart);
+        my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
         print "/$filepart/-1.$wrev//$kopts/\n";
 
         $rmcount++;
@@ -785,6 +802,7 @@ sub req_co
     argsplit("co");
 
     my $module = $state->{args}[0];
+    $state->{module} = $module;
     my $checkout_path = $module;
 
     # use the user specified directory if we're given it
@@ -862,6 +880,7 @@ sub req_co
         # Don't want to check out deleted files
         next if ( $git->{filehash} eq "deleted" );
 
+        my $fullName = $git->{name};
         ( $git->{name}, $git->{dir} ) = filenamesplit($git->{name});
 
        if (length($git->{dir}) && $git->{dir} ne './'
@@ -892,7 +911,7 @@ sub req_co
        print $state->{CVSROOT} . "/$module/" . ( defined ( $git->{dir} ) and $git->{dir} ne "./" ? $git->{dir} . "/" : "" ) . "$git->{name}\n";
 
         # this is an "entries" line
-        my $kopts = kopts_from_path($git->{name});
+        my $kopts = kopts_from_path($fullName,"sha1",$git->{filehash});
         print "/$git->{name}/1.$git->{revision}//$kopts/\n";
         # permissions
         print "u=$git->{mode},g=$git->{mode},o=$git->{mode}\n";
@@ -1101,7 +1120,7 @@ sub req_update
                print $state->{CVSROOT} . "/$state->{module}/$filename\n";
 
                # this is an "entries" line
-               my $kopts = kopts_from_path($filepart);
+               my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
                $log->debug("/$filepart/1.$meta->{revision}//$kopts/");
                print "/$filepart/1.$meta->{revision}//$kopts/\n";
 
@@ -1149,7 +1168,8 @@ sub req_update
                     print "Merged $dirpart\n";
                     $log->debug($state->{CVSROOT} . "/$state->{module}/$filename");
                     print $state->{CVSROOT} . "/$state->{module}/$filename\n";
-                    my $kopts = kopts_from_path($filepart);
+                    my $kopts = kopts_from_path("$dirpart/$filepart",
+                                                "file",$mergedFile);
                     $log->debug("/$filepart/1.$meta->{revision}//$kopts/");
                     print "/$filepart/1.$meta->{revision}//$kopts/\n";
                 }
@@ -1165,7 +1185,8 @@ sub req_update
                 {
                     print "Merged $dirpart\n";
                     print $state->{CVSROOT} . "/$state->{module}/$filename\n";
-                    my $kopts = kopts_from_path($filepart);
+                    my $kopts = kopts_from_path("$dirpart/$filepart",
+                                                "file",$mergedFile);
                     print "/$filepart/1.$meta->{revision}/+/$kopts/\n";
                 }
             }
@@ -1416,7 +1437,7 @@ sub req_ci
             }
             print "Checked-in $dirpart\n";
             print "$filename\n";
-            my $kopts = kopts_from_path($filepart);
+            my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
             print "/$filepart/1.$meta->{revision}//$kopts/\n";
         }
     }
@@ -2294,21 +2315,210 @@ sub cleanupTmpDir
 # file should get -kb.
 sub kopts_from_path
 {
-       my ($path) = @_;
+    my ($path, $srcType, $name) = @_;
 
-       # Once it exists, the git attributes system should be used to look up
-       # what attributes apply to this path.
+    if ( defined ( $cfg->{gitcvs}{usecrlfattr} ) and
+         $cfg->{gitcvs}{usecrlfattr} =~ /\s*(1|true|yes)\s*$/i )
+    {
+        my ($val) = check_attr( "crlf", $path );
+        if ( $val eq "set" )
+        {
+            return "";
+        }
+        elsif ( $val eq "unset" )
+        {
+            return "-kb"
+        }
+        else
+        {
+            $log->info("Unrecognized check_attr crlf $path : $val");
+        }
+    }
 
-       # Until then, take the setting from the config file
-    unless ( defined ( $cfg->{gitcvs}{allbinary} ) and $cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i )
+    if ( defined ( $cfg->{gitcvs}{allbinary} ) )
     {
-               # Return "" to give no special treatment to any path
-               return "";
-    } else {
-               # Alternatively, to have all files treated as if they are binary (which
-               # is more like git itself), always return the "-kb" option
-               return "-kb";
+        if( ($cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i) )
+        {
+            return "-kb";
+        }
+        elsif( ($cfg->{gitcvs}{allbinary} =~ /^\s*guess\s*$/i) )
+        {
+            if( $srcType eq "sha1Or-k" &&
+                !defined($name) )
+            {
+                my ($ret)=$state->{entries}{$path}{options};
+                if( !defined($ret) )
+                {
+                    $ret=$state->{opt}{k};
+                    if(defined($ret))
+                    {
+                        $ret="-k$ret";
+                    }
+                    else
+                    {
+                        $ret="";
+                    }
+                }
+                if( ! ($ret=~/^(|-kb|-kkv|-kkvl|-kk|-ko|-kv)$/) )
+                {
+                    print "E Bad -k option\n";
+                    $log->warn("Bad -k option: $ret");
+                    die "Error: Bad -k option: $ret\n";
+                }
+
+                return $ret;
+            }
+            else
+            {
+                if( is_binary($srcType,$name) )
+                {
+                    $log->debug("... as binary");
+                    return "-kb";
+                }
+                else
+                {
+                    $log->debug("... as text");
+                }
+            }
+        }
+    }
+    # Return "" to give no special treatment to any path
+    return "";
+}
+
+sub check_attr
+{
+    my ($attr,$path) = @_;
+    ensureWorkTree();
+    if ( open my $fh, '-|', "git", "check-attr", $attr, "--", $path )
+    {
+        my $val = <$fh>;
+        close $fh;
+        $val =~ s/.*: ([^:\r\n]*)\s*$/$1/;
+        return $val;
+    }
+    else
+    {
+        return undef;
+    }
+}
+
+# This should have the same heuristics as convert.c:is_binary() and related.
+# Note that the bare CR test is done by callers in convert.c.
+sub is_binary
+{
+    my ($srcType,$name) = @_;
+    $log->debug("is_binary($srcType,$name)");
+
+    # Minimize amount of interpreted code run in the inner per-character
+    # loop for large files, by totalling each character value and
+    # then analyzing the totals.
+    my @counts;
+    my $i;
+    for($i=0;$i<256;$i++)
+    {
+        $counts[$i]=0;
+    }
+
+    my $fh = open_blob_or_die($srcType,$name);
+    my $line;
+    while( defined($line=<$fh>) )
+    {
+        # Any '\0' and bare CR are considered binary.
+        if( $line =~ /\0|(\r[^\n])/ )
+        {
+            close($fh);
+            return 1;
+        }
+
+        # Count up each character in the line:
+        my $len=length($line);
+        for($i=0;$i<$len;$i++)
+        {
+            $counts[ord(substr($line,$i,1))]++;
+        }
+    }
+    close $fh;
+
+    # Don't count CR and LF as either printable/nonprintable
+    $counts[ord("\n")]=0;
+    $counts[ord("\r")]=0;
+
+    # Categorize individual character count into printable and nonprintable:
+    my $printable=0;
+    my $nonprintable=0;
+    for($i=0;$i<256;$i++)
+    {
+        if( $i < 32 &&
+            $i != ord("\b") &&
+            $i != ord("\t") &&
+            $i != 033 &&       # ESC
+            $i != 014 )        # FF
+        {
+            $nonprintable+=$counts[$i];
+        }
+        elsif( $i==127 )  # DEL
+        {
+            $nonprintable+=$counts[$i];
+        }
+        else
+        {
+            $printable+=$counts[$i];
+        }
+    }
+
+    return ($printable >> 7) < $nonprintable;
+}
+
+# Returns open file handle.  Possible invocations:
+#  - open_blob_or_die("file",$filename);
+#  - open_blob_or_die("sha1",$filehash);
+sub open_blob_or_die
+{
+    my ($srcType,$name) = @_;
+    my ($fh);
+    if( $srcType eq "file" )
+    {
+        if( !open $fh,"<",$name )
+        {
+            $log->warn("Unable to open file $name: $!");
+            die "Unable to open file $name: $!\n";
+        }
+    }
+    elsif( $srcType eq "sha1" || $srcType eq "sha1Or-k" )
+    {
+        unless ( defined ( $name ) and $name =~ /^[a-zA-Z0-9]{40}$/ )
+        {
+            $log->warn("Need filehash");
+            die "Need filehash\n";
+        }
+
+        my $type = `git cat-file -t $name`;
+        chomp $type;
+
+        unless ( defined ( $type ) and $type eq "blob" )
+        {
+            $log->warn("Invalid type '$type' for '$name'");
+            die ( "Invalid type '$type' (expected 'blob')" )
+        }
+
+        my $size = `git cat-file -s $name`;
+        chomp $size;
+
+        $log->debug("open_blob_or_die($name) size=$size, type=$type");
+
+        unless( open $fh, '-|', "git", "cat-file", "blob", $name )
+        {
+            $log->warn("Unable to open sha1 $name");
+            die "Unable to open sha1 $name\n";
+        }
+    }
+    else
+    {
+        $log->warn("Unknown type of blob source: $srcType");
+        die "Unknown type of blob source: $srcType\n";
     }
+    return $fh;
 }
 
 # Generate a CVS author name from Git author information, by taking