git-cvsserver.perlon commit cvsserver: add misc commit lookup, file meta data, and file listing functions (658b57a)
   1#!/usr/bin/perl
   2
   3####
   4#### This application is a CVS emulation layer for git.
   5#### It is intended for clients to connect over SSH.
   6#### See the documentation for more details.
   7####
   8#### Copyright The Open University UK - 2006.
   9####
  10#### Authors: Martyn Smith    <martyn@catalyst.net.nz>
  11####          Martin Langhoff <martin@laptop.org>
  12####
  13####
  14#### Released under the GNU Public License, version 2.
  15####
  16####
  17
  18use 5.008;
  19use strict;
  20use warnings;
  21use bytes;
  22
  23use Fcntl;
  24use File::Temp qw/tempdir tempfile/;
  25use File::Path qw/rmtree/;
  26use File::Basename;
  27use Getopt::Long qw(:config require_order no_ignore_case);
  28
  29my $VERSION = '@@GIT_VERSION@@';
  30
  31my $log = GITCVS::log->new();
  32my $cfg;
  33
  34my $DATE_LIST = {
  35    Jan => "01",
  36    Feb => "02",
  37    Mar => "03",
  38    Apr => "04",
  39    May => "05",
  40    Jun => "06",
  41    Jul => "07",
  42    Aug => "08",
  43    Sep => "09",
  44    Oct => "10",
  45    Nov => "11",
  46    Dec => "12",
  47};
  48
  49# Enable autoflush for STDOUT (otherwise the whole thing falls apart)
  50$| = 1;
  51
  52#### Definition and mappings of functions ####
  53
  54# NOTE: Despite the existence of req_CATCHALL and req_EMPTY unimplemented
  55#  requests, this list is incomplete.  It is missing many rarer/optional
  56#  requests.  Perhaps some clients require a claim of support for
  57#  these specific requests for main functionality to work?
  58my $methods = {
  59    'Root'            => \&req_Root,
  60    'Valid-responses' => \&req_Validresponses,
  61    'valid-requests'  => \&req_validrequests,
  62    'Directory'       => \&req_Directory,
  63    'Entry'           => \&req_Entry,
  64    'Modified'        => \&req_Modified,
  65    'Unchanged'       => \&req_Unchanged,
  66    'Questionable'    => \&req_Questionable,
  67    'Argument'        => \&req_Argument,
  68    'Argumentx'       => \&req_Argument,
  69    'expand-modules'  => \&req_expandmodules,
  70    'add'             => \&req_add,
  71    'remove'          => \&req_remove,
  72    'co'              => \&req_co,
  73    'update'          => \&req_update,
  74    'ci'              => \&req_ci,
  75    'diff'            => \&req_diff,
  76    'log'             => \&req_log,
  77    'rlog'            => \&req_log,
  78    'tag'             => \&req_CATCHALL,
  79    'status'          => \&req_status,
  80    'admin'           => \&req_CATCHALL,
  81    'history'         => \&req_CATCHALL,
  82    'watchers'        => \&req_EMPTY,
  83    'editors'         => \&req_EMPTY,
  84    'noop'            => \&req_EMPTY,
  85    'annotate'        => \&req_annotate,
  86    'Global_option'   => \&req_Globaloption,
  87};
  88
  89##############################################
  90
  91
  92# $state holds all the bits of information the clients sends us that could
  93# potentially be useful when it comes to actually _doing_ something.
  94my $state = { prependdir => '' };
  95
  96# Work is for managing temporary working directory
  97my $work =
  98    {
  99        state => undef,  # undef, 1 (empty), 2 (with stuff)
 100        workDir => undef,
 101        index => undef,
 102        emptyDir => undef,
 103        tmpDir => undef
 104    };
 105
 106$log->info("--------------- STARTING -----------------");
 107
 108my $usage =
 109    "Usage: git cvsserver [options] [pserver|server] [<directory> ...]\n".
 110    "    --base-path <path>  : Prepend to requested CVSROOT\n".
 111    "                          Can be read from GIT_CVSSERVER_BASE_PATH\n".
 112    "    --strict-paths      : Don't allow recursing into subdirectories\n".
 113    "    --export-all        : Don't check for gitcvs.enabled in config\n".
 114    "    --version, -V       : Print version information and exit\n".
 115    "    -h, -H              : Print usage information and exit\n".
 116    "\n".
 117    "<directory> ... is a list of allowed directories. If no directories\n".
 118    "are given, all are allowed. This is an additional restriction, gitcvs\n".
 119    "access still needs to be enabled by the gitcvs.enabled config option.\n".
 120    "Alternately, one directory may be specified in GIT_CVSSERVER_ROOT.\n";
 121
 122my @opts = ( 'h|H', 'version|V',
 123             'base-path=s', 'strict-paths', 'export-all' );
 124GetOptions( $state, @opts )
 125    or die $usage;
 126
 127if ($state->{version}) {
 128    print "git-cvsserver version $VERSION\n";
 129    exit;
 130}
 131if ($state->{help}) {
 132    print $usage;
 133    exit;
 134}
 135
 136my $TEMP_DIR = tempdir( CLEANUP => 1 );
 137$log->debug("Temporary directory is '$TEMP_DIR'");
 138
 139$state->{method} = 'ext';
 140if (@ARGV) {
 141    if ($ARGV[0] eq 'pserver') {
 142        $state->{method} = 'pserver';
 143        shift @ARGV;
 144    } elsif ($ARGV[0] eq 'server') {
 145        shift @ARGV;
 146    }
 147}
 148
 149# everything else is a directory
 150$state->{allowed_roots} = [ @ARGV ];
 151
 152# don't export the whole system unless the users requests it
 153if ($state->{'export-all'} && !@{$state->{allowed_roots}}) {
 154    die "--export-all can only be used together with an explicit whitelist\n";
 155}
 156
 157# Environment handling for running under git-shell
 158if (exists $ENV{GIT_CVSSERVER_BASE_PATH}) {
 159    if ($state->{'base-path'}) {
 160        die "Cannot specify base path both ways.\n";
 161    }
 162    my $base_path = $ENV{GIT_CVSSERVER_BASE_PATH};
 163    $state->{'base-path'} = $base_path;
 164    $log->debug("Picked up base path '$base_path' from environment.\n");
 165}
 166if (exists $ENV{GIT_CVSSERVER_ROOT}) {
 167    if (@{$state->{allowed_roots}}) {
 168        die "Cannot specify roots both ways: @ARGV\n";
 169    }
 170    my $allowed_root = $ENV{GIT_CVSSERVER_ROOT};
 171    $state->{allowed_roots} = [ $allowed_root ];
 172    $log->debug("Picked up allowed root '$allowed_root' from environment.\n");
 173}
 174
 175# if we are called with a pserver argument,
 176# deal with the authentication cat before entering the
 177# main loop
 178if ($state->{method} eq 'pserver') {
 179    my $line = <STDIN>; chomp $line;
 180    unless( $line =~ /^BEGIN (AUTH|VERIFICATION) REQUEST$/) {
 181       die "E Do not understand $line - expecting BEGIN AUTH REQUEST\n";
 182    }
 183    my $request = $1;
 184    $line = <STDIN>; chomp $line;
 185    unless (req_Root('root', $line)) { # reuse Root
 186       print "E Invalid root $line \n";
 187       exit 1;
 188    }
 189    $line = <STDIN>; chomp $line;
 190    my $user = $line;
 191    $line = <STDIN>; chomp $line;
 192    my $password = $line;
 193
 194    if ($user eq 'anonymous') {
 195        # "A" will be 1 byte, use length instead in case the
 196        # encryption method ever changes (yeah, right!)
 197        if (length($password) > 1 ) {
 198            print "E Don't supply a password for the `anonymous' user\n";
 199            print "I HATE YOU\n";
 200            exit 1;
 201        }
 202
 203        # Fall through to LOVE
 204    } else {
 205        # Trying to authenticate a user
 206        if (not exists $cfg->{gitcvs}->{authdb}) {
 207            print "E the repo config file needs a [gitcvs] section with an 'authdb' parameter set to the filename of the authentication database\n";
 208            print "I HATE YOU\n";
 209            exit 1;
 210        }
 211
 212        my $authdb = $cfg->{gitcvs}->{authdb};
 213
 214        unless (-e $authdb) {
 215            print "E The authentication database specified in [gitcvs.authdb] does not exist\n";
 216            print "I HATE YOU\n";
 217            exit 1;
 218        }
 219
 220        my $auth_ok;
 221        open my $passwd, "<", $authdb or die $!;
 222        while (<$passwd>) {
 223            if (m{^\Q$user\E:(.*)}) {
 224                if (crypt($user, descramble($password)) eq $1) {
 225                    $auth_ok = 1;
 226                }
 227            };
 228        }
 229        close $passwd;
 230
 231        unless ($auth_ok) {
 232            print "I HATE YOU\n";
 233            exit 1;
 234        }
 235
 236        # Fall through to LOVE
 237    }
 238
 239    # For checking whether the user is anonymous on commit
 240    $state->{user} = $user;
 241
 242    $line = <STDIN>; chomp $line;
 243    unless ($line eq "END $request REQUEST") {
 244       die "E Do not understand $line -- expecting END $request REQUEST\n";
 245    }
 246    print "I LOVE YOU\n";
 247    exit if $request eq 'VERIFICATION'; # cvs login
 248    # and now back to our regular programme...
 249}
 250
 251# Keep going until the client closes the connection
 252while (<STDIN>)
 253{
 254    chomp;
 255
 256    # Check to see if we've seen this method, and call appropriate function.
 257    if ( /^([\w-]+)(?:\s+(.*))?$/ and defined($methods->{$1}) )
 258    {
 259        # use the $methods hash to call the appropriate sub for this command
 260        #$log->info("Method : $1");
 261        &{$methods->{$1}}($1,$2);
 262    } else {
 263        # log fatal because we don't understand this function. If this happens
 264        # we're fairly screwed because we don't know if the client is expecting
 265        # a response. If it is, the client will hang, we'll hang, and the whole
 266        # thing will be custard.
 267        $log->fatal("Don't understand command $_\n");
 268        die("Unknown command $_");
 269    }
 270}
 271
 272$log->debug("Processing time : user=" . (times)[0] . " system=" . (times)[1]);
 273$log->info("--------------- FINISH -----------------");
 274
 275chdir '/';
 276exit 0;
 277
 278# Magic catchall method.
 279#    This is the method that will handle all commands we haven't yet
 280#    implemented. It simply sends a warning to the log file indicating a
 281#    command that hasn't been implemented has been invoked.
 282sub req_CATCHALL
 283{
 284    my ( $cmd, $data ) = @_;
 285    $log->warn("Unhandled command : req_$cmd : $data");
 286}
 287
 288# This method invariably succeeds with an empty response.
 289sub req_EMPTY
 290{
 291    print "ok\n";
 292}
 293
 294# Root pathname \n
 295#     Response expected: no. Tell the server which CVSROOT to use. Note that
 296#     pathname is a local directory and not a fully qualified CVSROOT variable.
 297#     pathname must already exist; if creating a new root, use the init
 298#     request, not Root. pathname does not include the hostname of the server,
 299#     how to access the server, etc.; by the time the CVS protocol is in use,
 300#     connection, authentication, etc., are already taken care of. The Root
 301#     request must be sent only once, and it must be sent before any requests
 302#     other than Valid-responses, valid-requests, UseUnchanged, Set or init.
 303sub req_Root
 304{
 305    my ( $cmd, $data ) = @_;
 306    $log->debug("req_Root : $data");
 307
 308    unless ($data =~ m#^/#) {
 309        print "error 1 Root must be an absolute pathname\n";
 310        return 0;
 311    }
 312
 313    my $cvsroot = $state->{'base-path'} || '';
 314    $cvsroot =~ s#/+$##;
 315    $cvsroot .= $data;
 316
 317    if ($state->{CVSROOT}
 318        && ($state->{CVSROOT} ne $cvsroot)) {
 319        print "error 1 Conflicting roots specified\n";
 320        return 0;
 321    }
 322
 323    $state->{CVSROOT} = $cvsroot;
 324
 325    $ENV{GIT_DIR} = $state->{CVSROOT} . "/";
 326
 327    if (@{$state->{allowed_roots}}) {
 328        my $allowed = 0;
 329        foreach my $dir (@{$state->{allowed_roots}}) {
 330            next unless $dir =~ m#^/#;
 331            $dir =~ s#/+$##;
 332            if ($state->{'strict-paths'}) {
 333                if ($ENV{GIT_DIR} =~ m#^\Q$dir\E/?$#) {
 334                    $allowed = 1;
 335                    last;
 336                }
 337            } elsif ($ENV{GIT_DIR} =~ m#^\Q$dir\E(/?$|/)#) {
 338                $allowed = 1;
 339                last;
 340            }
 341        }
 342
 343        unless ($allowed) {
 344            print "E $ENV{GIT_DIR} does not seem to be a valid GIT repository\n";
 345            print "E \n";
 346            print "error 1 $ENV{GIT_DIR} is not a valid repository\n";
 347            return 0;
 348        }
 349    }
 350
 351    unless (-d $ENV{GIT_DIR} && -e $ENV{GIT_DIR}.'HEAD') {
 352       print "E $ENV{GIT_DIR} does not seem to be a valid GIT repository\n";
 353       print "E \n";
 354       print "error 1 $ENV{GIT_DIR} is not a valid repository\n";
 355       return 0;
 356    }
 357
 358    my @gitvars = `git config -l`;
 359    if ($?) {
 360       print "E problems executing git-config on the server -- this is not a git repository or the PATH is not set correctly.\n";
 361        print "E \n";
 362        print "error 1 - problem executing git-config\n";
 363       return 0;
 364    }
 365    foreach my $line ( @gitvars )
 366    {
 367        next unless ( $line =~ /^(gitcvs)\.(?:(ext|pserver)\.)?([\w-]+)=(.*)$/ );
 368        unless ($2) {
 369            $cfg->{$1}{$3} = $4;
 370        } else {
 371            $cfg->{$1}{$2}{$3} = $4;
 372        }
 373    }
 374
 375    my $enabled = ($cfg->{gitcvs}{$state->{method}}{enabled}
 376                   || $cfg->{gitcvs}{enabled});
 377    unless ($state->{'export-all'} ||
 378            ($enabled && $enabled =~ /^\s*(1|true|yes)\s*$/i)) {
 379        print "E GITCVS emulation needs to be enabled on this repo\n";
 380        print "E the repo config file needs a [gitcvs] section added, and the parameter 'enabled' set to 1\n";
 381        print "E \n";
 382        print "error 1 GITCVS emulation disabled\n";
 383        return 0;
 384    }
 385
 386    my $logfile = $cfg->{gitcvs}{$state->{method}}{logfile} || $cfg->{gitcvs}{logfile};
 387    if ( $logfile )
 388    {
 389        $log->setfile($logfile);
 390    } else {
 391        $log->nofile();
 392    }
 393
 394    return 1;
 395}
 396
 397# Global_option option \n
 398#     Response expected: no. Transmit one of the global options `-q', `-Q',
 399#     `-l', `-t', `-r', or `-n'. option must be one of those strings, no
 400#     variations (such as combining of options) are allowed. For graceful
 401#     handling of valid-requests, it is probably better to make new global
 402#     options separate requests, rather than trying to add them to this
 403#     request.
 404sub req_Globaloption
 405{
 406    my ( $cmd, $data ) = @_;
 407    $log->debug("req_Globaloption : $data");
 408    $state->{globaloptions}{$data} = 1;
 409}
 410
 411# Valid-responses request-list \n
 412#     Response expected: no. Tell the server what responses the client will
 413#     accept. request-list is a space separated list of tokens.
 414sub req_Validresponses
 415{
 416    my ( $cmd, $data ) = @_;
 417    $log->debug("req_Validresponses : $data");
 418
 419    # TODO : re-enable this, currently it's not particularly useful
 420    #$state->{validresponses} = [ split /\s+/, $data ];
 421}
 422
 423# valid-requests \n
 424#     Response expected: yes. Ask the server to send back a Valid-requests
 425#     response.
 426sub req_validrequests
 427{
 428    my ( $cmd, $data ) = @_;
 429
 430    $log->debug("req_validrequests");
 431
 432    $log->debug("SEND : Valid-requests " . join(" ",keys %$methods));
 433    $log->debug("SEND : ok");
 434
 435    print "Valid-requests " . join(" ",keys %$methods) . "\n";
 436    print "ok\n";
 437}
 438
 439# Directory local-directory \n
 440#     Additional data: repository \n. Response expected: no. Tell the server
 441#     what directory to use. The repository should be a directory name from a
 442#     previous server response. Note that this both gives a default for Entry
 443#     and Modified and also for ci and the other commands; normal usage is to
 444#     send Directory for each directory in which there will be an Entry or
 445#     Modified, and then a final Directory for the original directory, then the
 446#     command. The local-directory is relative to the top level at which the
 447#     command is occurring (i.e. the last Directory which is sent before the
 448#     command); to indicate that top level, `.' should be sent for
 449#     local-directory.
 450sub req_Directory
 451{
 452    my ( $cmd, $data ) = @_;
 453
 454    my $repository = <STDIN>;
 455    chomp $repository;
 456
 457
 458    $state->{localdir} = $data;
 459    $state->{repository} = $repository;
 460    $state->{path} = $repository;
 461    $state->{path} =~ s/^\Q$state->{CVSROOT}\E\///;
 462    $state->{module} = $1 if ($state->{path} =~ s/^(.*?)(\/|$)//);
 463    $state->{path} .= "/" if ( $state->{path} =~ /\S/ );
 464
 465    $state->{directory} = $state->{localdir};
 466    $state->{directory} = "" if ( $state->{directory} eq "." );
 467    $state->{directory} .= "/" if ( $state->{directory} =~ /\S/ );
 468
 469    if ( (not defined($state->{prependdir}) or $state->{prependdir} eq '') and $state->{localdir} eq "." and $state->{path} =~ /\S/ )
 470    {
 471        $log->info("Setting prepend to '$state->{path}'");
 472        $state->{prependdir} = $state->{path};
 473        foreach my $entry ( keys %{$state->{entries}} )
 474        {
 475            $state->{entries}{$state->{prependdir} . $entry} = $state->{entries}{$entry};
 476            delete $state->{entries}{$entry};
 477        }
 478    }
 479
 480    if ( defined ( $state->{prependdir} ) )
 481    {
 482        $log->debug("Prepending '$state->{prependdir}' to state|directory");
 483        $state->{directory} = $state->{prependdir} . $state->{directory}
 484    }
 485    $log->debug("req_Directory : localdir=$data repository=$repository path=$state->{path} directory=$state->{directory} module=$state->{module}");
 486}
 487
 488# Entry entry-line \n
 489#     Response expected: no. Tell the server what version of a file is on the
 490#     local machine. The name in entry-line is a name relative to the directory
 491#     most recently specified with Directory. If the user is operating on only
 492#     some files in a directory, Entry requests for only those files need be
 493#     included. If an Entry request is sent without Modified, Is-modified, or
 494#     Unchanged, it means the file is lost (does not exist in the working
 495#     directory). If both Entry and one of Modified, Is-modified, or Unchanged
 496#     are sent for the same file, Entry must be sent first. For a given file,
 497#     one can send Modified, Is-modified, or Unchanged, but not more than one
 498#     of these three.
 499sub req_Entry
 500{
 501    my ( $cmd, $data ) = @_;
 502
 503    #$log->debug("req_Entry : $data");
 504
 505    my @data = split(/\//, $data, -1);
 506
 507    $state->{entries}{$state->{directory}.$data[1]} = {
 508        revision    => $data[2],
 509        conflict    => $data[3],
 510        options     => $data[4],
 511        tag_or_date => $data[5],
 512    };
 513
 514    $log->info("Received entry line '$data' => '" . $state->{directory} . $data[1] . "'");
 515}
 516
 517# Questionable filename \n
 518#     Response expected: no. Additional data: no. Tell the server to check
 519#     whether filename should be ignored, and if not, next time the server
 520#     sends responses, send (in a M response) `?' followed by the directory and
 521#     filename. filename must not contain `/'; it needs to be a file in the
 522#     directory named by the most recent Directory request.
 523sub req_Questionable
 524{
 525    my ( $cmd, $data ) = @_;
 526
 527    $log->debug("req_Questionable : $data");
 528    $state->{entries}{$state->{directory}.$data}{questionable} = 1;
 529}
 530
 531# add \n
 532#     Response expected: yes. Add a file or directory. This uses any previous
 533#     Argument, Directory, Entry, or Modified requests, if they have been sent.
 534#     The last Directory sent specifies the working directory at the time of
 535#     the operation. To add a directory, send the directory to be added using
 536#     Directory and Argument requests.
 537sub req_add
 538{
 539    my ( $cmd, $data ) = @_;
 540
 541    argsplit("add");
 542
 543    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
 544    $updater->update();
 545
 546    my $addcount = 0;
 547
 548    foreach my $filename ( @{$state->{args}} )
 549    {
 550        $filename = filecleanup($filename);
 551
 552        my $meta = $updater->getmeta($filename);
 553        my $wrev = revparse($filename);
 554
 555        if ($wrev && $meta && ($wrev=~/^-/))
 556        {
 557            # previously removed file, add back
 558            $log->info("added file $filename was previously removed, send $meta->{revision}");
 559
 560            print "MT +updated\n";
 561            print "MT text U \n";
 562            print "MT fname $filename\n";
 563            print "MT newline\n";
 564            print "MT -updated\n";
 565
 566            unless ( $state->{globaloptions}{-n} )
 567            {
 568                my ( $filepart, $dirpart ) = filenamesplit($filename,1);
 569
 570                print "Created $dirpart\n";
 571                print $state->{CVSROOT} . "/$state->{module}/$filename\n";
 572
 573                # this is an "entries" line
 574                my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
 575                $log->debug("/$filepart/$meta->{revision}//$kopts/");
 576                print "/$filepart/$meta->{revision}//$kopts/\n";
 577                # permissions
 578                $log->debug("SEND : u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}");
 579                print "u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}\n";
 580                # transmit file
 581                transmitfile($meta->{filehash});
 582            }
 583
 584            next;
 585        }
 586
 587        unless ( defined ( $state->{entries}{$filename}{modified_filename} ) )
 588        {
 589            print "E cvs add: nothing known about `$filename'\n";
 590            next;
 591        }
 592        # TODO : check we're not squashing an already existing file
 593        if ( defined ( $state->{entries}{$filename}{revision} ) )
 594        {
 595            print "E cvs add: `$filename' has already been entered\n";
 596            next;
 597        }
 598
 599        my ( $filepart, $dirpart ) = filenamesplit($filename, 1);
 600
 601        print "E cvs add: scheduling file `$filename' for addition\n";
 602
 603        print "Checked-in $dirpart\n";
 604        print "$filename\n";
 605        my $kopts = kopts_from_path($filename,"file",
 606                        $state->{entries}{$filename}{modified_filename});
 607        print "/$filepart/0//$kopts/\n";
 608
 609        my $requestedKopts = $state->{opt}{k};
 610        if(defined($requestedKopts))
 611        {
 612            $requestedKopts = "-k$requestedKopts";
 613        }
 614        else
 615        {
 616            $requestedKopts = "";
 617        }
 618        if( $kopts ne $requestedKopts )
 619        {
 620            $log->warn("Ignoring requested -k='$requestedKopts'"
 621                        . " for '$filename'; detected -k='$kopts' instead");
 622            #TODO: Also have option to send warning to user?
 623        }
 624
 625        $addcount++;
 626    }
 627
 628    if ( $addcount == 1 )
 629    {
 630        print "E cvs add: use `cvs commit' to add this file permanently\n";
 631    }
 632    elsif ( $addcount > 1 )
 633    {
 634        print "E cvs add: use `cvs commit' to add these files permanently\n";
 635    }
 636
 637    print "ok\n";
 638}
 639
 640# remove \n
 641#     Response expected: yes. Remove a file. This uses any previous Argument,
 642#     Directory, Entry, or Modified requests, if they have been sent. The last
 643#     Directory sent specifies the working directory at the time of the
 644#     operation. Note that this request does not actually do anything to the
 645#     repository; the only effect of a successful remove request is to supply
 646#     the client with a new entries line containing `-' to indicate a removed
 647#     file. In fact, the client probably could perform this operation without
 648#     contacting the server, although using remove may cause the server to
 649#     perform a few more checks. The client sends a subsequent ci request to
 650#     actually record the removal in the repository.
 651sub req_remove
 652{
 653    my ( $cmd, $data ) = @_;
 654
 655    argsplit("remove");
 656
 657    # Grab a handle to the SQLite db and do any necessary updates
 658    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
 659    $updater->update();
 660
 661    #$log->debug("add state : " . Dumper($state));
 662
 663    my $rmcount = 0;
 664
 665    foreach my $filename ( @{$state->{args}} )
 666    {
 667        $filename = filecleanup($filename);
 668
 669        if ( defined ( $state->{entries}{$filename}{unchanged} ) or defined ( $state->{entries}{$filename}{modified_filename} ) )
 670        {
 671            print "E cvs remove: file `$filename' still in working directory\n";
 672            next;
 673        }
 674
 675        my $meta = $updater->getmeta($filename);
 676        my $wrev = revparse($filename);
 677
 678        unless ( defined ( $wrev ) )
 679        {
 680            print "E cvs remove: nothing known about `$filename'\n";
 681            next;
 682        }
 683
 684        if ( defined($wrev) and ($wrev=~/^-/) )
 685        {
 686            print "E cvs remove: file `$filename' already scheduled for removal\n";
 687            next;
 688        }
 689
 690        unless ( $wrev eq $meta->{revision} )
 691        {
 692            # TODO : not sure if the format of this message is quite correct.
 693            print "E cvs remove: Up to date check failed for `$filename'\n";
 694            next;
 695        }
 696
 697
 698        my ( $filepart, $dirpart ) = filenamesplit($filename, 1);
 699
 700        print "E cvs remove: scheduling `$filename' for removal\n";
 701
 702        print "Checked-in $dirpart\n";
 703        print "$filename\n";
 704        my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
 705        print "/$filepart/-$wrev//$kopts/\n";
 706
 707        $rmcount++;
 708    }
 709
 710    if ( $rmcount == 1 )
 711    {
 712        print "E cvs remove: use `cvs commit' to remove this file permanently\n";
 713    }
 714    elsif ( $rmcount > 1 )
 715    {
 716        print "E cvs remove: use `cvs commit' to remove these files permanently\n";
 717    }
 718
 719    print "ok\n";
 720}
 721
 722# Modified filename \n
 723#     Response expected: no. Additional data: mode, \n, file transmission. Send
 724#     the server a copy of one locally modified file. filename is a file within
 725#     the most recent directory sent with Directory; it must not contain `/'.
 726#     If the user is operating on only some files in a directory, only those
 727#     files need to be included. This can also be sent without Entry, if there
 728#     is no entry for the file.
 729sub req_Modified
 730{
 731    my ( $cmd, $data ) = @_;
 732
 733    my $mode = <STDIN>;
 734    defined $mode
 735        or (print "E end of file reading mode for $data\n"), return;
 736    chomp $mode;
 737    my $size = <STDIN>;
 738    defined $size
 739        or (print "E end of file reading size of $data\n"), return;
 740    chomp $size;
 741
 742    # Grab config information
 743    my $blocksize = 8192;
 744    my $bytesleft = $size;
 745    my $tmp;
 746
 747    # Get a filehandle/name to write it to
 748    my ( $fh, $filename ) = tempfile( DIR => $TEMP_DIR );
 749
 750    # Loop over file data writing out to temporary file.
 751    while ( $bytesleft )
 752    {
 753        $blocksize = $bytesleft if ( $bytesleft < $blocksize );
 754        read STDIN, $tmp, $blocksize;
 755        print $fh $tmp;
 756        $bytesleft -= $blocksize;
 757    }
 758
 759    close $fh
 760        or (print "E failed to write temporary, $filename: $!\n"), return;
 761
 762    # Ensure we have something sensible for the file mode
 763    if ( $mode =~ /u=(\w+)/ )
 764    {
 765        $mode = $1;
 766    } else {
 767        $mode = "rw";
 768    }
 769
 770    # Save the file data in $state
 771    $state->{entries}{$state->{directory}.$data}{modified_filename} = $filename;
 772    $state->{entries}{$state->{directory}.$data}{modified_mode} = $mode;
 773    $state->{entries}{$state->{directory}.$data}{modified_hash} = `git hash-object $filename`;
 774    $state->{entries}{$state->{directory}.$data}{modified_hash} =~ s/\s.*$//s;
 775
 776    #$log->debug("req_Modified : file=$data mode=$mode size=$size");
 777}
 778
 779# Unchanged filename \n
 780#     Response expected: no. Tell the server that filename has not been
 781#     modified in the checked out directory. The filename is a file within the
 782#     most recent directory sent with Directory; it must not contain `/'.
 783sub req_Unchanged
 784{
 785    my ( $cmd, $data ) = @_;
 786
 787    $state->{entries}{$state->{directory}.$data}{unchanged} = 1;
 788
 789    #$log->debug("req_Unchanged : $data");
 790}
 791
 792# Argument text \n
 793#     Response expected: no. Save argument for use in a subsequent command.
 794#     Arguments accumulate until an argument-using command is given, at which
 795#     point they are forgotten.
 796# Argumentx text \n
 797#     Response expected: no. Append \n followed by text to the current argument
 798#     being saved.
 799sub req_Argument
 800{
 801    my ( $cmd, $data ) = @_;
 802
 803    # Argumentx means: append to last Argument (with a newline in front)
 804
 805    $log->debug("$cmd : $data");
 806
 807    if ( $cmd eq 'Argumentx') {
 808        ${$state->{arguments}}[$#{$state->{arguments}}] .= "\n" . $data;
 809    } else {
 810        push @{$state->{arguments}}, $data;
 811    }
 812}
 813
 814# expand-modules \n
 815#     Response expected: yes. Expand the modules which are specified in the
 816#     arguments. Returns the data in Module-expansion responses. Note that the
 817#     server can assume that this is checkout or export, not rtag or rdiff; the
 818#     latter do not access the working directory and thus have no need to
 819#     expand modules on the client side. Expand may not be the best word for
 820#     what this request does. It does not necessarily tell you all the files
 821#     contained in a module, for example. Basically it is a way of telling you
 822#     which working directories the server needs to know about in order to
 823#     handle a checkout of the specified modules. For example, suppose that the
 824#     server has a module defined by
 825#   aliasmodule -a 1dir
 826#     That is, one can check out aliasmodule and it will take 1dir in the
 827#     repository and check it out to 1dir in the working directory. Now suppose
 828#     the client already has this module checked out and is planning on using
 829#     the co request to update it. Without using expand-modules, the client
 830#     would have two bad choices: it could either send information about all
 831#     working directories under the current directory, which could be
 832#     unnecessarily slow, or it could be ignorant of the fact that aliasmodule
 833#     stands for 1dir, and neglect to send information for 1dir, which would
 834#     lead to incorrect operation. With expand-modules, the client would first
 835#     ask for the module to be expanded:
 836sub req_expandmodules
 837{
 838    my ( $cmd, $data ) = @_;
 839
 840    argsplit();
 841
 842    $log->debug("req_expandmodules : " . ( defined($data) ? $data : "[NULL]" ) );
 843
 844    unless ( ref $state->{arguments} eq "ARRAY" )
 845    {
 846        print "ok\n";
 847        return;
 848    }
 849
 850    foreach my $module ( @{$state->{arguments}} )
 851    {
 852        $log->debug("SEND : Module-expansion $module");
 853        print "Module-expansion $module\n";
 854    }
 855
 856    print "ok\n";
 857    statecleanup();
 858}
 859
 860# co \n
 861#     Response expected: yes. Get files from the repository. This uses any
 862#     previous Argument, Directory, Entry, or Modified requests, if they have
 863#     been sent. Arguments to this command are module names; the client cannot
 864#     know what directories they correspond to except by (1) just sending the
 865#     co request, and then seeing what directory names the server sends back in
 866#     its responses, and (2) the expand-modules request.
 867sub req_co
 868{
 869    my ( $cmd, $data ) = @_;
 870
 871    argsplit("co");
 872
 873    # Provide list of modules, if -c was used.
 874    if (exists $state->{opt}{c}) {
 875        my $showref = `git show-ref --heads`;
 876        for my $line (split '\n', $showref) {
 877            if ( $line =~ m% refs/heads/(.*)$% ) {
 878                print "M $1\t$1\n";
 879            }
 880        }
 881        print "ok\n";
 882        return 1;
 883    }
 884
 885    my $module = $state->{args}[0];
 886    $state->{module} = $module;
 887    my $checkout_path = $module;
 888
 889    # use the user specified directory if we're given it
 890    $checkout_path = $state->{opt}{d} if ( exists ( $state->{opt}{d} ) );
 891
 892    $log->debug("req_co : " . ( defined($data) ? $data : "[NULL]" ) );
 893
 894    $log->info("Checking out module '$module' ($state->{CVSROOT}) to '$checkout_path'");
 895
 896    $ENV{GIT_DIR} = $state->{CVSROOT} . "/";
 897
 898    # Grab a handle to the SQLite db and do any necessary updates
 899    my $updater = GITCVS::updater->new($state->{CVSROOT}, $module, $log);
 900    $updater->update();
 901
 902    $checkout_path =~ s|/$||; # get rid of trailing slashes
 903
 904    # Eclipse seems to need the Clear-sticky command
 905    # to prepare the 'Entries' file for the new directory.
 906    print "Clear-sticky $checkout_path/\n";
 907    print $state->{CVSROOT} . "/$module/\n";
 908    print "Clear-static-directory $checkout_path/\n";
 909    print $state->{CVSROOT} . "/$module/\n";
 910    print "Clear-sticky $checkout_path/\n"; # yes, twice
 911    print $state->{CVSROOT} . "/$module/\n";
 912    print "Template $checkout_path/\n";
 913    print $state->{CVSROOT} . "/$module/\n";
 914    print "0\n";
 915
 916    # instruct the client that we're checking out to $checkout_path
 917    print "E cvs checkout: Updating $checkout_path\n";
 918
 919    my %seendirs = ();
 920    my $lastdir ='';
 921
 922    # recursive
 923    sub prepdir {
 924       my ($dir, $repodir, $remotedir, $seendirs) = @_;
 925       my $parent = dirname($dir);
 926       $dir       =~ s|/+$||;
 927       $repodir   =~ s|/+$||;
 928       $remotedir =~ s|/+$||;
 929       $parent    =~ s|/+$||;
 930       $log->debug("announcedir $dir, $repodir, $remotedir" );
 931
 932       if ($parent eq '.' || $parent eq './') {
 933           $parent = '';
 934       }
 935       # recurse to announce unseen parents first
 936       if (length($parent) && !exists($seendirs->{$parent})) {
 937           prepdir($parent, $repodir, $remotedir, $seendirs);
 938       }
 939       # Announce that we are going to modify at the parent level
 940       if ($parent) {
 941           print "E cvs checkout: Updating $remotedir/$parent\n";
 942       } else {
 943           print "E cvs checkout: Updating $remotedir\n";
 944       }
 945       print "Clear-sticky $remotedir/$parent/\n";
 946       print "$repodir/$parent/\n";
 947
 948       print "Clear-static-directory $remotedir/$dir/\n";
 949       print "$repodir/$dir/\n";
 950       print "Clear-sticky $remotedir/$parent/\n"; # yes, twice
 951       print "$repodir/$parent/\n";
 952       print "Template $remotedir/$dir/\n";
 953       print "$repodir/$dir/\n";
 954       print "0\n";
 955
 956       $seendirs->{$dir} = 1;
 957    }
 958
 959    foreach my $git ( @{$updater->gethead} )
 960    {
 961        # Don't want to check out deleted files
 962        next if ( $git->{filehash} eq "deleted" );
 963
 964        my $fullName = $git->{name};
 965        ( $git->{name}, $git->{dir} ) = filenamesplit($git->{name});
 966
 967       if (length($git->{dir}) && $git->{dir} ne './'
 968           && $git->{dir} ne $lastdir ) {
 969           unless (exists($seendirs{$git->{dir}})) {
 970               prepdir($git->{dir}, $state->{CVSROOT} . "/$module/",
 971                       $checkout_path, \%seendirs);
 972               $lastdir = $git->{dir};
 973               $seendirs{$git->{dir}} = 1;
 974           }
 975           print "E cvs checkout: Updating /$checkout_path/$git->{dir}\n";
 976       }
 977
 978        # modification time of this file
 979        print "Mod-time $git->{modified}\n";
 980
 981        # print some information to the client
 982        if ( defined ( $git->{dir} ) and $git->{dir} ne "./" )
 983        {
 984            print "M U $checkout_path/$git->{dir}$git->{name}\n";
 985        } else {
 986            print "M U $checkout_path/$git->{name}\n";
 987        }
 988
 989       # instruct client we're sending a file to put in this path
 990       print "Created $checkout_path/" . ( defined ( $git->{dir} ) and $git->{dir} ne "./" ? $git->{dir} . "/" : "" ) . "\n";
 991
 992       print $state->{CVSROOT} . "/$module/" . ( defined ( $git->{dir} ) and $git->{dir} ne "./" ? $git->{dir} . "/" : "" ) . "$git->{name}\n";
 993
 994        # this is an "entries" line
 995        my $kopts = kopts_from_path($fullName,"sha1",$git->{filehash});
 996        print "/$git->{name}/$git->{revision}//$kopts/\n";
 997        # permissions
 998        print "u=$git->{mode},g=$git->{mode},o=$git->{mode}\n";
 999
1000        # transmit file
1001        transmitfile($git->{filehash});
1002    }
1003
1004    print "ok\n";
1005
1006    statecleanup();
1007}
1008
1009# update \n
1010#     Response expected: yes. Actually do a cvs update command. This uses any
1011#     previous Argument, Directory, Entry, or Modified requests, if they have
1012#     been sent. The last Directory sent specifies the working directory at the
1013#     time of the operation. The -I option is not used--files which the client
1014#     can decide whether to ignore are not mentioned and the client sends the
1015#     Questionable request for others.
# Handler for the "update" request.
#   $cmd  - request name (unused here)
#   $data - request payload; only used for the debug log line
# If no module is set, the heads reported by "git show-ref --heads" are
# listed as new directories and the handler returns 1.  Otherwise each file
# in $state->{args} is compared against the updater's metadata and one of
# the following is sent: nothing (up to date), a status line (locally
# modified / scheduled for addition or removal), a "Removed" response, a
# fresh copy of the file, or the result of a three-way merge with the
# client's modified copy.
sub req_update
{
    my ( $cmd, $data ) = @_;

    $log->debug("req_update : " . ( defined($data) ? $data : "[NULL]" ));

    argsplit("update");

    #
    # It may just be a client exploring the available heads/modules
    # in that case, list them as top level directories and leave it
    # at that. Eclipse uses this technique to offer you a list of
    # projects (heads in this case) to checkout.
    #
    if ($state->{module} eq '') {
        my $showref = `git show-ref --heads`;
        print "E cvs update: Updating .\n";
        for my $line (split '\n', $showref) {
            if ( $line =~ m% refs/heads/(.*)$% ) {
                print "E cvs update: New directory `$1'\n";
            }
        }
        print "ok\n";
        return 1;
    }


    # Grab a handle to the SQLite db and do any necessary updates
    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);

    $updater->update();

    argsfromdir($updater);

    #$log->debug("update state : " . Dumper($state));

    # Sentinel that cannot equal a real dirname, so the first file always
    # triggers an "Updating <dir>" message.
    my $last_dirname = "///";

    # foreach file specified on the command line ...
    foreach my $filename ( @{$state->{args}} )
    {
        $filename = filecleanup($filename);

        $log->debug("Processing file $filename");

        unless ( $state->{globaloptions}{-Q} || $state->{globaloptions}{-q} )
        {
            my $cur_dirname = dirname($filename);
            if ( $cur_dirname ne $last_dirname )
            {
                $last_dirname = $cur_dirname;
                if ( $cur_dirname eq "" )
                {
                    $cur_dirname = ".";
                }
                print "E cvs update: Updating $cur_dirname\n";
            }
        }

        # if we have a -C we should pretend we never saw modified stuff
        if ( exists ( $state->{opt}{C} ) )
        {
            delete $state->{entries}{$filename}{modified_hash};
            delete $state->{entries}{$filename}{modified_filename};
            $state->{entries}{$filename}{unchanged} = 1;
        }

        # Target metadata: the revision named by -r (only "1.N" forms are
        # honoured), otherwise whatever getmeta() returns by default.
        my $meta;
        if ( defined($state->{opt}{r}) and $state->{opt}{r} =~ /^(1\.\d+)$/ )
        {
            $meta = $updater->getmeta($filename, $1);
        } else {
            $meta = $updater->getmeta($filename);
        }

        # If -p was given, "print" the contents of the requested revision.
        if ( exists ( $state->{opt}{p} ) ) {
            if ( defined ( $meta->{revision} ) ) {
                $log->info("Printing '$filename' revision " . $meta->{revision});

                transmitfile($meta->{filehash}, { print => 1 });
            }

            next;
        }

        # No metadata: treat the file as a local addition.
        if ( ! defined $meta )
        {
            $meta = {
                name => $filename,
                revision => '0',
                filehash => 'added'
            };
        }

        my $oldmeta = $meta;

        my $wrev = revparse($filename);

        # If the working copy is an old revision, lets get that version too for comparison.
        if ( defined($wrev) and $wrev ne $meta->{revision} )
        {
            $oldmeta = $updater->getmeta($filename, $wrev);
        }

        #$log->debug("Target revision is $meta->{revision}, current working revision is $wrev");

        # Files are up to date if the working copy and repo copy have the same revision,
        # and the working copy is unmodified _and_ the user hasn't specified -C
        next if ( defined ( $wrev )
                  and defined($meta->{revision})
                  and $wrev eq $meta->{revision}
                  and $state->{entries}{$filename}{unchanged}
                  and not exists ( $state->{opt}{C} ) );

        # If the working copy and repo copy have the same revision,
        # but the working copy is modified, tell the client it's modified
        if ( defined ( $wrev )
             and defined($meta->{revision})
             and $wrev eq $meta->{revision}
             and defined($state->{entries}{$filename}{modified_hash})
             and not exists ( $state->{opt}{C} ) )
        {
            $log->info("Tell the client the file is modified");
            print "MT text M \n";
            print "MT fname $filename\n";
            print "MT newline\n";
            next;
        }

        if ( $meta->{filehash} eq "deleted" )
        {
            # TODO: If it has been modified in the sandbox, error out
            #   with the appropriate message, rather than deleting a modified
            #   file.

            my ( $filepart, $dirpart ) = filenamesplit($filename,1);

            $log->info("Removing '$filename' from working copy (no longer in the repo)");

            print "E cvs update: `$filename' is no longer in the repository\n";
            # Don't want to actually _DO_ the update if -n specified
            unless ( $state->{globaloptions}{-n} ) {
                print "Removed $dirpart\n";
                print "$filepart\n";
            }
        }
        elsif ( not defined ( $state->{entries}{$filename}{modified_hash} )
                or $state->{entries}{$filename}{modified_hash} eq $oldmeta->{filehash}
                or $meta->{filehash} eq 'added' )
        {
            # normal update, just send the new revision (either U=Update,
            # or A=Add, or R=Remove)
            if ( defined($wrev) && ($wrev=~/^-/) )
            {
                $log->info("Tell the client the file is scheduled for removal");
                print "MT text R \n";
                print "MT fname $filename\n";
                print "MT newline\n";
                next;
            }
            elsif ( (!defined($wrev) || $wrev eq '0') &&
                    (!defined($meta->{revision}) || $meta->{revision} eq '0') )
            {
                $log->info("Tell the client the file is scheduled for addition");
                print "MT text A \n";
                print "MT fname $filename\n";
                print "MT newline\n";
                next;

            }
            else {
                $log->info("UpdatingX3 '$filename' to ".$meta->{revision});
                print "MT +updated\n";
                print "MT text U \n";
                print "MT fname $filename\n";
                print "MT newline\n";
                print "MT -updated\n";
            }

            my ( $filepart, $dirpart ) = filenamesplit($filename,1);

            # Don't want to actually _DO_ the update if -n specified
            unless ( $state->{globaloptions}{-n} )
            {
                if ( defined ( $wrev ) )
                {
                    # instruct client we're sending a file to put in this path as a replacement
                    print "Update-existing $dirpart\n";
                    $log->debug("Updating existing file 'Update-existing $dirpart'");
                } else {
                    # instruct client we're sending a file to put in this path as a new file
                    print "Clear-static-directory $dirpart\n";
                    print $state->{CVSROOT} . "/$state->{module}/$dirpart\n";
                    print "Clear-sticky $dirpart\n";
                    print $state->{CVSROOT} . "/$state->{module}/$dirpart\n";

                    $log->debug("Creating new file 'Created $dirpart'");
                    print "Created $dirpart\n";
                }
                print $state->{CVSROOT} . "/$state->{module}/$filename\n";

                # this is an "entries" line
                my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
                $log->debug("/$filepart/$meta->{revision}//$kopts/");
                print "/$filepart/$meta->{revision}//$kopts/\n";

                # permissions
                $log->debug("SEND : u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}");
                print "u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}\n";

                # transmit file
                transmitfile($meta->{filehash});
            }
        } else {
            # The client's copy is modified and differs from the revision it
            # was based on: three-way merge of (mine, old, new).
            my ( $filepart, $dirpart ) = filenamesplit($meta->{name},1);

            my $mergeDir = setupTmpDir();

            my $file_local = $filepart . ".mine";
            my $mergedFile = "$mergeDir/$file_local";
            system("ln","-s",$state->{entries}{$filename}{modified_filename}, $file_local);
            my $file_old = $filepart . "." . $oldmeta->{revision};
            transmitfile($oldmeta->{filehash}, { targetfile => $file_old });
            my $file_new = $filepart . "." . $meta->{revision};
            transmitfile($meta->{filehash}, { targetfile => $file_new });

            # we need to merge with the local changes ( M=successful merge, C=conflict merge )
            $log->info("Merging $file_local, $file_old, $file_new");
            print "M Merging differences between $oldmeta->{revision} and $meta->{revision} into $filename\n";

            $log->debug("Temporary directory for merge is $mergeDir");

            # Exit status after the shift: 0 = clean merge, 1 = conflicts,
            # anything else is treated as failure below.
            my $return = system("git", "merge-file", $file_local, $file_old, $file_new);
            $return >>= 8;

            # NOTE(review): $mergedFile is read further down, after this
            # call; that relies on cleanupTmpDir() leaving the file in
            # place -- confirm.
            cleanupTmpDir();

            if ( $return == 0 )
            {
                $log->info("Merged successfully");
                print "M M $filename\n";
                $log->debug("Merged $dirpart");

                # Don't want to actually _DO_ the update if -n specified
                unless ( $state->{globaloptions}{-n} )
                {
                    print "Merged $dirpart\n";
                    $log->debug($state->{CVSROOT} . "/$state->{module}/$filename");
                    print $state->{CVSROOT} . "/$state->{module}/$filename\n";
                    my $kopts = kopts_from_path("$dirpart/$filepart",
                                                "file",$mergedFile);
                    $log->debug("/$filepart/$meta->{revision}//$kopts/");
                    print "/$filepart/$meta->{revision}//$kopts/\n";
                }
            }
            elsif ( $return == 1 )
            {
                $log->info("Merged with conflicts");
                print "E cvs update: conflicts found in $filename\n";
                print "M C $filename\n";

                # Don't want to actually _DO_ the update if -n specified
                unless ( $state->{globaloptions}{-n} )
                {
                    print "Merged $dirpart\n";
                    print $state->{CVSROOT} . "/$state->{module}/$filename\n";
                    my $kopts = kopts_from_path("$dirpart/$filepart",
                                                "file",$mergedFile);
                    print "/$filepart/$meta->{revision}/+/$kopts/\n";
                }
            }
            else
            {
                $log->warn("Merge failed");
                next;
            }

            # Don't want to actually _DO_ the update if -n specified
            unless ( $state->{globaloptions}{-n} )
            {
                # permissions
                $log->debug("SEND : u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}");
                print "u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}\n";

                # transmit file, format is single integer on a line by itself (file
                # size) followed by the file contents
                # TODO : we should copy files in blocks
                #
                # The file is read directly rather than through a shell
                # ("cat"), because its name is derived from a client-supplied
                # file name and may contain whitespace or shell
                # metacharacters.  If it cannot be read, a warning is logged
                # and an empty file is sent.
                my $data = '';
                if ( open( my $merged_fh, '<', $mergedFile ) )
                {
                    binmode $merged_fh;
                    local $/;    # slurp mode
                    my $contents = <$merged_fh>;
                    $data = $contents if defined $contents;
                    close $merged_fh;
                }
                else
                {
                    $log->warn("Unable to read merged file '$mergedFile': $!");
                }
                $log->debug("File size : " . length($data));
                print length($data) . "\n";
                print $data;
            }
        }

    }

    print "ok\n";
}
1315
# Handler for the "ci" (commit) request.
#   $cmd  - request name (unused here)
#   $data - request payload; only used for the log line
# Builds a new tree from the files the client sent, creates a commit on top
# of the current head of refs/heads/<module>, runs the update, post-receive
# and post-update hooks when they are executable, moves the ref, and reports
# the new revisions to the client.  Fatal conditions send an "error"
# response, clean up the work tree and exit the process.
sub req_ci
{
    my ( $cmd, $data ) = @_;

    argsplit("ci");

    #$log->debug("State : " . Dumper($state));

    $log->info("req_ci : " . ( defined($data) ? $data : "[NULL]" ));

    if ( $state->{method} eq 'pserver' and $state->{user} eq 'anonymous' )
    {
        print "error 1 anonymous user cannot commit via pserver\n";
        cleanupWorkTree();
        exit;
    }

    if ( -e $state->{CVSROOT} . "/index" )
    {
        $log->warn("file 'index' already exists in the git repository");
        print "error 1 Index already exists in git repo\n";
        cleanupWorkTree();
        exit;
    }

    # Grab a handle to the SQLite db and do any necessary updates
    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
    $updater->update();

    # Remember where the head was at the beginning.
    # The module name comes from the client, so git is run through a
    # list-form pipe open: no shell ever sees the ref name.
    my $parenthash = '';
    if ( open( my $showref_fh, '-|', 'git', 'show-ref', '-s',
               "refs/heads/$state->{module}" ) )
    {
        local $/;    # slurp mode
        my $output = <$showref_fh>;
        $parenthash = $output if defined $output;
        close $showref_fh;
    }
    chomp $parenthash;
    if ($parenthash !~ /^[0-9a-f]{40}$/) {
        # Responses are line based, so the message needs its newline.
        print "error 1 pserver cannot find the current HEAD of module\n";
        cleanupWorkTree();
        exit;
    }

    setupWorkTree($parenthash);

    $log->info("Lockless commit start, basing commit on '$work->{workDir}', index file is '$work->{index}'");

    $log->info("Created index '$work->{index}' for head $state->{module} - exit status $?");

    my @committedfiles = ();
    my %oldmeta;

    # foreach file specified on the command line ...
    foreach my $filename ( @{$state->{args}} )
    {
        my $committedfile = $filename;
        $filename = filecleanup($filename);

        # Skip files the client reported as unchanged and sent no content for.
        next unless ( exists $state->{entries}{$filename}{modified_filename} or not $state->{entries}{$filename}{unchanged} );

        my $meta = $updater->getmeta($filename);
        $oldmeta{$filename} = $meta;

        my $wrev = revparse($filename);

        my ( $filepart, $dirpart ) = filenamesplit($filename);

        # do a checkout of the file if it is part of this tree
        if ($wrev) {
            system('git', 'checkout-index', '-f', '-u', $filename);
            unless ($? == 0) {
                die "Error running git-checkout-index -f -u $filename : $!";
            }
        }

        my $addflag = 0;
        my $rmflag = 0;
        # A working revision starting with "-" marks a scheduled removal.
        $rmflag = 1 if ( defined($wrev) and ($wrev=~/^-/) );
        $addflag = 1 unless ( -e $filename );

        # Do up to date checking
        unless ( $addflag or $wrev eq $meta->{revision} or
                 ( $rmflag and $wrev eq "-$meta->{revision}" ) )
        {
            # fail everything if an up to date check fails
            print "error 1 Up to date check failed for $filename\n";
            cleanupWorkTree();
            exit;
        }

        push @committedfiles, $committedfile;
        $log->info("Committing $filename");

        system("mkdir","-p",$dirpart) unless ( -d $dirpart );

        unless ( $rmflag )
        {
            $log->debug("rename $state->{entries}{$filename}{modified_filename} $filename");
            rename $state->{entries}{$filename}{modified_filename},$filename;

            # Calculate modes to remove
            my $invmode = "";
            foreach ( qw (r w x) ) { $invmode .= $_ unless ( $state->{entries}{$filename}{modified_mode} =~ /$_/ ); }

            $log->debug("chmod u+" . $state->{entries}{$filename}{modified_mode} . "-" . $invmode . " $filename");
            system("chmod","u+" .  $state->{entries}{$filename}{modified_mode} . "-" . $invmode, $filename);
        }

        if ( $rmflag )
        {
            $log->info("Removing file '$filename'");
            unlink($filename);
            system("git", "update-index", "--remove", $filename);
        }
        elsif ( $addflag )
        {
            $log->info("Adding file '$filename'");
            system("git", "update-index", "--add", $filename);
        } else {
            $log->info("UpdatingX2 file '$filename'");
            system("git", "update-index", $filename);
        }
    }

    unless ( scalar(@committedfiles) > 0 )
    {
        print "E No files to commit\n";
        print "ok\n";
        cleanupWorkTree();
        return;
    }

    my $treehash = `git write-tree`;
    chomp $treehash;

    $log->debug("Treehash : $treehash, Parenthash : $parenthash");

    # write our commit message out if we have one ...
    my ( $msg_fh, $msg_filename ) = tempfile( DIR => $TEMP_DIR );
    print $msg_fh $state->{opt}{m} if ( defined ( $state->{opt}{m} ) );
    if ( defined ( $cfg->{gitcvs}{commitmsgannotation} ) ) {
        if ($cfg->{gitcvs}{commitmsgannotation} !~ /^\s*$/ ) {
            print $msg_fh "\n\n".$cfg->{gitcvs}{commitmsgannotation}."\n"
        }
    } else {
        print $msg_fh "\n\nvia git-CVS emulator\n";
    }
    close $msg_fh;

    # $parenthash was validated as 40 hex digits above and $msg_filename
    # comes from tempfile(), so neither carries client-controlled text.
    my $commithash = `git commit-tree $treehash -p $parenthash < $msg_filename`;
    chomp($commithash);
    $log->info("Commit hash : $commithash");

    unless ( $commithash =~ /[a-zA-Z0-9]{40}/ )
    {
        $log->warn("Commit failed (Invalid commit hash)");
        print "error 1 Commit failed (unknown reason)\n";
        cleanupWorkTree();
        exit;
    }

    ### Emulate git-receive-pack by running hooks/update
    my @hook = ( $ENV{GIT_DIR}.'hooks/update', "refs/heads/$state->{module}",
                 $parenthash, $commithash );
    if( -x $hook[0] ) {
        unless( system( @hook ) == 0 )
        {
            $log->warn("Commit failed (update hook declined to update ref)");
            print "error 1 Commit failed (update hook declined)\n";
            cleanupWorkTree();
            exit;
        }
    }

    ### Update the ref
    # The old value ($parenthash) is passed as well, so update-ref is asked
    # to move the ref only from that value.
    if (system(qw(git update-ref -m), "cvsserver ci",
               "refs/heads/$state->{module}", $commithash, $parenthash)) {
        $log->warn("update-ref for $state->{module} failed.");
        print "error 1 Cannot commit -- update first\n";
        cleanupWorkTree();
        exit;
    }

    ### Emulate git-receive-pack by running hooks/post-receive
    my $hook = $ENV{GIT_DIR}.'hooks/post-receive';
    if( -x $hook ) {
        open(my $pipe, "| $hook") || die "can't fork $!";

        local $SIG{PIPE} = sub { die 'pipe broke' };

        print $pipe "$parenthash $commithash refs/heads/$state->{module}\n";

        # The ref has already been moved, so a failing hook is logged
        # instead of aborting before the client is told about the commit.
        close($pipe)
            or $log->warn("post-receive hook failed: $! $?");
    }

    $updater->update();

    ### Then hooks/post-update
    $hook = $ENV{GIT_DIR}.'hooks/post-update';
    if (-x $hook) {
        system($hook, "refs/heads/$state->{module}");
    }

    # foreach file specified on the command line ...
    foreach my $filename ( @committedfiles )
    {
        $filename = filecleanup($filename);

        my $meta = $updater->getmeta($filename);
        unless (defined $meta->{revision}) {
          $meta->{revision} = "1.1";
        }

        my ( $filepart, $dirpart ) = filenamesplit($filename, 1);

        $log->debug("Checked-in $dirpart : $filename");

        print "M $state->{CVSROOT}/$state->{module}/$filename,v  <--  $dirpart$filepart\n";
        if ( defined $meta->{filehash} && $meta->{filehash} eq "deleted" )
        {
            print "M new revision: delete; previous revision: $oldmeta{$filename}{revision}\n";
            print "Remove-entry $dirpart\n";
            print "$filename\n";
        } else {
            if ($meta->{revision} eq "1.1") {
                print "M initial revision: 1.1\n";
            } else {
                print "M new revision: $meta->{revision}; previous revision: $oldmeta{$filename}{revision}\n";
            }
            print "Checked-in $dirpart\n";
            print "$filename\n";
            my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
            print "/$filepart/$meta->{revision}//$kopts/\n";
        }
    }

    cleanupWorkTree();
    print "ok\n";
}
1550
1551sub req_status
1552{
1553    my ( $cmd, $data ) = @_;
1554
1555    argsplit("status");
1556
1557    $log->info("req_status : " . ( defined($data) ? $data : "[NULL]" ));
1558    #$log->debug("status state : " . Dumper($state));
1559
1560    # Grab a handle to the SQLite db and do any necessary updates
1561    my $updater;
1562    $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
1563    $updater->update();
1564
1565    # if no files were specified, we need to work out what files we should
1566    # be providing status on ...
1567    argsfromdir($updater);
1568
1569    # foreach file specified on the command line ...
1570    foreach my $filename ( @{$state->{args}} )
1571    {
1572        $filename = filecleanup($filename);
1573
1574        if ( exists($state->{opt}{l}) &&
1575             index($filename, '/', length($state->{prependdir})) >= 0 )
1576        {
1577           next;
1578        }
1579
1580        my $meta = $updater->getmeta($filename);
1581        my $oldmeta = $meta;
1582
1583        my $wrev = revparse($filename);
1584
1585        # If the working copy is an old revision, lets get that
1586        # version too for comparison.
1587        if ( defined($wrev) and $wrev ne $meta->{revision} )
1588        {
1589            $oldmeta = $updater->getmeta($filename, $wrev);
1590        }
1591
1592        # TODO : All possible statuses aren't yet implemented
1593        my $status;
1594        # Files are up to date if the working copy and repo copy have
1595        # the same revision, and the working copy is unmodified
1596        if ( defined ( $wrev ) and defined($meta->{revision}) and
1597             $wrev eq $meta->{revision} and
1598             ( ( $state->{entries}{$filename}{unchanged} and
1599                 ( not defined ( $state->{entries}{$filename}{conflict} ) or
1600                   $state->{entries}{$filename}{conflict} !~ /^\+=/ ) ) or
1601               ( defined($state->{entries}{$filename}{modified_hash}) and
1602                 $state->{entries}{$filename}{modified_hash} eq
1603                        $meta->{filehash} ) ) )
1604        {
1605            $status = "Up-to-date"
1606        }
1607
1608        # Need checkout if the working copy has a different (usually
1609        # older) revision than the repo copy, and the working copy is
1610        # unmodified
1611        if ( defined ( $wrev ) and defined ( $meta->{revision} ) and
1612             $meta->{revision} ne $wrev and
1613             ( $state->{entries}{$filename}{unchanged} or
1614               ( defined($state->{entries}{$filename}{modified_hash}) and
1615                 $state->{entries}{$filename}{modified_hash} eq
1616                                $oldmeta->{filehash} ) ) )
1617        {
1618            $status ||= "Needs Checkout";
1619        }
1620
1621        # Need checkout if it exists in the repo but doesn't have a working
1622        # copy
1623        if ( not defined ( $wrev ) and defined ( $meta->{revision} ) )
1624        {
1625            $status ||= "Needs Checkout";
1626        }
1627
1628        # Locally modified if working copy and repo copy have the
1629        # same revision but there are local changes
1630        if ( defined ( $wrev ) and defined($meta->{revision}) and
1631             $wrev eq $meta->{revision} and
1632             $state->{entries}{$filename}{modified_filename} )
1633        {
1634            $status ||= "Locally Modified";
1635        }
1636
1637        # Needs Merge if working copy revision is different
1638        # (usually older) than repo copy and there are local changes
1639        if ( defined ( $wrev ) and defined ( $meta->{revision} ) and
1640             $meta->{revision} ne $wrev and
1641             $state->{entries}{$filename}{modified_filename} )
1642        {
1643            $status ||= "Needs Merge";
1644        }
1645
1646        if ( defined ( $state->{entries}{$filename}{revision} ) and
1647             not defined ( $meta->{revision} ) )
1648        {
1649            $status ||= "Locally Added";
1650        }
1651        if ( defined ( $wrev ) and defined ( $meta->{revision} ) and
1652             $wrev eq "-$meta->{revision}" )
1653        {
1654            $status ||= "Locally Removed";
1655        }
1656        if ( defined ( $state->{entries}{$filename}{conflict} ) and
1657             $state->{entries}{$filename}{conflict} =~ /^\+=/ )
1658        {
1659            $status ||= "Unresolved Conflict";
1660        }
1661        if ( 0 )
1662        {
1663            $status ||= "File had conflicts on merge";
1664        }
1665
1666        $status ||= "Unknown";
1667
1668        my ($filepart) = filenamesplit($filename);
1669
1670        print "M =======" . ( "=" x 60 ) . "\n";
1671        print "M File: $filepart\tStatus: $status\n";
1672        if ( defined($state->{entries}{$filename}{revision}) )
1673        {
1674            print "M Working revision:\t" .
1675                  $state->{entries}{$filename}{revision} . "\n";
1676        } else {
1677            print "M Working revision:\tNo entry for $filename\n";
1678        }
1679        if ( defined($meta->{revision}) )
1680        {
1681            print "M Repository revision:\t" .
1682                   $meta->{revision} .
1683                   "\t$state->{CVSROOT}/$state->{module}/$filename,v\n";
1684            my($tagOrDate)=$state->{entries}{$filename}{tag_or_date};
1685            my($tag)=($tagOrDate=~m/^T(.+)$/);
1686            if( !defined($tag) )
1687            {
1688                $tag="(none)";
1689            }
1690            print "M Sticky Tag:\t\t$tag\n";
1691            my($date)=($tagOrDate=~m/^D(.+)$/);
1692            if( !defined($date) )
1693            {
1694                $date="(none)";
1695            }
1696            print "M Sticky Date:\t\t$date\n";
1697            my($options)=$state->{entries}{$filename}{options};
1698            if( $options eq "" )
1699            {
1700                $options="(none)";
1701            }
1702            print "M Sticky Options:\t\t$options\n";
1703        } else {
1704            print "M Repository revision:\tNo revision control file\n";
1705        }
1706        print "M\n";
1707    }
1708
1709    print "ok\n";
1710}
1711
# Handle the CVS "diff" request.
#
# For every file in $state->{args} (after argsfromdir() has expanded the
# list), two versions are compared and the result is sent as "M" lines:
#   - no -r   : the working copy's base revision against the modified
#               file uploaded by the client;
#   - one -r  : that revision against the uploaded modified file, or
#               against the working copy's base revision when no modified
#               file was uploaded;
#   - two -r  : the two named revisions.
# Files without an entry revision, files that are up to date and files
# with nothing to compare against are skipped.  The response is always
# terminated with "ok".
sub req_diff
{
    my ( $cmd, $data ) = @_;

    argsplit("diff");

    $log->debug("req_diff : " . ( defined($data) ? $data : "[NULL]" ));

    my ($revision1, $revision2);
    if ( defined ( $state->{opt}{r} ) and ref $state->{opt}{r} eq "ARRAY" )
    {
        $revision1 = $state->{opt}{r}[0];
        $revision2 = $state->{opt}{r}[1];
    } else {
        $revision1 = $state->{opt}{r};
    }

    $log->debug("Diffing revisions " .
                ( defined($revision1) ? $revision1 : "[NULL]" ) .
                " and " . ( defined($revision2) ? $revision2 : "[NULL]" ) );

    # Grab a handle to the SQLite db and do any necessary updates
    my $updater;
    $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
    $updater->update();

    # if no files were specified, we need to work out what files we should
    # be providing status on ...
    argsfromdir($updater);

    # Temporary files created here to hold repository revisions; they are
    # removed once every file has been processed.
    my @scratch;

    # foreach file specified on the command line ...
    foreach my $filename ( @{$state->{args}} )
    {
        $filename = filecleanup($filename);

        # filecleanup() has already reported unusable names
        next unless ( defined ( $filename ) );

        my ( $file1, $file2, $meta1, $meta2 );

        my $wrev = revparse($filename);

        # We need _something_ to diff against
        next unless ( defined ( $wrev ) );

        # if we have a -r switch, use it
        if ( defined ( $revision1 ) )
        {
            $meta1 = $updater->getmeta($filename, $revision1);
            unless ( defined ( $meta1 ) and $meta1->{filehash} ne "deleted" )
            {
                print "E File $filename at revision $revision1 doesn't exist\n";
                next;
            }
        }
        # otherwise we just use the working copy revision
        else
        {
            $meta1 = $updater->getmeta($filename, $wrev);

            # Check before dereferencing: without metadata for the
            # working revision transmitfile() would die with
            # "Need filehash" (presumably the case for a file that was
            # only added locally -- TODO confirm).
            next unless ( defined ( $meta1 ) );
        }
        ( undef, $file1 ) = tempfile( DIR => $TEMP_DIR, OPEN => 0 );
        push @scratch, $file1;
        transmitfile($meta1->{filehash}, { targetfile => $file1 });

        # if we have a second -r switch, use it too
        if ( defined ( $revision2 ) )
        {
            $meta2 = $updater->getmeta($filename, $revision2);

            unless ( defined ( $meta2 ) and $meta2->{filehash} ne "deleted" )
            {
                print "E File $filename at revision $revision2 doesn't exist\n";
                next;
            }

            ( undef, $file2 ) = tempfile( DIR => $TEMP_DIR, OPEN => 0 );
            push @scratch, $file2;
            transmitfile($meta2->{filehash}, { targetfile => $file2 });
        }
        # otherwise we just use the working copy
        else
        {
            $file2 = $state->{entries}{$filename}{modified_filename};
        }

        # if we have been given -r, and we don't have a $file2 yet, lets
        # get one
        if ( defined ( $revision1 ) and not defined ( $file2 ) )
        {
            $meta2 = $updater->getmeta($filename, $wrev);

            # Same guard as for $meta1 above
            next unless ( defined ( $meta2 ) );

            ( undef, $file2 ) = tempfile( DIR => $TEMP_DIR, OPEN => 0 );
            push @scratch, $file2;
            transmitfile($meta2->{filehash}, { targetfile => $file2 });
        }

        # Files to date if the working copy and repo copy have the same
        # revision, and the working copy is unmodified
        if ( not defined ( $meta2 ) and $wrev eq $meta1->{revision} and
             ( ( $state->{entries}{$filename}{unchanged} and
                 ( not defined ( $state->{entries}{$filename}{conflict} ) or
                   $state->{entries}{$filename}{conflict} !~ /^\+=/ ) ) or
               ( defined($state->{entries}{$filename}{modified_hash}) and
                 $state->{entries}{$filename}{modified_hash} eq
                        $meta1->{filehash} ) ) )
        {
            next;
        }

        # Apparently we only show diffs for locally modified files
        unless ( defined($meta2) or
                 defined ( $state->{entries}{$filename}{modified_filename} ) )
        {
            next;
        }

        # Revision of the right-hand side, undef for the working copy
        my $rev2 = defined ( $meta2 ) ? $meta2->{revision} : undef;

        print "M Index: $filename\n";
        print "M =======" . ( "=" x 60 ) . "\n";
        print "M RCS file: $state->{CVSROOT}/$state->{module}/$filename,v\n";
        print "M retrieving revision $meta1->{revision}\n";
        if ( defined ( $meta2 ) )
        {
            print "M retrieving revision $meta2->{revision}\n"
        }

        # Echo the diff command line the way the client gave it
        print "M diff ";
        foreach my $opt ( keys %{$state->{opt}} )
        {
            if ( ref $state->{opt}{$opt} eq "ARRAY" )
            {
                foreach my $value ( @{$state->{opt}{$opt}} )
                {
                    print "-$opt $value ";
                }
            } else {
                print "-$opt ";
                if ( defined ( $state->{opt}{$opt} ) )
                {
                    print "$state->{opt}{$opt} "
                }
            }
        }
        print "$filename\n";

        $log->info("Diffing $filename -r $meta1->{revision} -r " .
                   ( $rev2 or "workingcopy" ));

        # Run diff without a shell: $filename comes from the client and
        # must never be interpreted as shell syntax.
        my @diffcmd = ( "diff" );
        if ( exists $state->{opt}{u} )
        {
            push @diffcmd, "-u",
                 "-L", "$filename revision $meta1->{revision}",
                 "-L", "$filename " .
                       ( defined($rev2) ? "revision $rev2" : "working copy" );
        }
        push @diffcmd, $file1, $file2;

        if ( open my $diff, '-|', @diffcmd )
        {
            while ( <$diff> )
            {
                print "M $_";
            }
            # diff exits non-zero when the files differ, so the status
            # of close is deliberately not treated as an error
            close $diff;
        }
        else
        {
            print "E Unable to run diff for $filename: $!\n";
        }
    }

    unlink @scratch if ( @scratch );

    print "ok\n";
}
1883
# Handle the CVS "log" request.
#
# For each file in $state->{args} that has at least one revision selected
# by the optional -r filter, print a log header followed by one entry per
# revision (date, author, state and the commit message), all as "M"
# lines.  The response is terminated with "ok".
sub req_log
{
    my ( $cmd, $data ) = @_;

    argsplit("log");

    $log->debug("req_log : " . ( defined($data) ? $data : "[NULL]" ));

    # Value of -r, or undef when the client did not send one
    my $revFilter = $state->{opt}{r};

    # Grab a handle to the SQLite db and do any necessary updates
    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
    $updater->update();

    # With no files given, fall back to the files known for the directory
    argsfromdir($updater);

    foreach my $filename ( @{$state->{args}} )
    {
        $filename = filecleanup($filename);

        my $headmeta = $updater->getmeta($filename);

        my ( $revisions, $totalrevisions ) =
            $updater->getlog($filename, $revFilter);

        # Nothing selected for this file: no output at all
        next if ( scalar(@$revisions) == 0 );

        my @header = (
            "",
            "RCS file: $state->{CVSROOT}/$state->{module}/$filename,v",
            "Working file: $filename",
            "head: $headmeta->{revision}",
            "branch:",
            "locks: strict",
            "access list:",
            "symbolic names:",
            "keyword substitution: kv",
            "total revisions: $totalrevisions;\tselected revisions: " .
                scalar(@$revisions),
            "description:",
        );
        print "M $_\n" foreach ( @header );

        foreach my $entry ( @$revisions )
        {
            print "M ----------------------------\n";
            print "M revision $entry->{revision}\n";

            # Turn "<day> <Mon> <year> <time>" into "<year>/<mm>/<dd> <time>"
            # when the month abbreviation is a known one
            my ( $day, $month, $year, $time ) =
                ( $entry->{modified} =~ /(\d+)\s+(\w+)\s+(\d+)\s+(\S+)/ );
            if ( defined($month) and defined($DATE_LIST->{$month}) )
            {
                $entry->{modified} = sprintf('%04d/%02d/%02d %s',
                                            $year, $DATE_LIST->{$month},
                                            $day, $time );
            }

            $entry->{author} = cvs_author($entry->{author});

            my $rcsstate = ( $entry->{filehash} eq "deleted" ) ? "dead" : "Exp";

            # The line counts are a fixed placeholder, not computed values
            print "M date: $entry->{modified};" .
                  "  author: $entry->{author};  state: " .
                  $rcsstate .
                  ";  lines: +2 -3\n";

            my $commitmessage = $updater->commitmessage($entry->{commithash});
            $commitmessage =~ s/^/M /mg;
            print $commitmessage . "\n";
        }

        print "M " . ( "=" x 77 ) . "\n";
    }

    print "ok\n";
}
1959
# Handle the CVS "annotate" request.
#
# A temporary work tree is set up, and for each file in $state->{args}
# that has a head revision the file is checked out from the newest commit
# it was seen in and run through "git annotate".  Every annotated line is
# sent as an "M" line carrying the CVS revision, author and date taken
# from the updater's metadata for the commit named in the annotate output.
#
# The temporary work tree is released on every exit path.  On success the
# response is terminated with "ok".
#
# NOTE(review): when a git command or the hints file fails, only "E" lines
# are sent and no terminating "ok"/"error" (unchanged from before) --
# confirm whether clients expect one.
sub req_annotate
{
    my ( $cmd, $data ) = @_;

    argsplit("annotate");

    $log->info("req_annotate : " . ( defined($data) ? $data : "[NULL]" ));

    # Grab a handle to the SQLite db and do any necessary updates
    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
    $updater->update();

    # if no files were specified, we need to work out what files we should be providing annotate on ...
    argsfromdir($updater);

    # we'll need a temporary checkout dir
    setupWorkTree();

    $log->info("Temp checkoutdir creation successful, basing annotate session work on '$work->{workDir}', index file is '$ENV{GIT_INDEX_FILE}'");

    # Set when a file could not be processed; the work tree is still
    # cleaned up below, but no "ok" is sent.
    my $failed = 0;

    # foreach file specified on the command line ...
    FILE:
    foreach my $filename ( @{$state->{args}} )
    {
        $filename = filecleanup($filename);

        my $meta = $updater->getmeta($filename);

        next FILE unless ( $meta->{revision} );

        # get all the commits that this file was in
        # in dense format -- aka skip dead revisions
        my $revisions   = $updater->gethistorydense($filename);
        my $lastseenin  = $revisions->[0][2];

        # populate the temporary index based on the latest commit were we saw
        # the file -- but do it cheaply without checking out any files
        # TODO: if we got a revision from the client, use that instead
        # to look up the commithash in sqlite (still good to default to
        # the current head as we do now)
        system("git", "read-tree", $lastseenin);
        unless ($? == 0)
        {
            print "E error running git-read-tree $lastseenin $ENV{GIT_INDEX_FILE} $!\n";
            $failed = 1;
            last FILE;
        }
        $log->info("Created index '$ENV{GIT_INDEX_FILE}' with commit $lastseenin - exit status $?");

        # do a checkout of the file
        system('git', 'checkout-index', '-f', '-u', $filename);
        unless ($? == 0) {
            print "E error running git-checkout-index -f -u $filename : $!\n";
            $failed = 1;
            last FILE;
        }

        $log->info("Annotate $filename");

        # Prepare a file with the commits from the linearized
        # history that annotate should know about. This prevents
        # git-annotate telling us about commits we are hiding
        # from the client.

        my $a_hints = "$work->{workDir}/.annotate_hints";
        my $hints;
        if (!open($hints, '>', $a_hints)) {
            print "E failed to open '$a_hints' for writing: $!\n";
            $failed = 1;
            last FILE;
        }
        # One line per commit: the commit, then its parent in the
        # linearized history when there is one
        for (my $i=0; $i < @$revisions; $i++)
        {
            print $hints $revisions->[$i][2];
            if ($i+1 < @$revisions) { # have we got a parent?
                print $hints ' ' . $revisions->[$i+1][2];
            }
            print $hints "\n";
        }

        print $hints "\n";
        unless ( close $hints )
        {
            print "E failed to write $a_hints: $!\n";
            $failed = 1;
            last FILE;
        }

        my @cmd = (qw(git annotate -l -S), $a_hints, $filename);
        my $annotate;
        if (!open($annotate, "-|", @cmd)) {
            print "E error invoking ". join(' ',@cmd) .": $!\n";
            $failed = 1;
            last FILE;
        }

        # Per-commit metadata, looked up once per commit hash
        my $metadata = {};
        print "E Annotations for $filename\n";
        print "E ***************\n";
        while ( <$annotate> )
        {
            if (m/^([a-zA-Z0-9]{40})\t\([^\)]*\)(.*)$/i)
            {
                my $commithash = $1;
                my $text = $2;
                unless ( defined ( $metadata->{$commithash} ) )
                {
                    $metadata->{$commithash} = $updater->getmeta($filename, $commithash);
                    $metadata->{$commithash}{author} = cvs_author($metadata->{$commithash}{author});
                    # Shorten the date to "<dd>-<Mon>-<yy>" when it has
                    # the expected "<day> <Mon> <yyyy>" start
                    if ( $metadata->{$commithash}{modified} =~ /^(\d+)\s(\w+)\s\d\d(\d\d)/ )
                    {
                        $metadata->{$commithash}{modified} = sprintf("%02d-%s-%02d", $1, $2, $3);
                    }
                }
                printf("M %-7s      (%-8s %10s): %s\n",
                    $metadata->{$commithash}{revision},
                    $metadata->{$commithash}{author},
                    $metadata->{$commithash}{modified},
                    $text
                );
            } else {
                $log->warn("Error in annotate output! LINE: $_");
                print "E Annotate error \n";
                next;
            }
        }
        close $annotate;
    }

    # done (or failed); either way get out of the tempdir so the work
    # tree state is reset for whatever runs next
    cleanupWorkTree();

    return if ( $failed );

    print "ok\n";

}
2081
# Split $state->{arguments} into $state->{args}, $state->{files} and
# $state->{opt}.  All three are reset first; nothing else happens unless
# $state->{arguments} is an array reference.
#
# With a command type as argument ("co", "status", "update", "diff", "ci",
# "add", "remove" or "log"; any other defined type gets an empty switch
# table, so every switch is treated as a plain flag):
#   - "--" and arguments without any non-blank character are dropped;
#   - "-x" / "-xVALUE" switches are recorded in $state->{opt}{x}: undef
#     for a plain flag, otherwise the value (the inline text, or the next
#     argument when there is none).  A switch whose table entry is greater
#     than 1 may be repeated; its values are then collected in one flat
#     array reference;
#   - everything else is appended to $state->{args}.
#   $state->{arguments} is consumed and $state->{files} stays empty.
#
# Without a type no switches are parsed: the arguments before the first
# "--" go to $state->{args} and those between the first and the second
# "--" go to $state->{files}.
sub argsplit
{
    $state->{args} = [];
    $state->{files} = [];
    $state->{opt} = {};

    return unless( defined($state->{arguments}) and ref $state->{arguments} eq "ARRAY" );

    my $type = shift;

    if ( defined($type) )
    {
        # Per command: 0 = flag, 1 = takes a value, 2 = takes a value and
        # may be given more than once
        my %switches_for = (
            co     => { A => 0, N => 0, P => 0, R => 0, c => 0, f => 0, l => 0, n => 0, p => 0, s => 0, r => 1, D => 1, d => 1, k => 1, j => 1, },
            status => { v => 0, l => 0, R => 0 },
            update => { A => 0, P => 0, C => 0, d => 0, f => 0, l => 0, R => 0, p => 0, k => 1, r => 1, D => 1, j => 1, I => 1, W => 1 },
            diff   => { l => 0, R => 0, k => 1, D => 1, r => 2 },
            ci     => { c => 0, R => 0, l => 0, f => 0, F => 1, m => 1, r => 1 },
            add    => { k => 1, m => 1 },
            remove => { f => 0, l => 0, R => 0 },
            log    => { l => 0, b => 0, h => 0, R => 0, t => 0, N => 0, S => 0, r => 1, d => 1, s => 1, w => 1 },
        );
        my $opt = $switches_for{$type} || {};

        while ( scalar ( @{$state->{arguments}} ) > 0 )
        {
            my $arg = shift @{$state->{arguments}};

            next if ( $arg eq "--" );
            next unless ( $arg =~ /\S/ );

            # anything that does not look like a switch is a plain argument
            unless ( $arg =~ /^-(\w)(.*)/ )
            {
                push @{$state->{args}}, $arg;
                next;
            }

            my ( $switch, $inline ) = ( $1, $2 );

            # a plain flag (or a switch unknown to this command)
            unless ( $opt->{$switch} )
            {
                $state->{opt}{$switch} = undef;
                next;
            }

            # extra data in the arg is the value, otherwise the next
            # argument is
            my $value = ( length($inline) > 0 )
                      ? $inline
                      : shift @{$state->{arguments}};

            if ( $opt->{$switch} > 1 and exists ( $state->{opt}{$switch} ) )
            {
                # Repeated switch: turn the first value into a list once,
                # then keep appending, so a third occurrence does not
                # nest the list inside another one.
                $state->{opt}{$switch} = [ $state->{opt}{$switch} ]
                    unless ( ref $state->{opt}{$switch} eq "ARRAY" );
                push @{$state->{opt}{$switch}}, $value;
            }
            else
            {
                $state->{opt}{$switch} = $value;
            }
        }
    }
    else
    {
        # Number of "--" separators seen so far
        my $mode = 0;

        foreach my $value ( @{$state->{arguments}} )
        {
            if ( $value eq "--" )
            {
                $mode++;
                next;
            }
            push @{$state->{args}}, $value if ( $mode == 0 );
            push @{$state->{files}}, $value if ( $mode == 1 );
        }
    }
}
2166
# Expand $state->{args} into a list of file names, using the updater's
# head listing plus the entries the client reports with revision '0'.
#
#   - A lone "." counts as "no arguments".
#   - Two or more arguments are left untouched.
#   - Exactly one argument: every known file that is that path, or lives
#     below it as a directory, replaces the argument.  If nothing matches,
#     the argument is kept as it is.
#   - No arguments: every known file below $state->{prependdir}.
#
# Names are stored relative to $state->{prependdir}, because the request
# handlers run each one through filecleanup(), which adds it again.
# Files that are deleted in the head listing and have no entry on the
# client side are left out.
sub argsfromdir
{
    my $updater = shift;

    $state->{args} = [] if ( scalar(@{$state->{args}}) == 1 and $state->{args}[0] eq "." );

    return if ( scalar ( @{$state->{args}} ) > 1 );

    my @gethead = @{$updater->gethead};

    # push added files
    foreach my $file (keys %{$state->{entries}}) {
        if ( exists $state->{entries}{$file}{revision} &&
                $state->{entries}{$file}{revision} eq '0' )
        {
            push @gethead, { name => $file, filehash => 'added' };
        }
    }

    my $prependdir = defined ( $state->{prependdir} ) ? $state->{prependdir} : "";

    if ( scalar(@{$state->{args}}) == 1 )
    {
        # Names in the listing are relative to the top of the module, so
        # the client's directory goes in front of the argument.
        my $arg = $prependdir . $state->{args}[0];

        $log->info("Only one arg specified, checking for directory expansion on '$arg'");

        foreach my $file ( @gethead )
        {
            next if ( $file->{filehash} eq "deleted" and not defined ( $state->{entries}{$file->{name}} ) );

            # Work on a copy so the listing itself is never modified;
            # paths are matched literally, not as patterns.
            my $name = $file->{name};
            next unless ( $name eq $arg or $name =~ /^\Q$arg\E\// );
            $name =~ s/^\Q$prependdir\E//;
            push @{$state->{args}}, $name;
        }

        # drop the original argument once it has been expanded
        shift @{$state->{args}} if ( scalar(@{$state->{args}}) > 1 );
    } else {
        $log->info("Only one arg specified, populating file list automatically");

        $state->{args} = [];

        foreach my $file ( @gethead )
        {
            next if ( $file->{filehash} eq "deleted" and not defined ( $state->{entries}{$file->{name}} ) );

            # Only files below the client's directory; copy as above
            my $name = $file->{name};
            next unless ( $name =~ s/^\Q$prependdir\E// );
            push @{$state->{args}}, $name;
        }
    }
}
2215
# Reset the per-command parts of $state -- files, args, arguments and
# entries -- once a command that uses arguments has run.
sub statecleanup
{
    @{$state}{qw(files args arguments)} = ( [], [], [] );
    $state->{entries} = {};
}
2224
# Look up the working directory CVS revision ("1.X") recorded for the
# given filename in the "entries" state.  The revision carries a leading
# dash if the file is scheduled for removal when it is committed.
sub revparse
{
    my ( $filename ) = @_;

    my $revision = $state->{entries}{$filename}{revision};

    return $revision;
}
2235
# Perform a CVS "file transfer" of the blob named by a file hash.  What
# is done with the contents depends on the optional second argument, a
# hash reference:
# - with $options->{targetfile}, the contents are written to that file;
# - with a true $options->{print}, the contents are sent line by line,
#   as "M" lines, or as "MT text" for a line without trailing newline;
# - otherwise the size of the file is sent on a line of its own,
#   followed by the contents.
# A hash of "deleted" is logged and ignored.  Dies if the hash is not 40
# alphanumeric characters, if it does not name a blob, or if reading or
# writing fails.
sub transmitfile
{
    my ( $filehash, $options ) = @_;

    if ( defined ( $filehash ) and $filehash eq "deleted" )
    {
        $log->warn("filehash is 'deleted'");
        return;
    }

    unless ( defined ( $filehash ) and $filehash =~ /^[a-zA-Z0-9]{40}$/ )
    {
        die "Need filehash";
    }

    chomp ( my $type = `git cat-file -t $filehash` );

    unless ( defined ( $type ) and $type eq "blob" )
    {
        die ( "Invalid type '$type' (expected 'blob')" );
    }

    chomp ( my $size = `git cat-file -s $filehash` );

    $log->debug("transmitfile($filehash) size=$size, type=$type");

    open my $blob, '-|', "git", "cat-file", "blob", $filehash
        or die("Couldn't execute git-cat-file");

    my $targetfile = $options->{targetfile};

    if ( defined ( $targetfile ) )
    {
        # dump the blob into the requested file
        open my $out, ">", $targetfile or die("Couldn't open '$targetfile' for writing : $!");
        print $out $_ while ( <$blob> );
        close $out or die("Failed to write '$targetfile': $!");
    }
    elsif ( $options->{print} )
    {
        # complete lines go out as "M", an unterminated last line as "MT"
        while ( <$blob> )
        {
            print( /\n\z/ ? ( 'M ', $_ ) : ( 'MT text ', $_, "\n" ) );
        }
    }
    else
    {
        # plain transfer: size first, then the raw contents
        print "$size\n";
        print while ( <$blob> );
    }

    close $blob or die ("Couldn't close filehandle for transmitfile(): $!");
}
2291
# Split a file name into its file portion and its directory portion.
#
# Returns the list ( $filepart, $dirpart ) -- the file portion comes
# first.  $dirpart always ends in "/" and is "./" for a name that has no
# directory component.  When the optional second argument is true, a
# leading $state->{prependdir} is removed from $dirpart.
sub filenamesplit
{
    my $filename = shift;
    my $fixforlocaldir = shift;

    my ( $filepart, $dirpart ) = ( $filename, "." );
    ( $filepart, $dirpart ) = ( $2, $1 ) if ( $filename =~ /(.*)\/(.*)/ );
    $dirpart .= "/";

    if ( $fixforlocaldir and defined ( $state->{prependdir} ) )
    {
        # prependdir is a literal path prefix, so it must not be
        # interpreted as a pattern
        my $prefix = $state->{prependdir};
        $dirpart =~ s/^\Q$prefix\E//;
    }

    return ( $filepart, $dirpart );
}
2311
# Canonicalize a file name sent by the client and put
# $state->{prependdir} in front of it, to accommodate a CVS client that is
# run from a subdirectory (the result is relative to the top directory of
# the project).
#
# "." becomes the empty name, one leading "./" is dropped, runs of "/"
# are collapsed and a trailing "/" is removed.  Returns undef for an
# undefined name, and for an absolute name after reporting it to the
# client with an "E" line.
sub filecleanup
{
    my ( $filename ) = @_;

    return undef unless(defined($filename));

    if ( index ( $filename, "/" ) == 0 )
    {
        print "E absolute filenames '$filename' not supported by server\n";
        return undef;
    }

    $filename = "" if ( $filename eq "." );

    $filename =~ s{^\./}{};
    $filename =~ tr{/}{}s;      # squeeze runs of slashes into one

    $filename = $state->{prependdir} . $filename;
    $filename =~ s{/$}{};

    return $filename;
}
2336
# Strip $state->{prependdir} from the front of a path, so that it is
# relative to the directory the CVS client was started from rather than
# to the top of the project.  Essentially the inverse of filecleanup().
#
# The path is returned unchanged when prependdir is undefined or empty.
# Logs a fatal error and dies when the path does not start with it.
sub remove_prependdir
{
    my($path) = @_;

    my $prefix = $state->{prependdir};
    return $path unless ( defined($prefix) && $prefix ne "" );

    # compare without the trailing slash; the slash itself is optional
    # in the path
    $prefix =~ s%/$%%;
    unless ( $path =~ s%^\Q$prefix\E/?%% )
    {
        $log->fatal("internal error missing prependdir");
        die("internal error missing prependdir");
    }

    return $path;
}
2355
# Check that a CVSROOT has been set and that $ENV{GIT_DIR} is exactly that
# CVSROOT followed by "/".  If either check fails, an "error 1" response
# is sent, the work tree is cleaned up and the process exits.
sub validateGitDir
{
    my $problem;

    if ( !defined($state->{CVSROOT}) )
    {
        $problem = "CVSROOT not specified";
    }
    elsif ( $ENV{GIT_DIR} ne ($state->{CVSROOT} . '/') )
    {
        $problem = "Internally inconsistent CVSROOT";
    }

    return unless ( defined($problem) );

    print "error 1 $problem\n";
    cleanupWorkTree();
    exit;
}
2371
# Create a fresh temporary working directory, chdir into it and point
# GIT_WORK_TREE / GIT_INDEX_FILE at it (work tree state 2).  When a
# version is passed, it is loaded into the index with git read-tree.
#
# Sends "error 1" and exits if a work tree other than the state 1 one is
# already active or a temporary directory is recorded in $work->{tmpDir}.
# Dies if the chdir or the read-tree fails.
sub setupWorkTree
{
    my ($ver) = @_;

    validateGitDir();

    my $tree_active = ( defined($work->{state}) && $work->{state} != 1 );
    if ( $tree_active || defined($work->{tmpDir}) )
    {
        $log->warn("Bad work tree state management");
        print "error 1 Internal setup multiple work trees without cleanup\n";
        cleanupWorkTree();
        exit;
    }

    my $workDir = tempdir ( DIR => $TEMP_DIR );
    $work->{workDir} = $workDir;

    # the index file name is picked once and then reused
    unless ( defined($work->{index}) )
    {
        $work->{index} = ( tempfile ( DIR => $TEMP_DIR, OPEN => 0 ) )[1];
    }

    chdir $workDir or
        die "Unable to chdir to $workDir\n";

    $log->info("Setting up GIT_WORK_TREE as '.' in '$workDir', index file is '$work->{index}'");

    $ENV{GIT_WORK_TREE} = ".";
    $ENV{GIT_INDEX_FILE} = $work->{index};
    $work->{state} = 2;

    # without a version the index is left alone (req_annotate reads a
    # tree for each file itself)
    if ( $ver )
    {
        system("git","read-tree",$ver);
        if ( $? != 0 )
        {
            $log->warn("Error running git-read-tree");
            die "Error running git-read-tree $ver in $workDir $!\n";
        }
    }
}
2416
# Ensure the current directory is in some kind of working directory,
# with a recent version loaded in the index.
#
# Nothing is done when a work tree is already active.  Otherwise an
# empty directory is created (once) and entered, the commit that
# refs/heads/<module> points at is loaded into the index with git
# read-tree, and the work tree state is set to 1.
#
# Sends "error 1" and exits if a temporary directory is recorded in
# $work->{tmpDir} or the branch head cannot be resolved.  Dies if the
# chdir or the read-tree fails.
sub ensureWorkTree
{
    if( defined($work->{tmpDir}) )
    {
        $log->warn("Bad work tree state management [ensureWorkTree()]");
        print "error 1 Internal setup multiple dirs without cleanup\n";
        cleanupWorkTree();
        exit;
    }
    if( $work->{state} )
    {
        return;
    }

    validateGitDir();

    if( !defined($work->{emptyDir}) )
    {
        $work->{emptyDir} = tempdir ( DIR => $TEMP_DIR );
    }
    chdir $work->{emptyDir} or
        die "Unable to chdir to $work->{emptyDir}\n";

    # Ask git for the branch head without going through a shell: the
    # module name is not something this function controls.
    my $ref = "refs/heads/$state->{module}";
    my $ver = "";
    if ( open my $fh, '-|', "git", "show-ref", "-s", $ref )
    {
        local $/;               # take the whole output at once
        my $out = <$fh>;
        close $fh;
        $ver = $out if ( defined($out) );
    }
    chomp $ver;
    if ($ver !~ /^[0-9a-f]{40}$/)
    {
        $log->warn("Error from git show-ref -s $ref");
        print "error 1 cannot find the current HEAD of module\n";
        cleanupWorkTree();
        exit;
    }

    if( !defined($work->{index}) )
    {
        (undef, $work->{index}) = tempfile ( DIR => $TEMP_DIR, OPEN => 0 );
    }

    $ENV{GIT_WORK_TREE} = ".";
    $ENV{GIT_INDEX_FILE} = $work->{index};
    $work->{state} = 1;

    system("git","read-tree",$ver);
    unless ($? == 0)
    {
        die "Error running git-read-tree $ver $!\n";
    }
}
2467
# Release a work tree that is not needed any longer: chdir to "/",
# remove the temporary working directory if there is one and reset the
# work tree state.  Does nothing when no work tree is active.
sub cleanupWorkTree
{
    return unless ( $work->{state} );

    # step out first, the directory about to be removed may be the
    # current one
    chdir "/" or die "Unable to chdir '/'\n";

    my $workDir = $work->{workDir};
    if ( defined($workDir) )
    {
        rmtree( $workDir );
        $work->{workDir} = undef;
    }

    $work->{state} = undef;
}
2485
# Create a temporary directory (not a working tree), typically for
# merging dirty state as in req_update, record it in $work->{tmpDir} and
# chdir into it.  Returns the directory name; dies if the chdir fails.
sub setupTmpDir
{
    my $tmpDir = tempdir ( DIR => $TEMP_DIR );
    $work->{tmpDir} = $tmpDir;

    chdir $tmpDir or die "Unable to chdir $tmpDir\n";

    return $tmpDir;
}
2495
# Leave a directory entered with setupTmpDir() and chdir back to where
# the work tree state says we belong: the empty directory for state 1,
# the working directory for state 2, or "/" when no work tree is active.
#
# Dies if setupTmpDir() has never recorded a directory, if the work tree
# state has any other value, or if the chdir fails.
#
# NOTE(review): the directory is not removed here and $work->{tmpDir}
# stays defined, while setupWorkTree() and ensureWorkTree() refuse to run
# as long as it is defined -- confirm against the callers whether it
# should be cleared here.
sub cleanupTmpDir
{
    if ( !defined($work->{tmpDir}) )
    {
        $log->warn("cleanup tmpdir that has not been setup");
        die "Cleanup tmpDir that has not been setup\n";
    }
    if( defined($work->{state}) )
    {
        if( $work->{state} == 1 )
        {
            chdir $work->{emptyDir} or
                die "Unable to chdir to $work->{emptyDir}\n";
        }
        elsif( $work->{state} == 2 )
        {
            # report the directory that was actually tried
            chdir $work->{workDir} or
                die "Unable to chdir to $work->{workDir}\n";
        }
        else
        {
            $log->warn("Inconsistent work dir state");
            die "Inconsistent work dir state\n";
        }
    }
    else
    {
        chdir "/" or die "Unable to chdir '/'\n";
    }
}
2528
# Given a path, this function returns a string containing the kopts
# that should go into that path's Entries line.  For example, a binary
# file should get -kb.
#
# Arguments: the path, plus a source type and name that are handed to
# is_binary() when gitcvs.allbinary is "guess".
#
# Decision order:
#   1. If gitcvs.usecrlfattr is 1/true/yes, the git attributes decide:
#      "text" (or "crlf" when "text" is unspecified) being unset gives
#      "-kb"; an "eol" attribute that is not unspecified, or a value of
#      "set" or "input", gives "".  Anything else is logged and falls
#      through to step 2.
#   2. gitcvs.allbinary: 1/true/yes gives "-kb"; "guess" gives "-kb" when
#      is_binary() says so.
#   3. Otherwise "".
sub kopts_from_path
{
    my ($path, $srcType, $name) = @_;

    # Anchored at both ends, like the allbinary test below
    if ( defined ( $cfg->{gitcvs}{usecrlfattr} ) and
         $cfg->{gitcvs}{usecrlfattr} =~ /^\s*(1|true|yes)\s*$/i )
    {
        # check_attr() gives undef when git produced nothing usable;
        # treat that as a value that matches none of the known states.
        my $val = check_attr( "text", $path );
        $val = "" unless ( defined ( $val ) );
        if ( $val eq "unspecified" )
        {
            $val = check_attr( "crlf", $path );
            $val = "" unless ( defined ( $val ) );
        }
        if ( $val eq "unset" )
        {
            return "-kb"
        }

        my $eol = check_attr( "eol", $path );
        $eol = "" unless ( defined ( $eol ) );
        if ( $eol ne "unspecified" ||
             $val eq "set" || $val eq "input" )
        {
            return "";
        }

        $log->info("Unrecognized check_attr crlf $path : $val");
    }

    if ( defined ( $cfg->{gitcvs}{allbinary} ) )
    {
        if( ($cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i) )
        {
            return "-kb";
        }
        elsif( ($cfg->{gitcvs}{allbinary} =~ /^\s*guess\s*$/i) )
        {
            if( is_binary($srcType,$name) )
            {
                $log->debug("... as binary");
                return "-kb";
            }
            else
            {
                $log->debug("... as text");
            }
        }
    }
    # Return "" to give no special treatment to any path
    return "";
}
2581
# Look up one git attribute for a path by running
# "git check-attr <attr> -- <path>" (after ensureWorkTree()).
#
# Arguments:
#   $attr - attribute name, e.g. "text", "crlf" or "eol"
#   $path - path to query
#
# Returns the text following the last ": " on the first output line, or
# undef if the command could not be started or printed nothing.
sub check_attr
{
    my ($attr,$path) = @_;
    ensureWorkTree();

    my $fh;
    unless ( open $fh, '-|', "git", "check-attr", $attr, "--", $path )
    {
        return undef;
    }

    my $val = <$fh>;
    close $fh;

    # No output at all: report "no answer" without running the
    # substitution on an undefined value.
    return undef unless defined $val;

    $val =~ s/.*: ([^:\r\n]*)\s*$/$1/;
    return $val;
}
2598
# Decide whether a blob looks like binary data.
# This should have the same heuristics as convert.c:is_binary() and related.
# Note that the bare CR test is done by callers in convert.c.
#
# Arguments are handed straight to open_blob_or_die():
#   $srcType - "file" or "sha1"
#   $name    - file name or blob hash
#
# Returns a true value when the content is considered binary (a NUL byte,
# a bare CR, or too many non-printable bytes), a false value otherwise.
sub is_binary
{
    my ($srcType,$name) = @_;
    $log->debug("is_binary($srcType,$name)");

    # Minimize amount of interpreted code run in the inner per-character
    # loop for large files, by totalling each character value and
    # then analyzing the totals.
    my @counts = (0) x 256;

    my $fh = open_blob_or_die($srcType,$name);
    my $line;
    while( defined($line=<$fh>) )
    {
        # Any '\0' and bare CR are considered binary.  A lookahead is used
        # instead of a [^\n] class so that a CR which is the very last
        # byte of the input (nothing follows it) is caught as well.
        if( $line =~ /\0|\r(?!\n)/ )
        {
            close($fh);
            return 1;
        }

        # Count up each byte in the line:
        $counts[$_]++ foreach unpack("C*", $line);
    }
    close $fh;

    # Don't count CR and LF as either printable/nonprintable
    $counts[ord("\n")]=0;
    $counts[ord("\r")]=0;

    # Categorize individual character count into printable and nonprintable:
    my $printable=0;
    my $nonprintable=0;
    foreach my $i ( 0 .. 255 )
    {
        if( $i < 32 &&
            $i != ord("\b") &&
            $i != ord("\t") &&
            $i != 033 &&       # ESC
            $i != 014 )        # FF
        {
            $nonprintable+=$counts[$i];
        }
        elsif( $i==127 )  # DEL
        {
            $nonprintable+=$counts[$i];
        }
        else
        {
            $printable+=$counts[$i];
        }
    }

    # Binary when non-printable bytes outnumber 1/128th of the printable ones.
    return ($printable >> 7) < $nonprintable;
}
2665
# Open a blob for reading and return the file handle; die (after logging
# a warning) if that is not possible.  Possible invocations:
#  - open_blob_or_die("file",$filename);   reads a file on disk
#  - open_blob_or_die("sha1",$filehash);   reads a git blob via cat-file
sub open_blob_or_die
{
    my ($srcType,$name) = @_;
    my $fh;

    # Plain file on disk.
    if( $srcType eq "file" )
    {
        unless( open $fh,"<",$name )
        {
            $log->warn("Unable to open file $name: $!");
            die "Unable to open file $name: $!\n";
        }
        return $fh;
    }

    # Anything other than "file" or "sha1" is a caller error.
    if( $srcType ne "sha1" )
    {
        $log->warn("Unknown type of blob source: $srcType");
        die "Unknown type of blob source: $srcType\n";
    }

    # The hash must be exactly 40 alphanumeric characters before it is
    # placed on a git command line.
    if( !defined ( $name ) or $name !~ /^[a-zA-Z0-9]{40}$/ )
    {
        $log->warn("Need filehash");
        die "Need filehash\n";
    }

    # Make sure the object really is a blob.
    my $type = `git cat-file -t $name`;
    chomp $type;
    if( !defined ( $type ) or $type ne "blob" )
    {
        $log->warn("Invalid type '$type' for '$name'");
        die ( "Invalid type '$type' (expected 'blob')" )
    }

    # The size is only used for the debug message below.
    my $size = `git cat-file -s $name`;
    chomp $size;

    $log->debug("open_blob_or_die($name) size=$size, type=$type");

    if( !open $fh, '-|', "git", "cat-file", "blob", $name )
    {
        $log->warn("Unable to open sha1 $name");
        die "Unable to open sha1 $name\n";
    }

    return $fh;
}
2716
# Derive a CVS author name from a Git author line.  The local part of the
# email address (the text between '<' and the first '@' or '>') is taken,
# and every character outside the Portable Filename Character Set (see
# IEEE Std 1003.1-2001, 3.276) is replaced by an underscore.  CVS login
# names are Unix login names, which should be restricted to this
# character set.  A leading '-' is replaced by an underscore as well.
sub cvs_author
{
    my ($author_line) = @_;

    # Local part of the address.
    my ($login) = ( $author_line =~ /<([^@>]*)/ );

    $login =~ s/[^-a-zA-Z0-9_.]/_/g;
    $login =~ s/^-/_/;

    return $login;
}
2732
2733
# Undo CVS password scrambling.  The first character of $str names the
# scrambling format and must be 'A'; each following byte is translated
# through a fixed substitution table.  Returns the decoded string and
# dies if the format character is anything else.
sub descramble
{
    my ($str) = @_;

    # This table is from src/scramble.c in the CVS source
    my @SHIFTS = (
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
        111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
        41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
        125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
        36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
        58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
        225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
        199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
        174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
        207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
        192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
        227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
        182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
        243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
    );

    # This should never happen, the same password format (A) has been
    # used by CVS since the beginning of time
    my $fmt = substr($str, 0, 1);
    die "invalid password format `$fmt'" unless $fmt eq 'A';

    # Translate everything after the format character, byte by byte.
    my $plain = '';
    foreach my $code ( unpack "C*", substr($str, 1) )
    {
        $plain .= chr $SHIFTS[$code];
    }
    return $plain;
}
2768
2769
package GITCVS::log;

####
#### Copyright The Open University UK - 2006.
####
#### Authors: Martyn Smith    <martyn@catalyst.net.nz>
####          Martin Langhoff <martin@laptop.org>
####
####

use strict;
use warnings;

=head1 NAME

GITCVS::log

=head1 DESCRIPTION

This module provides very crude logging with a similar interface to
Log::Log4perl

=head1 METHODS

=cut

=head2 new

Creates a new log object, optionally you can specify a filename here to
indicate the file to log to. If no log file is specified, you can specify one
later with method setfile, or indicate you no longer want logging with method
nofile.

Until one of these methods is called, all log calls will buffer messages ready
to write out.

Dies if the given file cannot be opened for appending.

=cut
sub new
{
    my $class = shift;
    my $filename = shift;

    my $self = {};

    bless $self, $class;

    if ( defined ( $filename ) )
    {
        open $self->{fh}, ">>", $filename or die("Couldn't open '$filename' for writing : $!");
    }

    return $self;
}

=head2 setfile

This method takes a filename, and attempts to open that file (in append mode)
as the log file. If successful, all buffered data is written out to the file,
and any further logging is written directly to the file.

If no filename is given and no log file is open yet, nothing is written and
the buffered messages are kept for a later call.

Dies if the given file cannot be opened.

=cut
sub setfile
{
    my $self = shift;
    my $filename = shift;

    if ( defined ( $filename ) )
    {
        open $self->{fh}, ">>", $filename or die("Couldn't open '$filename' for writing : $!");
    }

    return unless ( defined ( $self->{buffer} ) and ref $self->{buffer} eq "ARRAY" );

    # Without an open handle there is nowhere to write to.  Keep the
    # buffer intact rather than printing to an undefined file handle.
    return unless ( $self->_logopen );

    # Drain by element count, so that a line which happens to evaluate
    # to false cannot end the loop early.
    while ( @{$self->{buffer}} )
    {
        my $line = shift @{$self->{buffer}};
        print {$self->{fh}} $line;
    }
}

=head2 nofile

This method indicates no logging is going to be used. It discards any entries
in the internal buffer, and sets a flag to ensure no further data is put there.

=cut
sub nofile
{
    my $self = shift;

    $self->{nolog} = 1;

    return unless ( defined ( $self->{buffer} ) and ref $self->{buffer} eq "ARRAY" );

    $self->{buffer} = [];
}

=head2 _logopen

Internal method. Returns true if the log file is open, false otherwise.

=cut
sub _logopen
{
    my $self = shift;

    return 1 if ( defined ( $self->{fh} ) and ref $self->{fh} eq "GLOB" );
    return 0;
}

=head2 debug info warn fatal

These four methods are wrappers to _log. They provide the actual interface for
logging data. All arguments are joined with a single space to form the message.

=cut
sub debug { my $self = shift; $self->_log("debug", @_); }
sub info  { my $self = shift; $self->_log("info" , @_); }
sub warn  { my $self = shift; $self->_log("warn" , @_); }
sub fatal { my $self = shift; $self->_log("fatal", @_); }

=head2 _log

This is an internal method called by the logging functions. It generates a
timestamp and pushes the logged line either to file, or internal buffer.
Nothing is recorded once nofile has been called.

=cut
sub _log
{
    my $self = shift;
    my $level = shift;

    return if ( $self->{nolog} );

    # "YYYY-MM-DD hh:mm:ss : LEVEL" with the level padded to five columns.
    my @time = localtime;
    my $timestring = sprintf("%4d-%02d-%02d %02d:%02d:%02d : %-5s",
        $time[5] + 1900,
        $time[4] + 1,
        $time[3],
        $time[2],
        $time[1],
        $time[0],
        uc $level,
    );

    my $entry = $timestring . " - " . join(" ",@_) . "\n";

    if ( $self->_logopen )
    {
        print {$self->{fh}} $entry;
    } else {
        push @{$self->{buffer}}, $entry;
    }
}

=head2 DESTROY

This method simply closes the file handle if one is open

=cut
sub DESTROY
{
    my $self = shift;

    if ( $self->_logopen )
    {
        close $self->{fh};
    }
}
2936
2937package GITCVS::updater;
2938
2939####
2940#### Copyright The Open University UK - 2006.
2941####
2942#### Authors: Martyn Smith    <martyn@catalyst.net.nz>
2943####          Martin Langhoff <martin@laptop.org>
2944####
2945####
2946
2947use strict;
2948use warnings;
2949use DBI;
2950
2951=head1 METHODS
2952
2953=cut
2954
2955=head2 new
2956
2957=cut
# Constructor for GITCVS::updater.
#
# Positional arguments:
#   $config - path of the git repository directory; must exist
#   $module - module name; "sub update" hands it to git as a revision
#   $log    - logger object, stored in $self->{log}
#
# Reads the database settings from $cfg (using $state->{method} to find
# per-method overrides), connects through DBI and creates whichever of
# the revision, head, properties and commitmsgs tables (and their
# indexes) do not exist yet.
#
# Dies if an argument is missing, the repository directory does not
# exist, the driver or database name contains a forbidden character, or
# the database connection cannot be established.
sub new
{
    my $class = shift;
    my $config = shift;
    my $module = shift;
    my $log = shift;    # shadows the file-level $log inside this sub

    die "Need to specify a git repository" unless ( defined($config) and -d $config );
    die "Need to specify a module" unless ( defined($module) );

    # Allow being called on an existing object as well as on a class name.
    $class = ref($class) || $class;

    my $self = {};

    bless $self, $class;

    # Logical table/index names accepted by tablename(); anything else
    # makes tablename() return undef.
    $self->{valid_tables} = {'revision' => 1,
                             'revision_ix1' => 1,
                             'revision_ix2' => 1,
                             'head' => 1,
                             'head_ix1' => 1,
                             'properties' => 1,
                             'commitmsgs' => 1};

    $self->{module} = $module;
    $self->{git_path} = $config . "/";

    $self->{log} = $log;

    die "Git repo '$self->{git_path}' doesn't exist" unless ( -d $self->{git_path} );

    # Stores full sha1's for various branch/tag names, abbreviations, etc:
    $self->{commitRefCache} = {};

    # Database settings: the per-method value wins over the general
    # gitcvs value, which wins over the built-in default.  Because "||"
    # is used, a false value (e.g. "" or "0") falls through to the next
    # candidate.
    $self->{dbdriver} = $cfg->{gitcvs}{$state->{method}}{dbdriver} ||
        $cfg->{gitcvs}{dbdriver} || "SQLite";
    $self->{dbname} = $cfg->{gitcvs}{$state->{method}}{dbname} ||
        $cfg->{gitcvs}{dbname} || "%Ggitcvs.%m.sqlite";
    $self->{dbuser} = $cfg->{gitcvs}{$state->{method}}{dbuser} ||
        $cfg->{gitcvs}{dbuser} || "";
    $self->{dbpass} = $cfg->{gitcvs}{$state->{method}}{dbpass} ||
        $cfg->{gitcvs}{dbpass} || "";
    $self->{dbtablenameprefix} = $cfg->{gitcvs}{$state->{method}}{dbtablenameprefix} ||
        $cfg->{gitcvs}{dbtablenameprefix} || "";
    # Placeholders that may appear in dbname, dbuser and dbtablenameprefix:
    #   %m module, %a access method, %u user name (or uid),
    #   %G git directory, %g git directory passed through mangle_dirname()
    # dbdriver and dbpass are not expanded.
    my %mapping = ( m => $module,
                    a => $state->{method},
                    u => getlogin || getpwuid($<) || $<,
                    G => $self->{git_path},
                    g => mangle_dirname($self->{git_path}),
                    );
    $self->{dbname} =~ s/%([mauGg])/$mapping{$1}/eg;
    $self->{dbuser} =~ s/%([mauGg])/$mapping{$1}/eg;
    $self->{dbtablenameprefix} =~ s/%([mauGg])/$mapping{$1}/eg;
    $self->{dbtablenameprefix} = mangle_tablename($self->{dbtablenameprefix});

    # Both values are spliced into the DSN string below, so the
    # characters ':' and ';' are refused in them.
    die "Invalid char ':' in dbdriver" if $self->{dbdriver} =~ /:/;
    die "Invalid char ';' in dbname" if $self->{dbname} =~ /;/;
    $self->{dbh} = DBI->connect("dbi:$self->{dbdriver}:dbname=$self->{dbname}",
                                $self->{dbuser},
                                $self->{dbpass});
    die "Error connecting to database\n" unless defined $self->{dbh};

    # Remember which tables already exist so that only the missing ones
    # are created below.
    $self->{tables} = {};
    foreach my $table ( keys %{$self->{dbh}->table_info(undef,undef,undef,'TABLE')->fetchall_hashref('TABLE_NAME')} )
    {
        $self->{tables}{$table} = 1;
    }

    # Construct the revision table if required
    # The revision table stores an entry for each file, each time that file
    # changes.
    #   numberOfRecords = O( numCommits * averageNumChangedFilesPerCommit )
    # This is not sufficient to support "-r {commithash}" for any
    # files except files that were modified by that commit (also,
    # some places in the code ignore/effectively strip out -r in
    # some cases, before it gets passed to getmeta()).
    # The "filehash" field typically has a git blob hash, but can also
    # be set to "dead" to indicate that the given version of the file
    # should not exist in the sandbox.
    unless ( $self->{tables}{$self->tablename("revision")} )
    {
        my $tablename = $self->tablename("revision");
        my $ix1name = $self->tablename("revision_ix1");
        my $ix2name = $self->tablename("revision_ix2");
        $self->{dbh}->do("
            CREATE TABLE $tablename (
                name       TEXT NOT NULL,
                revision   INTEGER NOT NULL,
                filehash   TEXT NOT NULL,
                commithash TEXT NOT NULL,
                author     TEXT NOT NULL,
                modified   TEXT NOT NULL,
                mode       TEXT NOT NULL
            )
        ");
        $self->{dbh}->do("
            CREATE INDEX $ix1name
            ON $tablename (name,revision)
        ");
        $self->{dbh}->do("
            CREATE INDEX $ix2name
            ON $tablename (name,commithash)
        ");
    }

    # Construct the head table if required
    # The head table (along with the "last_commit" entry in the property
    # table) is the persisted working state of the "sub update" subroutine.
    # All of its data is read entirely first, and completely recreated
    # last, every time "sub update" runs.
    # This is also used by "sub getmeta" when it is asked for the latest
    # version of a file (as opposed to some specific version).
    # Another way of thinking about it is as a single slice out of
    # "revisions", giving just the most recent revision information for
    # each file.
    unless ( $self->{tables}{$self->tablename("head")} )
    {
        my $tablename = $self->tablename("head");
        my $ix1name = $self->tablename("head_ix1");
        $self->{dbh}->do("
            CREATE TABLE $tablename (
                name       TEXT NOT NULL,
                revision   INTEGER NOT NULL,
                filehash   TEXT NOT NULL,
                commithash TEXT NOT NULL,
                author     TEXT NOT NULL,
                modified   TEXT NOT NULL,
                mode       TEXT NOT NULL
            )
        ");
        $self->{dbh}->do("
            CREATE INDEX $ix1name
            ON $tablename (name)
        ");
    }

    # Construct the properties table if required
    #  - "last_commit" - Used by "sub update".
    unless ( $self->{tables}{$self->tablename("properties")} )
    {
        my $tablename = $self->tablename("properties");
        $self->{dbh}->do("
            CREATE TABLE $tablename (
                key        TEXT NOT NULL PRIMARY KEY,
                value      TEXT
            )
        ");
    }

    # Construct the commitmsgs table if required
    # The commitmsgs table is only used for merge commits, since
    # "sub update" will only keep one branch of parents.  Shortlogs
    # for ignored commits (i.e. not on the chosen branch) will be used
    # to construct a replacement "collapsed" merge commit message,
    # which will be stored in this table.  See also "sub commitmessage".
    unless ( $self->{tables}{$self->tablename("commitmsgs")} )
    {
        my $tablename = $self->tablename("commitmsgs");
        $self->{dbh}->do("
            CREATE TABLE $tablename (
                key        TEXT NOT NULL PRIMARY KEY,
                value      TEXT
            )
        ");
    }

    return $self;
}
3126
3127=head2 tablename
3128
3129=cut
# Map a logical table or index name to the name used in the database by
# prepending the configured table name prefix.  Returns undef for any
# name that is not listed in $self->{valid_tables}.
sub tablename
{
    my ($self, $name) = @_;

    return undef unless exists $self->{valid_tables}{$name};

    return $self->{dbtablenameprefix} . $name;
}
3142=head2 update
3143
3144Bring the database up to date with the latest changes from
3145the git repository.
3146
3147Internal working state is read out of the "head" table and the
3148"last_commit" property, then it updates "revisions" based on that, and
3149finally it writes the new internal state back to the "head" table
3150so it can be used as a starting point the next time update is called.
3151
3152=cut
# Bring the database up to date with the git repository.
#
# Outline:
#   1. Resolve $self->{module} to a commit and compare it with the
#      "last_commit" property; return 1 at once when they are equal.
#   2. Inside a database transaction, list the commits to process with
#      git rev-list and walk through them, following a single line of
#      parents.
#   3. For every commit picked, record one revision per changed file
#      (insert_rev) and keep an in-memory copy of the head table current.
#   4. Rewrite the head table from the in-memory copy and commit.
#
# Dies if the module is not a valid commit, if a git command cannot be
# started, if git output cannot be parsed, or if the transaction cannot
# be begun or committed.
sub update
{
    my $self = shift;

    # Point git at the repository, then resolve the module to a commit.
    # NOTE(review): the module name is interpolated into shell command
    # lines here - confirm that callers validate it.
    $ENV{GIT_DIR} = $self->{git_path};

    my $commitsha1 = `git rev-parse $self->{module}`;
    chomp $commitsha1;

    # A valid commit object contains a "tree <sha1>" line.
    my $commitinfo = `git cat-file commit $self->{module} 2>&1`;
    unless ( $commitinfo =~ /tree\s+[a-zA-Z0-9]{40}/ )
    {
        die("Invalid module '$self->{module}'");
    }


    my $git_log;
    my $lastcommit = $self->_get_prop("last_commit");

    if (defined $lastcommit && $lastcommit eq $commitsha1) { # up-to-date
         return 1;
    }

    # Start exclusive lock here...
    $self->{dbh}->begin_work() or die "Cannot lock database for BEGIN";

    # TODO: log processing is memory bound
    # if we can parse into a 2nd file that is in reverse order
    # we can probably do something really efficient
    my @git_log_params = ('--pretty', '--parents', '--topo-order');

    # Only ask for commits made since the previous run, if there was one.
    if (defined $lastcommit) {
        push @git_log_params, "$lastcommit..$self->{module}";
    } else {
        push @git_log_params, $self->{module};
    }
    # git-rev-list is the backend / plumbing version of git-log
    open(my $gitLogPipe, '-|', 'git', 'rev-list', @git_log_params)
                or die "Cannot call git-rev-list: $!";
    my @commits=readCommits($gitLogPipe);
    close $gitLogPipe;

    # Now all the commits are in the @commits bucket
    # ordered by time DESC. for each commit that needs processing,
    # determine whether it's following the last head we've seen or if
    # it's on its own branch, grab a file list, and add whatever's changed
    # NOTE: $lastcommit refers to the last commit from previous run
    #       $lastpicked is the last commit we picked in this run
    # NOTE(review): readCommits appears to unshift each commit it reads,
    # which would reverse the rev-list order - confirm the ordering
    # described above.
    my $lastpicked;
    my $head = {};
    if (defined $lastcommit) {
        $lastpicked = $lastcommit;
    }

    my $committotal = scalar(@commits);
    my $commitcount = 0;

    # Load the head table into $head (for cached lookups during the update process)
    foreach my $file ( @{$self->gethead(1)} )
    {
        $head->{$file->{name}} = $file;
    }

    foreach my $commit ( @commits )
    {
        $self->{log}->debug("GITCVS::updater - Processing commit $commit->{hash} (" . (++$commitcount) . " of $committotal)");
        if (defined $lastpicked)
        {
            # Only commits that have the previously picked commit as a
            # parent are followed.
            if (!in_array($lastpicked, @{$commit->{parents}}))
            {
                # skip, we'll see this delta
                # as part of a merge later
                # warn "skipping off-track  $commit->{hash}\n";
                next;
            } elsif (@{$commit->{parents}} > 1) {
                # it is a merge commit, for each parent that is
                # not $lastpicked (not given a CVS revision number),
                # see if we can get a log
                # from the merge-base to that parent to put it
                # in the message as a merge summary.
                my @parents = @{$commit->{parents}};
                foreach my $parent (@parents) {
                    if ($parent eq $lastpicked) {
                        next;
                    }
                    # git-merge-base can potentially (but rarely) throw
                    # several candidate merge bases. let's assume
                    # that the first one is the best one.
                    my $base = eval {
                            safe_pipe_capture('git', 'merge-base',
                                                 $lastpicked, $parent);
                    };
                    # The two branches may not be related at all,
                    # in which case merge base simply fails to find
                    # any, but that's Ok.
                    next if ($@);

                    chomp $base;
                    if ($base) {
                        my @merged;
                        # print "want to log between  $base $parent \n";
                        open(GITLOG, '-|', 'git', 'log', '--pretty=medium', "$base..$parent")
                          or die "Cannot call git-log: $!";
                        # Collect "<hash> <title>" for every merged
                        # commit; $mergedhash is set while the title of
                        # the current commit is still being looked for.
                        my $mergedhash;
                        while (<GITLOG>) {
                            chomp;
                            if (!defined $mergedhash) {
                                if (m/^commit\s+(.+)$/) {
                                    $mergedhash = $1;
                                } else {
                                    next;
                                }
                            } else {
                                # grab the first line that looks non-rfc822
                                # aka has content after leading space
                                if (m/^\s+(\S.*)$/) {
                                    my $title = $1;
                                    $title = substr($title,0,100); # truncate
                                    unshift @merged, "$mergedhash $title";
                                    undef $mergedhash;
                                }
                            }
                        }
                        close GITLOG;
                        if (@merged) {
                            # Build the replacement message: the original
                            # message followed by one line per merged commit.
                            $commit->{mergemsg} = $commit->{message};
                            $commit->{mergemsg} .= "\nSummary of merged commits:\n\n";
                            foreach my $summary (@merged) {
                                $commit->{mergemsg} .= "\t$summary\n";
                            }
                            $commit->{mergemsg} .= "\n\n";
                            # print "Message for $commit->{hash} \n$commit->{mergemsg}";
                        }
                    }
                }
            }
        }

        # convert the date to CVS-happy format
        my $cvsDate = convertToCvsDate($commit->{date});

        if ( defined ( $lastpicked ) )
        {
            # A previous commit is known: record only what changed
            # between it and this commit.  With -z the records are NUL
            # terminated, hence the input record separator below.
            my $filepipe = open(FILELIST, '-|', 'git', 'diff-tree', '-z', '-r', $lastpicked, $commit->{hash}) or die("Cannot call git-diff-tree : $!");
            local ($/) = "\0";
            while ( <FILELIST> )
            {
                chomp;
                # Captures: $1 = second (new) mode, $2 = second (new)
                # hash, $3 = status letter.
                unless ( /^:\d{6}\s+([0-7]{6})\s+[a-f0-9]{40}\s+([a-f0-9]{40})\s+(\w)$/o )
                {
                    die("Couldn't process git-diff-tree line : $_");
                }
                my ($mode, $hash, $change) = ($1, $2, $3);
                # The file name is the next NUL-terminated record.
                my $name = <FILELIST>;
                chomp($name);

                # $log->debug("File mode=$mode, hash=$hash, change=$change, name=$name");

                my $dbMode = convertToDbMode($mode);

                if ( $change eq "D" )
                {
                    #$log->debug("DELETE   $name");
                    # NOTE(review): the in-memory head entry gets the
                    # filehash "deleted", while insert_rev below is given
                    # $hash as reported by diff-tree - confirm intended.
                    $head->{$name} = {
                        name => $name,
                        revision => $head->{$name}{revision} + 1,
                        filehash => "deleted",
                        commithash => $commit->{hash},
                        modified => $cvsDate,
                        author => $commit->{author},
                        mode => $dbMode,
                    };
                    $self->insert_rev($name, $head->{$name}{revision}, $hash, $commit->{hash}, $cvsDate, $commit->{author}, $dbMode);
                }
                elsif ( $change eq "M" || $change eq "T" )
                {
                    #$log->debug("MODIFIED $name");
                    $head->{$name} = {
                        name => $name,
                        revision => $head->{$name}{revision} + 1,
                        filehash => $hash,
                        commithash => $commit->{hash},
                        modified => $cvsDate,
                        author => $commit->{author},
                        mode => $dbMode,
                    };
                    $self->insert_rev($name, $head->{$name}{revision}, $hash, $commit->{hash}, $cvsDate, $commit->{author}, $dbMode);
                }
                elsif ( $change eq "A" )
                {
                    #$log->debug("ADDED    $name");
                    # A name that already has a revision number continues
                    # from it; otherwise numbering starts at 1.
                    $head->{$name} = {
                        name => $name,
                        revision => $head->{$name}{revision} ? $head->{$name}{revision}+1 : 1,
                        filehash => $hash,
                        commithash => $commit->{hash},
                        modified => $cvsDate,
                        author => $commit->{author},
                        mode => $dbMode,
                    };
                    $self->insert_rev($name, $head->{$name}{revision}, $hash, $commit->{hash}, $cvsDate, $commit->{author}, $dbMode);
                }
                else
                {
                    $log->warn("UNKNOWN FILE CHANGE mode=$mode, hash=$hash, change=$change, name=$name");
                    die;
                }
            }
            close FILELIST;
        } else {
            # No previous commit is known: walk the complete tree of this
            # commit and compare every entry with the head table.

            # this is used to detect files removed from the repo
            my $seen_files = {};

            my $filepipe = open(FILELIST, '-|', 'git', 'ls-tree', '-z', '-r', $commit->{hash}) or die("Cannot call git-ls-tree : $!");
            local $/ = "\0";
            while ( <FILELIST> )
            {
                chomp;
                # Captures: mode, object type, object hash, file name.
                unless ( /^(\d+)\s+(\w+)\s+([a-zA-Z0-9]+)\t(.*)$/o )
                {
                    die("Couldn't process git-ls-tree line : $_");
                }

                my ( $mode, $git_type, $git_hash, $git_filename ) = ( $1, $2, $3, $4 );

                $seen_files->{$git_filename} = 1;

                my ( $oldhash, $oldrevision, $oldmode ) = (
                    $head->{$git_filename}{filehash},
                    $head->{$git_filename}{revision},
                    $head->{$git_filename}{mode}
                );

                my $dbMode = convertToDbMode($mode);

                # unless the file exists with the same hash, we need to update it ...
                unless ( defined($oldhash) and $oldhash eq $git_hash and defined($oldmode) and $oldmode eq $dbMode )
                {
                    my $newrevision = ( $oldrevision or 0 ) + 1;

                    $head->{$git_filename} = {
                        name => $git_filename,
                        revision => $newrevision,
                        filehash => $git_hash,
                        commithash => $commit->{hash},
                        modified => $cvsDate,
                        author => $commit->{author},
                        mode => $dbMode,
                    };


                    $self->insert_rev($git_filename, $newrevision, $git_hash, $commit->{hash}, $cvsDate, $commit->{author}, $dbMode);
                }
            }
            close FILELIST;

            # Detect deleted files
            foreach my $file ( keys %$head )
            {
                unless ( exists $seen_files->{$file} or $head->{$file}{filehash} eq "deleted" )
                {
                    $head->{$file}{revision}++;
                    $head->{$file}{filehash} = "deleted";
                    $head->{$file}{commithash} = $commit->{hash};
                    $head->{$file}{modified} = $cvsDate;
                    $head->{$file}{author} = $commit->{author};

                    $self->insert_rev($file, $head->{$file}{revision}, $head->{$file}{filehash}, $commit->{hash}, $cvsDate, $commit->{author}, $head->{$file}{mode});
                }
            }
            # END : "Detect deleted files"
        }


        # Store the collapsed merge message, if one was built above.
        if (exists $commit->{mergemsg})
        {
            $self->insert_mergelog($commit->{hash}, $commit->{mergemsg});
        }

        $lastpicked = $commit->{hash};

        $self->_set_prop("last_commit", $commit->{hash});
    }

    # Replace the stored head table with the in-memory copy.
    $self->delete_head();
    foreach my $file ( keys %$head )
    {
        $self->insert_head(
            $file,
            $head->{$file}{revision},
            $head->{$file}{filehash},
            $head->{$file}{commithash},
            $head->{$file}{modified},
            $head->{$file}{author},
            $head->{$file}{mode},
        );
    }
    # invalidate the gethead cache
    $self->clearCommitRefCaches();


    # Ending exclusive lock here
    $self->{dbh}->commit() or die "Failed to commit changes to SQLite";
}
3458
# Parse "git rev-list --pretty --parents" style output read from $pipeHandle.
#
# Returns a list of hashrefs, one per commit, in the REVERSE of the order in
# which the commits were read.  Each hash contains:
#   hash    - first word after "commit" on the commit line
#   parents - arrayref holding the remaining words of that line
#   <name>  - one lowercased key per "Name: value" header line that appears
#             before the message (e.g. author, date)
#   message - the message lines, each stripped of surrounding whitespace
#             and terminated with "\n"
sub readCommits
{
    my $pipeHandle = shift;
    my @commits;

    my %commit = ();

    # Read into a lexical instead of $_: a bare "while (<$fh>)" assigns to
    # the global $_ and leaves it undefined for whoever called us.
    while ( defined( my $line = <$pipeHandle> ) )
    {
        chomp $line;
        if ( $line =~ m/^commit\s+(.*)$/ ) {
            my @ids = split(m/\s+/, $1);

            # on ^commit lines put the just seen commit in the stack
            # and prime things for the next one
            if (keys %commit) {
                unshift @commits, +{ %commit };
                %commit = ();
            }
            $commit{hash} = shift @ids;
            $commit{parents} = \@ids;
        } elsif ( $line =~ m/^(\w+?):\s+(.*)$/ && !exists($commit{message}) ) {
            # on rfc822-like lines seen before we see any message,
            # lowercase the entry and put it in the hash as key-value
            $commit{lc($1)} = $2;
        } else {
            # message lines - skip initial empty line
            # and trim whitespace
            if ( !exists($commit{message}) && $line =~ m/^\s*$/ ) {
                # define it to mark the end of headers
                $commit{message} = '';
                next;
            }
            $line =~ s/^\s+//;
            $line =~ s/\s+$//;
            $commit{message} .= $line . "\n";
        }
    }

    # The last commit has no following "commit" line to flush it.
    unshift @commits, \%commit if ( keys %commit );

    return @commits;
}
3501
# Reorder a "git rev-list --pretty" date ("Mon May 26 13:01:40 1997 -0400")
# into the RFC822/RFC1123 style form CVS expects ("26 May 1997 13:01:40 -0400").
# Anything that does not look like the git form is returned untouched.
sub convertToCvsDate
{
    my ($date) = @_;

    # fields, in order of appearance: month name, day, time, year, zone
    my @field = ( $date =~ /^\w+\s+(\w+)\s+(\d+)\s+(\d+:\d+:\d+)\s+(\d+)\s+([+-]\d+)$/ );

    return $date unless @field;

    my ( $month, $day, $time, $year, $zone ) = @field;
    return "$day $month $year $time $zone";
}
3515
# Convert a six-digit octal git mode (as printed by git-ls-tree, e.g.
# "100755") into the string kept in the database "mode" column: some
# combination of "r", "w" and "x" describing the user permission bits.
# When no user bit is set, or the mode cannot be parsed, "rw" is returned.
sub convertToDbMode
{
    my $mode = shift;

    # NOTE: The CVS protocol uses a string similar "u=rw,g=rw,o=rw",
    #  but the database "mode" column historically (and currently)
    #  only stores the "rw" (for user) part of the string.
    #    FUTURE: It might make more sense to persist the raw
    #  octal mode (or perhaps the final full CVS form) instead of
    #  this half-converted form, but it isn't currently worth the
    #  backwards compatibility headaches.

    # Layout is TTTugo: three file-type digits followed by the user,
    # group and other permission digits, so the user bits are the FOURTH
    # digit.  Only read $1 after a successful match; otherwise it would
    # still hold the capture of some earlier, unrelated regex.
    my $userBits = 0;
    if ( defined($mode) and $mode =~ /^\d{3}(\d)\d\d$/ )
    {
        $userBits = $1;
    }

    my $dbMode = "";
    $dbMode .= "r" if ( $userBits & 4 );
    $dbMode .= "w" if ( $userBits & 2 );
    $dbMode .= "x" if ( $userBits & 1 );
    $dbMode = "rw" if ( $dbMode eq "" );

    return $dbMode;
}
3539
# Add one row to the "revision" table.
# Arguments after $self: name, revision, filehash, commithash, modified,
# author, mode -- bound to the columns of the same names.
# Returns the result of the statement handle's execute().
sub insert_rev
{
    my ( $self, $name, $revision, $filehash, $commithash,
         $modified, $author, $mode ) = @_;

    my $table = $self->tablename("revision");
    my $sth = $self->{dbh}->prepare_cached(
        "INSERT INTO $table (name, revision, filehash, commithash, modified, author, mode) VALUES (?,?,?,?,?,?,?)",
        {}, 1);

    return $sth->execute($name, $revision, $filehash, $commithash,
                         $modified, $author, $mode);
}
3555
# Store a (key, value) pair in the "commitmsgs" table.
# Returns the result of the statement handle's execute().
sub insert_mergelog
{
    my ( $self, $key, $value ) = @_;

    my $table = $self->tablename("commitmsgs");
    my $sth = $self->{dbh}->prepare_cached(
        "INSERT INTO $table (key, value) VALUES (?,?)", {}, 1);

    return $sth->execute($key, $value);
}
3566
# Remove every row from the "head" table.
# Returns the result of the statement handle's execute().
sub delete_head
{
    my ($self) = @_;

    my $table = $self->tablename("head");
    my $sth = $self->{dbh}->prepare_cached("DELETE FROM $table", {}, 1);

    return $sth->execute();
}
3575
# Add one row to the "head" table.
# Arguments after $self: name, revision, filehash, commithash, modified,
# author, mode -- bound to the columns of the same names.
# Returns the result of the statement handle's execute().
sub insert_head
{
    my ( $self, $name, $revision, $filehash, $commithash,
         $modified, $author, $mode ) = @_;

    my $table = $self->tablename("head");
    my $sth = $self->{dbh}->prepare_cached(
        "INSERT INTO $table (name, revision, filehash, commithash, modified, author, mode) VALUES (?,?,?,?,?,?,?)",
        {}, 1);

    return $sth->execute($name, $revision, $filehash, $commithash,
                         $modified, $author, $mode);
}
3591
# Look up $key in the "properties" table.
# Returns the first column of the first matching row (undef if none).
sub _get_prop
{
    my ( $self, $key ) = @_;

    my $table = $self->tablename("properties");
    my $sth = $self->{dbh}->prepare_cached(
        "SELECT value FROM $table WHERE key=?", {}, 1);
    $sth->execute($key);

    my ($found) = $sth->fetchrow_array;
    return $found;
}
3604
# Set $key to $value in the "properties" table: try an UPDATE first and
# fall back to an INSERT when the UPDATE reports no affected rows.
# Returns $value.
sub _set_prop
{
    my ( $self, $key, $value ) = @_;

    my $table = $self->tablename("properties");

    my $sth = $self->{dbh}->prepare_cached(
        "UPDATE $table SET value=? WHERE key=?", {}, 1);
    $sth->execute($value, $key);

    if ( !$sth->rows )
    {
        # nothing was updated, so the key does not exist yet
        $sth = $self->{dbh}->prepare_cached(
            "INSERT INTO $table (key, value) VALUES (?,?)", {}, 1);
        $sth->execute($key, $value);
    }

    return $value;
}
3623
3624=head2 gethead
3625
3626=cut
3627
# Return a reference to an array of hashrefs, one per row of the "head"
# table, ordered by name.  Each hash has the keys name, filehash, mode,
# revision, modified, commithash and author.
#
# $intRev (optional): when true the revision column is returned as stored;
# otherwise every revision is prefixed with "1." (CVS style).
#
# Only the default ("1.N") form is cached in $self->{gethead_cache}.
sub gethead
{
    my $self = shift;
    my $intRev = shift;
    my $tablename = $self->tablename("head");

    # The cache holds "1.N" style revisions, so it must neither serve nor be
    # filled by a caller asking for the raw form -- otherwise the format of
    # the result would depend on which kind of call happened to come first.
    if ( !$intRev and defined ( $self->{gethead_cache} ) )
    {
        return $self->{gethead_cache};
    }

    my $db_query = $self->{dbh}->prepare_cached("SELECT name, filehash, mode, revision, modified, commithash, author FROM $tablename ORDER BY name ASC",{},1);
    $db_query->execute();

    my $tree = [];
    while ( my $file = $db_query->fetchrow_hashref )
    {
        if(!$intRev)
        {
            $file->{revision} = "1.$file->{revision}"
        }
        push @$tree, $file;
    }

    $self->{gethead_cache} = $tree if ( !$intRev );

    return $tree;
}
3653
3654=head2 getAnyHead
3655
3656Returns a reference to an array of getmeta structures, one
3657per file in the specified tree hash.
3658
3659=cut
3660
# Arguments:
#   $hash - tree-ish handed to "git ls-tree -r"; when undefined the
#           result of gethead() is returned instead.
# Returns an arrayref with one getMetaFromCommithash() result per entry
# that git-ls-tree lists for $hash.  Dies if git-ls-tree cannot be
# started or prints a line that cannot be parsed.
sub getAnyHead
{
    my ($self,$hash) = @_;

    if(!defined($hash))
    {
        return $self->gethead();
    }

    my @files;
    {
        open(my $filePipe, '-|', 'git', 'ls-tree', '-z', '-r', $hash)
                or die("Cannot call git-ls-tree : $!");
        # "-z" output is NUL-separated; keep the $/ override confined
        # to this block.
        local $/ = "\0";
        @files=<$filePipe>;
        close $filePipe;
    }

    my $tree=[];
    foreach my $line (@files)
    {
        $line=~s/\0$//;
        unless ( $line=~/^(\d+)\s+(\w+)\s+([a-zA-Z0-9]+)\t(.*)$/o )
        {
            # report the offending record itself (not whatever $_ holds)
            die("Couldn't process git-ls-tree line : $line");
        }

        # fields are mode, type, object hash, filename; only the
        # filename is needed here
        my $git_filename = $4;
        push @$tree, $self->getMetaFromCommithash($git_filename,$hash);
    }

    return $tree;
}
3695
3696=head2 getRevisionDirMap
3697
3698A "revision dir map" contains all the plain-file filenames associated
3699with a particular revision (treeish), organized by directory:
3700
3701  $type = $out->{$dir}{$fullName}
3702
3703The type of each is "F" (for ordinary file) or "D" (for directory,
3704for which the map $out->{$fullName} will also exist).
3705
3706=cut
3707
# Build (or fetch from $self->{revisionDirMapCache}) the directory map for
# $ver.  An undefined or empty $ver means "current head" (file list taken
# from gethead(), skipping deleted files, cached under the key "");
# anything else is resolved with lookupCommitRef() and listed with
# "git ls-tree -r" (cached under the commit hash).
# Returns the map (a hashref), or undef if $ver cannot be resolved.
sub getRevisionDirMap
{
    my ($self,$ver)=@_;

    $self->{revisionDirMapCache}={} if(!defined($self->{revisionDirMapCache}));

    # Directory part of a path; '' for anything without a slash.
    my $parentOf = sub
    {
        my($parent)=($_[0]=~m%^(?:(.*)/)?([^/]*)$%);
        return defined($parent) ? $parent : '';
    };

        # Collect the plain list of file names (returning early on a
        # cache hit):
    my $cacheKey;
    my @fileList;
    if( defined($ver) && $ver ne "" )
    {
        my ($hash)=$self->lookupCommitRef($ver);
        return undef if( !defined($hash) );

        $cacheKey=$hash;
        return $self->{revisionDirMapCache}{$cacheKey}
            if( defined($self->{revisionDirMapCache}{$cacheKey}) );

        open(my $filePipe, '-|', 'git', 'ls-tree', '-z', '-r', $hash)
                or die("Cannot call git-ls-tree : $!");
        local $/ = "\0";
        while ( <$filePipe> )
        {
            chomp;
            die("Couldn't process git-ls-tree line : $_")
                unless ( /^(\d+)\s+(\w+)\s+([a-zA-Z0-9]+)\t(.*)$/o );

            # captures are mode, type, hash, filename
            push @fileList, $4;
        }
        close $filePipe;
    }
    else
    {
        $cacheKey="";
        return $self->{revisionDirMapCache}{$cacheKey}
            if( defined($self->{revisionDirMapCache}{$cacheKey}) );

        @fileList = map { $_->{name} }
                    grep { $_->{filehash} ne "deleted" }
                    @{$self->gethead()};
    }

        # Convert to normalized form:
    my %revMap;
    foreach my $file (@fileList)
    {
        my $dir = $parentOf->($file);

            # make sure $dir and each of its ancestors has a map,
            # stopping at the first one that already exists:
        for( my $td=$dir; !defined($revMap{$td}); $td=$parentOf->($td) )
        {
            $revMap{$td}={};
        }

            # list each directory inside its parent's map, walking
            # upwards until one is already listed:
        my $td=$dir;
        while($td ne "")
        {
            my $tp=$parentOf->($td);

            if(defined($revMap{$tp}{$td}))
            {
                die "Weird file/directory inconsistency in $cacheKey"
                    if($revMap{$tp}{$td} ne 'D');
                last;
            }
            $revMap{$tp}{$td}='D';

            $td=$tp;
        }

            # and finally the file itself
        $revMap{$dir}{$file}='F';
    }

        # Save in cache:
    $self->{revisionDirMapCache}{$cacheKey}=\%revMap;
    return $self->{revisionDirMapCache}{$cacheKey};
}
3816
3817=head2 getlog
3818
3819See also gethistorydense().
3820
3821=cut
3822
# Fetch the rows of the "revision" table for $filename, newest revision
# first.
#
# $revFilter (optional) is a CVS "-r" style range: "1.A:1.B", "1.A::1.B",
# or either form with one side omitted.  "::" excludes the lower bound
# itself.  Any other filter string is ignored (no filtering).
#
# Returns a two element list: an arrayref of the rows that passed the
# filter (revision rewritten as "1.N"), and the total number of rows seen
# before filtering.
sub getlog
{
    my $self = shift;
    my $filename = shift;
    my $revFilter = shift;

    my $tablename = $self->tablename("revision");

    # Filters:
    # TODO: date, state, or by specific logins filters?
    # TODO: Handle comma-separated list of revFilter items, each item
    #   can be a range [only case currently handled] or individual
    #   rev or branch or "branch.".
    # TODO: Adjust $db_query WHERE clause based on revFilter, instead of
    #   manually filtering the results of the query?
    my ( $minrev, $maxrev );
    # Parse the filter we were actually given (not a global), with the dots
    # escaped and plain \d+ bounds so single-digit revisions match as well.
    if( defined($revFilter) and
        $revFilter =~ /^(?:1\.(\d+))?(::?)(?:1\.(\d+))?$/ )
    {
        my $control = $2;
        $minrev = $1;
        $maxrev = $3;
        $minrev++ if ( defined($minrev) and $control eq "::" );
    }

    my $db_query = $self->{dbh}->prepare_cached("SELECT name, filehash, author, mode, revision, modified, commithash FROM $tablename WHERE name=? ORDER BY revision DESC",{},1);
    $db_query->execute($filename);

    my $totalRevs=0;
    my $tree = [];
    while ( my $file = $db_query->fetchrow_hashref )
    {
        $totalRevs++;
        if( defined($minrev) and $file->{revision} < $minrev )
        {
            next;
        }
        if( defined($maxrev) and $file->{revision} > $maxrev )
        {
            next;
        }

        $file->{revision} = "1." . $file->{revision};
        push @$tree, $file;
    }

    return ($tree,$totalRevs);
}
3871
3872=head2 getmeta
3873
3874This function takes a filename (with path) argument and returns a hashref of
3875metadata for that file.
3876
3877=cut
3878
# Which table and column are queried depends on the form of $revision:
#   "1.N"                -> "revision" table, matched on revision N
#   40 alphanumerics     -> "revision" table, matched on commithash
#   anything else/undef  -> "head" table, matched on name only
# Returns the first matching row as a hashref with its revision rewritten
# as "1.N", or a false value when there is no such row.
sub getmeta
{
    my ( $self, $filename, $revision ) = @_;
    my $tablename_rev = $self->tablename("revision");
    my $tablename_head = $self->tablename("head");

    my ( $sql, @bind );
    if ( defined($revision) and $revision =~ /^1\.(\d+)$/ )
    {
        $sql = "SELECT * FROM $tablename_rev WHERE name=? AND revision=?";
        @bind = ( $filename, $1 );
    }
    elsif ( defined($revision) and $revision =~ /^[a-zA-Z0-9]{40}$/ )
    {
        $sql = "SELECT * FROM $tablename_rev WHERE name=? AND commithash=?";
        @bind = ( $filename, $revision );
    }
    else
    {
        $sql = "SELECT * FROM $tablename_head WHERE name=?";
        @bind = ( $filename );
    }

    my $db_query = $self->{dbh}->prepare_cached($sql,{},1);
    $db_query->execute(@bind);

    my $meta = $db_query->fetchrow_hashref;
    $meta->{revision} = "1.$meta->{revision}" if($meta);
    return $meta;
}
3910
# Build a getmeta-style hashref describing $filename as of commit $revCommit.
#
# Outcomes, in the order they are tried:
#   - $filename is not in the directory map of $revCommit: returns a stub
#     with filehash "deleted", revision "0" and commithash $revCommit
#     (no author, modified or mode keys).
#   - the most recent commit touching $filename (found with git-rev-list,
#     starting at $revCommit) has a row in the "revision" table: returns
#     that row, with its revision rewritten as "1.N".
#   - otherwise: returns a freshly built hash whose revision is a synthetic
#     number derived from that commit's hash, and whose commithash is
#     $revCommit (the commit asked about, not the one found by rev-list).
#
# Dies if git-rev-parse does not print a 40 hex digit hash, if rev-list does
# not yield exactly one commit, or if the git-ls-tree output is unexpected.
# Returns undef if the parsed commit has no hash.
sub getMetaFromCommithash
{
    my $self = shift;
    my $filename = shift;
    my $revCommit = shift;

    # NOTE: This function doesn't scale well (lots of forks), especially
    #   if you have many files that have not been modified for many commits
    #   (each git-rev-parse redoes a lot of work for each file
    #   that theoretically could be done in parallel by smarter
    #   graph traversal).
    #
    # TODO: Possible optimization strategies:
    #   - Solve the issue of assigning and remembering "real" CVS
    #     revision numbers for branches, and ensure the
    #     data structure can do this efficiently.  Perhaps something
    #     similar to "git notes", and carefully structured to take
    #     advantage of same-sha1-is-same-contents, to roll the same
    #     unmodified subdirectory data onto multiple commits?
    #   - Write and use a C tool that is like git-blame, but
    #     operates on multiple files with file granularity, instead
    #     of one file with line granularity.  Cache
    #     most-recently-modified in $self->{commitRefCache}{$revCommit}.
    #     Try to be intelligent about how many files we do with
    #     one fork (perhaps one directory at a time, without recursion,
    #     and/or include directory as one line item, recurse from here
    #     instead of in C tool?).
    #   - Perhaps we could ask the DB for (filename,fileHash),
    #     and just guess that it is correct (that the file hadn't
    #     changed between $revCommit and the found commit, then
    #     changed back, confusing anything trying to interpret
    #     history).  Probably need to add another index to revisions
    #     DB table for this.
    #   - NOTE: Trying to store all (commit,file) keys in DB [to
    #     find "lastModifiedCommit"] (instead of
    #     just files that changed in each commit as we do now) is
    #     probably not practical from a disk space perspective.

        # Does the file exist in $revCommit?
    # TODO: Include file hash in dirmap cache.
    my($dirMap)=$self->getRevisionDirMap($revCommit);
    # split into directory part (undef when there is no slash) and basename
    my($dir,$file)=($filename=~m%^(?:(.*)/)?([^/]*$)%);
    if(!defined($dir))
    {
        $dir="";
    }
    # the dir map is keyed by directory, then by FULL file name
    if( !defined($dirMap->{$dir}) ||
        !defined($dirMap->{$dir}{$filename}) )
    {
        my($fileHash)="deleted";

        my($retVal)={};
        $retVal->{name}=$filename;
        $retVal->{filehash}=$fileHash;

            # not needed and difficult to compute:
        $retVal->{revision}="0";  # $revision;
        $retVal->{commithash}=$revCommit;
        #$retVal->{author}=$commit->{author};
        #$retVal->{modified}=convertToCvsDate($commit->{date});
        #$retVal->{mode}=convertToDbMode($mode);

        return $retVal;
    }

    # object hash of the file's contents as of $revCommit
    my($fileHash)=safe_pipe_capture("git","rev-parse","$revCommit:$filename");
    chomp $fileHash;
    if(!($fileHash=~/^[0-9a-f]{40}$/))
    {
        die "Invalid fileHash '$fileHash' looking up"
                    ." '$revCommit:$filename'\n";
    }

    # information about most recent commit to modify $filename:
    open(my $gitLogPipe, '-|', 'git', 'rev-list',
         '--max-count=1', '--pretty', '--parents',
         $revCommit, '--', $filename)
                or die "Cannot call git-rev-list: $!";
    my @commits=readCommits($gitLogPipe);
    close $gitLogPipe;
    if(scalar(@commits)!=1)
    {
        die "Can't find most recent commit changing $filename\n";
    }
    my($commit)=$commits[0];
    if( !defined($commit) || !defined($commit->{hash}) )
    {
        return undef;
    }

    # does this (commit,file) have a real assigned CVS revision number?
    my $tablename_rev = $self->tablename("revision");
    my $db_query;
    $db_query = $self->{dbh}->prepare_cached(
        "SELECT * FROM $tablename_rev WHERE name=? AND commithash=?",
        {},1);
    $db_query->execute($filename, $commit->{hash});
    my($meta)=$db_query->fetchrow_hashref;
    if($meta)
    {
        $meta->{revision} = "1.$meta->{revision}";
        return $meta;
    }

    # fall back on special revision number:
    # every pair of hex digits of the commit hash becomes one ".N"
    # component (N = byte value + 100), appended to "2.1.1.2000"
    my($revision)=$commit->{hash};
    $revision=~s/(..)/'.' . (hex($1)+100)/eg;
    $revision="2.1.1.2000$revision";

    # meta data about $filename:
    open(my $filePipe, '-|', 'git', 'ls-tree', '-z',
                $commit->{hash}, '--', $filename)
            or die("Cannot call git-ls-tree : $!");
    # "-z" records are NUL-terminated; this override lasts until we return
    local $/ = "\0";
    my $line;
    $line=<$filePipe>;
    # a second record means the path matched more than one entry
    if(defined(<$filePipe>))
    {
        die "Expected only a single file for git-ls-tree $filename\n";
    }
    close $filePipe;

    chomp $line;
    unless ( $line=~m/^(\d+)\s+(\w+)\s+([a-zA-Z0-9]+)\t(.*)$/o )
    {
        die("Couldn't process git-ls-tree line : $line\n");
    }
    my ( $mode, $git_type, $git_hash, $git_filename ) = ( $1, $2, $3, $4 );

    # save result:
    my($retVal)={};
    $retVal->{name}=$filename;
    $retVal->{revision}=$revision;
    $retVal->{filehash}=$fileHash;
    $retVal->{commithash}=$revCommit;
    $retVal->{author}=$commit->{author};
    $retVal->{modified}=convertToCvsDate($commit->{date});
    $retVal->{mode}=convertToDbMode($mode);

    return $retVal;
}
4052
4053=head2 lookupCommitRef
4054
4055Convert tag/branch/abbreviation/etc into a commit sha1 hash.  Caches
4056the result so looking it up again is fast.
4057
4058=cut
4059
# Arguments:
#   $ref - reference name as received (possibly escaped, see
#          unescapeRefName()).
# Returns the 40 hex digit hash of the commit $ref names, or undef when
# the name is illegal, does not resolve, or resolves to something that is
# not a commit.  Successful lookups are remembered in
# $self->{commitRefCache}; failures are not cached.
sub lookupCommitRef
{
    my $self = shift;
    my $ref = shift;

    my $commitHash = $self->{commitRefCache}{$ref};
    if(defined($commitHash))
    {
        return $commitHash;
    }

    # unescapeRefName() yields undef for names it rejects; such a name
    # cannot resolve, so don't hand an undefined argument to git.
    my $refName = $self->unescapeRefName($ref);
    if(!defined($refName))
    {
        return undef;
    }

    $commitHash=safe_pipe_capture("git","rev-parse","--verify","--quiet",
                                  $refName);
    $commitHash=~s/\s*$//;
    if(!($commitHash=~/^[0-9a-f]{40}$/))
    {
        $commitHash=undef;
    }

    if( defined($commitHash) )
    {
        # only commits are acceptable (not trees, blobs or tag objects)
        my $type=safe_pipe_capture("git","cat-file","-t",$commitHash);
        if( ! ($type=~/^commit\s*$/ ) )
        {
            $commitHash=undef;
        }
    }
    if(defined($commitHash))
    {
        $self->{commitRefCache}{$ref}=$commitHash;
    }
    return $commitHash;
}
4093
4094=head2 clearCommitRefCaches
4095
Clears the cache of commit lookups (sha1's for various tags/abbreviations/etc),
and related caches.
4098
4099=cut
4100
# Reset the per-object caches: commitRefCache becomes an empty hash,
# revisionDirMapCache and gethead_cache become undef.
sub clearCommitRefCaches
{
    my ($self) = @_;

    $self->{revisionDirMapCache} = undef;
    $self->{commitRefCache} = {};
    $self->{gethead_cache} = undef;
}
4108
4109=head2 commitmessage
4110
4111this function takes a commithash and returns the commit message for that commit
4112
4113=cut
# Dies with "Need commithash" unless $commithash is exactly 40 alphanumeric
# characters.  A message stored under that key in the "commitmsgs" table
# takes precedence; otherwise the message is read from
# "git cat-file commit", dropping the leading lines up to the first line
# without any non-whitespace character.  A single space is appended to
# messages that end in a newline.
sub commitmessage
{
    my ( $self, $commithash ) = @_;
    my $tablename = $self->tablename("commitmsgs");

    if ( !defined($commithash) or $commithash !~ /^[a-zA-Z0-9]{40}$/ )
    {
        die("Need commithash");
    }

    my $db_query = $self->{dbh}->prepare_cached("SELECT value FROM $tablename WHERE key=?",{},1);
    $db_query->execute($commithash);

    my ( $message ) = $db_query->fetchrow_array;

    if ( !defined ( $message ) )
    {
        # nothing stored for this commit: take the text from git itself
        my @lines = safe_pipe_capture("git", "cat-file", "commit", $commithash);
        shift @lines while ( $lines[0] =~ /\S/ );
        $message = join("",@lines);
    }

    $message .= " " if ( $message =~ /\n$/ );
    return $message;
}
4140
4141=head2 gethistorydense
4142
This function takes a filename (with path) argument and returns a reference
to an array of arrays, each containing revision, filehash and commithash,
ordered by revision descending.
4145
4146This version of gethistory skips deleted entries -- so it is useful for annotate.
4147The 'dense' part is a reference to a '--dense' option available for git-rev-list
4148and other git tools that depend on it.
4149
4150See also getlog().
4151
4152=cut
# Returns the fetchall_arrayref() result of the query below -- rows of
# [ revision, filehash, commithash ] for $filename, deleted entries
# excluded, newest revision first -- with "1." prepended to the first
# column of every row.
sub gethistorydense
{
    my ( $self, $filename ) = @_;
    my $tablename = $self->tablename("revision");

    my $db_query = $self->{dbh}->prepare_cached("SELECT revision, filehash, commithash FROM $tablename WHERE name=? AND filehash!='deleted' ORDER BY revision DESC",{},1);
    $db_query->execute($filename);

    my $result = $db_query->fetchall_arrayref;

    foreach my $row (@$result)
    {
        $row->[0] = "1." . $row->[0];
    }

    return $result;
}
4173
4174=head2 escapeRefName
4175
4176Apply an escape mechanism to compensate for characters that
4177git ref names can have that CVS tags can not.
4178
4179=cut
# Arguments:
#   $refName - git ref name to encode.
# Returns the escaped name.  Names that already look like a CVS revision
# number (dot separated positive integers) are returned unchanged.
sub escapeRefName
{
    my($self,$refName)=@_;

    # CVS officially only allows [-_A-Za-z0-9] in tag names (or in
    # many contexts it can also be a CVS revision number).
    #
    # Git tags commonly use '/' and '.' as well, but also handle
    # anything else just in case:
    #
    #   = "_-s-"  For '/'.
    #   = "_-p-"  For '.'.
    #   = "_-u-"  For underscore, in case someone wants a literal "_-" in
    #     a tag name.
    #   = "_-xx-" Where "xx" is the hexadecimal representation of the
    #     desired ASCII character byte. (for anything else)

    # NOTE: use "!~" here; "! $refName =~ ..." would negate $refName
    # first and then match the resulting boolean against the pattern.
    if( $refName !~ /^[1-9][0-9]*(\.[1-9][0-9]*)*$/ )
    {
        # The underscore of a literal "_-" must be escaped first, so that
        # the escape sequences added below stay unambiguous.
        $refName=~s/_-/_-u--/g;
        $refName=~s/\./_-p-/g;
        $refName=~s%/%_-s-%g;
        # Anything else: capture the character and emit its code in hex.
        $refName=~s/([^-_a-zA-Z0-9])/sprintf("_-%02x-",ord($1))/eg;
    }

    return $refName;
}
4205
4206=head2 unescapeRefName
4207
4208Undo an escape mechanism to compensate for characters that
4209git ref names can have that CVS tags can not.
4210
4211=cut
# Arguments:
#   $refName - possibly escaped tag/branch name.
# Returns the decoded name, or undef (after logging a warning through
# $log) when the decoded name fails the checks below.
sub unescapeRefName
{
    my($self,$refName)=@_;

    # Expand each "_-X-" sequence; escapeRefName() describes the encoding.
    $refName=~s/_-([spu]|[0-9a-f][0-9a-f])-/unescapeRefNameChar($1)/eg;

    # allowed tag names
    # TODO: Perhaps use git check-ref-format, with an in-process cache of
    #  validated names?
    my $wellFormed  = ( $refName=~m%^[^-][-a-zA-Z0-9_/.]*$% );
    my $badEnding   = ( $refName=~m%[/.]$% ) || ( $refName=~/\.lock$/ );
    my $badSequence = ( $refName=~m%\.\.|/\.|[[\\:?*~]|\@\{% );  # matching }

    if( !$wellFormed || $badEnding || $badSequence )
    {
        # Error:
        $log->warn("illegal refName: $refName");
        return undef;
    }

    return $refName;
}
4234
# Decode the payload of a single "_-X-" escape sequence:
#   "s" -> "/",  "p" -> ".",  "u" -> "_",
#   two lowercase hex digits -> the character with that code.
# Anything else is handed back re-wrapped as "_-X-".
sub unescapeRefNameChar
{
    my($char)=@_;

    my %named = ( s => "/", p => ".", u => "_" );

    return $named{$char} if( exists($named{$char}) );
    return chr(hex($char)) if( $char=~/^[0-9a-f][0-9a-f]$/ );

    # Error case: Maybe it has come straight from user, and
    # wasn't supposed to be escaped?  Restore it the way we got it:
    return "_-$char-";
}
4264
4265=head2 in_array()
4266
4267from Array::PAT - mimics the in_array() function
4268found in PHP. Yuck but works for small arrays.
4269
4270=cut
# Returns 1 if $check is string-equal (eq) to any of the remaining
# arguments, 0 otherwise.
sub in_array
{
    my ($check, @array) = @_;

    return ( grep { $check eq $_ } @array ) ? 1 : 0;
}
4282
4283=head2 safe_pipe_capture
4284
4285an alternative to `command` that allows input to be passed as an array
4286to work around shell problems with weird characters in arguments
4287
4288=cut
# Run the command given as a LIST (program followed by its arguments) in a
# forked child and capture what it writes to standard output.
#
# Returns the output as a list of lines in list context, or joined into a
# single string in scalar context.  Dies if the fork fails, or if closing
# the pipe reports a failure (which includes the command exiting with a
# non-zero status).
sub safe_pipe_capture {

    my @output;

    # open() returns undef when the fork fails, the child's pid in the
    # parent and 0 in the child.  Undef must not be mistaken for "I am
    # the child", or the calling process itself would exec the command.
    my $pid = open(my $child, '-|');
    die "Cannot fork for ".join(' ',@_).": $!" unless defined($pid);

    if ($pid) {
        @output = (<$child>);
        close $child or die join(' ',@_).": $! $?";
    } else {
        exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
    }
    return wantarray ? @output : join('',@output);
}
4301
4302=head2 mangle_dirname
4303
4304create a string from a directory name that is suitable to use as
4305part of a filename, mainly by converting all chars except \w.- to _
4306
4307=cut
# Returns a copy of $dirname in which every character other than \w, "."
# and "-" has been replaced by "_".  Returns nothing (undef in scalar
# context) when $dirname is undefined.
sub mangle_dirname {
    my ($dirname) = @_;
    return unless defined $dirname;

    ( my $mangled = $dirname ) =~ s/[^\w.-]/_/g;

    return $mangled;
}
4316
4317=head2 mangle_tablename
4318
create a string from a name that is suitable to use as part of an SQL table
name, mainly by converting all chars except \w to _
4321
4322=cut
# Returns a copy of $tablename in which every character that is not a \w
# character has been replaced by "_".  Returns nothing (undef in scalar
# context) when $tablename is undefined.
sub mangle_tablename {
    my ($tablename) = @_;
    return unless defined $tablename;

    ( my $mangled = $tablename ) =~ s/[^\w_]/_/g;

    return $mangled;
}
4331
43321;