git-cvsserver.perlon commit cvsserver: generalize getmeta() to recognize commit refs (bfdafa0)
   1#!/usr/bin/perl
   2
   3####
   4#### This application is a CVS emulation layer for git.
   5#### It is intended for clients to connect over SSH.
   6#### See the documentation for more details.
   7####
   8#### Copyright The Open University UK - 2006.
   9####
  10#### Authors: Martyn Smith    <martyn@catalyst.net.nz>
  11####          Martin Langhoff <martin@laptop.org>
  12####
  13####
  14#### Released under the GNU Public License, version 2.
  15####
  16####
  17
  18use 5.008;
  19use strict;
  20use warnings;
  21use bytes;
  22
  23use Fcntl;
  24use File::Temp qw/tempdir tempfile/;
  25use File::Path qw/rmtree/;
  26use File::Basename;
  27use Getopt::Long qw(:config require_order no_ignore_case);
  28
  29my $VERSION = '@@GIT_VERSION@@';
  30
  31my $log = GITCVS::log->new();
  32my $cfg;
  33
  34my $DATE_LIST = {
  35    Jan => "01",
  36    Feb => "02",
  37    Mar => "03",
  38    Apr => "04",
  39    May => "05",
  40    Jun => "06",
  41    Jul => "07",
  42    Aug => "08",
  43    Sep => "09",
  44    Oct => "10",
  45    Nov => "11",
  46    Dec => "12",
  47};
  48
  49# Enable autoflush for STDOUT (otherwise the whole thing falls apart)
  50$| = 1;
  51
  52#### Definition and mappings of functions ####
  53
  54# NOTE: Despite the existence of req_CATCHALL and req_EMPTY unimplemented
  55#  requests, this list is incomplete.  It is missing many rarer/optional
  56#  requests.  Perhaps some clients require a claim of support for
  57#  these specific requests for main functionality to work?
  58my $methods = {
  59    'Root'            => \&req_Root,
  60    'Valid-responses' => \&req_Validresponses,
  61    'valid-requests'  => \&req_validrequests,
  62    'Directory'       => \&req_Directory,
  63    'Sticky'          => \&req_Sticky,
  64    'Entry'           => \&req_Entry,
  65    'Modified'        => \&req_Modified,
  66    'Unchanged'       => \&req_Unchanged,
  67    'Questionable'    => \&req_Questionable,
  68    'Argument'        => \&req_Argument,
  69    'Argumentx'       => \&req_Argument,
  70    'expand-modules'  => \&req_expandmodules,
  71    'add'             => \&req_add,
  72    'remove'          => \&req_remove,
  73    'co'              => \&req_co,
  74    'update'          => \&req_update,
  75    'ci'              => \&req_ci,
  76    'diff'            => \&req_diff,
  77    'log'             => \&req_log,
  78    'rlog'            => \&req_log,
  79    'tag'             => \&req_CATCHALL,
  80    'status'          => \&req_status,
  81    'admin'           => \&req_CATCHALL,
  82    'history'         => \&req_CATCHALL,
  83    'watchers'        => \&req_EMPTY,
  84    'editors'         => \&req_EMPTY,
  85    'noop'            => \&req_EMPTY,
  86    'annotate'        => \&req_annotate,
  87    'Global_option'   => \&req_Globaloption,
  88};
  89
  90##############################################
  91
  92
  93# $state holds all the bits of information the clients sends us that could
  94# potentially be useful when it comes to actually _doing_ something.
  95my $state = { prependdir => '' };
  96
  97# Work is for managing temporary working directory
  98my $work =
  99    {
 100        state => undef,  # undef, 1 (empty), 2 (with stuff)
 101        workDir => undef,
 102        index => undef,
 103        emptyDir => undef,
 104        tmpDir => undef
 105    };
 106
 107$log->info("--------------- STARTING -----------------");
 108
 109my $usage =
 110    "Usage: git cvsserver [options] [pserver|server] [<directory> ...]\n".
 111    "    --base-path <path>  : Prepend to requested CVSROOT\n".
 112    "                          Can be read from GIT_CVSSERVER_BASE_PATH\n".
 113    "    --strict-paths      : Don't allow recursing into subdirectories\n".
 114    "    --export-all        : Don't check for gitcvs.enabled in config\n".
 115    "    --version, -V       : Print version information and exit\n".
 116    "    -h, -H              : Print usage information and exit\n".
 117    "\n".
 118    "<directory> ... is a list of allowed directories. If no directories\n".
 119    "are given, all are allowed. This is an additional restriction, gitcvs\n".
 120    "access still needs to be enabled by the gitcvs.enabled config option.\n".
 121    "Alternately, one directory may be specified in GIT_CVSSERVER_ROOT.\n";
 122
 123my @opts = ( 'h|H', 'version|V',
 124             'base-path=s', 'strict-paths', 'export-all' );
 125GetOptions( $state, @opts )
 126    or die $usage;
 127
 128if ($state->{version}) {
 129    print "git-cvsserver version $VERSION\n";
 130    exit;
 131}
 132if ($state->{help}) {
 133    print $usage;
 134    exit;
 135}
 136
 137my $TEMP_DIR = tempdir( CLEANUP => 1 );
 138$log->debug("Temporary directory is '$TEMP_DIR'");
 139
 140$state->{method} = 'ext';
 141if (@ARGV) {
 142    if ($ARGV[0] eq 'pserver') {
 143        $state->{method} = 'pserver';
 144        shift @ARGV;
 145    } elsif ($ARGV[0] eq 'server') {
 146        shift @ARGV;
 147    }
 148}
 149
 150# everything else is a directory
 151$state->{allowed_roots} = [ @ARGV ];
 152
 153# don't export the whole system unless the users requests it
 154if ($state->{'export-all'} && !@{$state->{allowed_roots}}) {
 155    die "--export-all can only be used together with an explicit whitelist\n";
 156}
 157
 158# Environment handling for running under git-shell
 159if (exists $ENV{GIT_CVSSERVER_BASE_PATH}) {
 160    if ($state->{'base-path'}) {
 161        die "Cannot specify base path both ways.\n";
 162    }
 163    my $base_path = $ENV{GIT_CVSSERVER_BASE_PATH};
 164    $state->{'base-path'} = $base_path;
 165    $log->debug("Picked up base path '$base_path' from environment.\n");
 166}
 167if (exists $ENV{GIT_CVSSERVER_ROOT}) {
 168    if (@{$state->{allowed_roots}}) {
 169        die "Cannot specify roots both ways: @ARGV\n";
 170    }
 171    my $allowed_root = $ENV{GIT_CVSSERVER_ROOT};
 172    $state->{allowed_roots} = [ $allowed_root ];
 173    $log->debug("Picked up allowed root '$allowed_root' from environment.\n");
 174}
 175
 176# if we are called with a pserver argument,
 177# deal with the authentication cat before entering the
 178# main loop
 179if ($state->{method} eq 'pserver') {
 180    my $line = <STDIN>; chomp $line;
 181    unless( $line =~ /^BEGIN (AUTH|VERIFICATION) REQUEST$/) {
 182       die "E Do not understand $line - expecting BEGIN AUTH REQUEST\n";
 183    }
 184    my $request = $1;
 185    $line = <STDIN>; chomp $line;
 186    unless (req_Root('root', $line)) { # reuse Root
 187       print "E Invalid root $line \n";
 188       exit 1;
 189    }
 190    $line = <STDIN>; chomp $line;
 191    my $user = $line;
 192    $line = <STDIN>; chomp $line;
 193    my $password = $line;
 194
 195    if ($user eq 'anonymous') {
 196        # "A" will be 1 byte, use length instead in case the
 197        # encryption method ever changes (yeah, right!)
 198        if (length($password) > 1 ) {
 199            print "E Don't supply a password for the `anonymous' user\n";
 200            print "I HATE YOU\n";
 201            exit 1;
 202        }
 203
 204        # Fall through to LOVE
 205    } else {
 206        # Trying to authenticate a user
 207        if (not exists $cfg->{gitcvs}->{authdb}) {
 208            print "E the repo config file needs a [gitcvs] section with an 'authdb' parameter set to the filename of the authentication database\n";
 209            print "I HATE YOU\n";
 210            exit 1;
 211        }
 212
 213        my $authdb = $cfg->{gitcvs}->{authdb};
 214
 215        unless (-e $authdb) {
 216            print "E The authentication database specified in [gitcvs.authdb] does not exist\n";
 217            print "I HATE YOU\n";
 218            exit 1;
 219        }
 220
 221        my $auth_ok;
 222        open my $passwd, "<", $authdb or die $!;
 223        while (<$passwd>) {
 224            if (m{^\Q$user\E:(.*)}) {
 225                if (crypt($user, descramble($password)) eq $1) {
 226                    $auth_ok = 1;
 227                }
 228            };
 229        }
 230        close $passwd;
 231
 232        unless ($auth_ok) {
 233            print "I HATE YOU\n";
 234            exit 1;
 235        }
 236
 237        # Fall through to LOVE
 238    }
 239
 240    # For checking whether the user is anonymous on commit
 241    $state->{user} = $user;
 242
 243    $line = <STDIN>; chomp $line;
 244    unless ($line eq "END $request REQUEST") {
 245       die "E Do not understand $line -- expecting END $request REQUEST\n";
 246    }
 247    print "I LOVE YOU\n";
 248    exit if $request eq 'VERIFICATION'; # cvs login
 249    # and now back to our regular programme...
 250}
 251
 252# Keep going until the client closes the connection
 253while (<STDIN>)
 254{
 255    chomp;
 256
 257    # Check to see if we've seen this method, and call appropriate function.
 258    if ( /^([\w-]+)(?:\s+(.*))?$/ and defined($methods->{$1}) )
 259    {
 260        # use the $methods hash to call the appropriate sub for this command
 261        #$log->info("Method : $1");
 262        &{$methods->{$1}}($1,$2);
 263    } else {
 264        # log fatal because we don't understand this function. If this happens
 265        # we're fairly screwed because we don't know if the client is expecting
 266        # a response. If it is, the client will hang, we'll hang, and the whole
 267        # thing will be custard.
 268        $log->fatal("Don't understand command $_\n");
 269        die("Unknown command $_");
 270    }
 271}
 272
 273$log->debug("Processing time : user=" . (times)[0] . " system=" . (times)[1]);
 274$log->info("--------------- FINISH -----------------");
 275
 276chdir '/';
 277exit 0;
 278
 279# Magic catchall method.
 280#    This is the method that will handle all commands we haven't yet
 281#    implemented. It simply sends a warning to the log file indicating a
 282#    command that hasn't been implemented has been invoked.
 283sub req_CATCHALL
 284{
 285    my ( $cmd, $data ) = @_;
 286    $log->warn("Unhandled command : req_$cmd : $data");
 287}
 288
 289# This method invariably succeeds with an empty response.
 290sub req_EMPTY
 291{
 292    print "ok\n";
 293}
 294
 295# Root pathname \n
 296#     Response expected: no. Tell the server which CVSROOT to use. Note that
 297#     pathname is a local directory and not a fully qualified CVSROOT variable.
 298#     pathname must already exist; if creating a new root, use the init
 299#     request, not Root. pathname does not include the hostname of the server,
 300#     how to access the server, etc.; by the time the CVS protocol is in use,
 301#     connection, authentication, etc., are already taken care of. The Root
 302#     request must be sent only once, and it must be sent before any requests
 303#     other than Valid-responses, valid-requests, UseUnchanged, Set or init.
 304sub req_Root
 305{
 306    my ( $cmd, $data ) = @_;
 307    $log->debug("req_Root : $data");
 308
 309    unless ($data =~ m#^/#) {
 310        print "error 1 Root must be an absolute pathname\n";
 311        return 0;
 312    }
 313
 314    my $cvsroot = $state->{'base-path'} || '';
 315    $cvsroot =~ s#/+$##;
 316    $cvsroot .= $data;
 317
 318    if ($state->{CVSROOT}
 319        && ($state->{CVSROOT} ne $cvsroot)) {
 320        print "error 1 Conflicting roots specified\n";
 321        return 0;
 322    }
 323
 324    $state->{CVSROOT} = $cvsroot;
 325
 326    $ENV{GIT_DIR} = $state->{CVSROOT} . "/";
 327
 328    if (@{$state->{allowed_roots}}) {
 329        my $allowed = 0;
 330        foreach my $dir (@{$state->{allowed_roots}}) {
 331            next unless $dir =~ m#^/#;
 332            $dir =~ s#/+$##;
 333            if ($state->{'strict-paths'}) {
 334                if ($ENV{GIT_DIR} =~ m#^\Q$dir\E/?$#) {
 335                    $allowed = 1;
 336                    last;
 337                }
 338            } elsif ($ENV{GIT_DIR} =~ m#^\Q$dir\E(/?$|/)#) {
 339                $allowed = 1;
 340                last;
 341            }
 342        }
 343
 344        unless ($allowed) {
 345            print "E $ENV{GIT_DIR} does not seem to be a valid GIT repository\n";
 346            print "E \n";
 347            print "error 1 $ENV{GIT_DIR} is not a valid repository\n";
 348            return 0;
 349        }
 350    }
 351
 352    unless (-d $ENV{GIT_DIR} && -e $ENV{GIT_DIR}.'HEAD') {
 353       print "E $ENV{GIT_DIR} does not seem to be a valid GIT repository\n";
 354       print "E \n";
 355       print "error 1 $ENV{GIT_DIR} is not a valid repository\n";
 356       return 0;
 357    }
 358
 359    my @gitvars = `git config -l`;
 360    if ($?) {
 361       print "E problems executing git-config on the server -- this is not a git repository or the PATH is not set correctly.\n";
 362        print "E \n";
 363        print "error 1 - problem executing git-config\n";
 364       return 0;
 365    }
 366    foreach my $line ( @gitvars )
 367    {
 368        next unless ( $line =~ /^(gitcvs)\.(?:(ext|pserver)\.)?([\w-]+)=(.*)$/ );
 369        unless ($2) {
 370            $cfg->{$1}{$3} = $4;
 371        } else {
 372            $cfg->{$1}{$2}{$3} = $4;
 373        }
 374    }
 375
 376    my $enabled = ($cfg->{gitcvs}{$state->{method}}{enabled}
 377                   || $cfg->{gitcvs}{enabled});
 378    unless ($state->{'export-all'} ||
 379            ($enabled && $enabled =~ /^\s*(1|true|yes)\s*$/i)) {
 380        print "E GITCVS emulation needs to be enabled on this repo\n";
 381        print "E the repo config file needs a [gitcvs] section added, and the parameter 'enabled' set to 1\n";
 382        print "E \n";
 383        print "error 1 GITCVS emulation disabled\n";
 384        return 0;
 385    }
 386
 387    my $logfile = $cfg->{gitcvs}{$state->{method}}{logfile} || $cfg->{gitcvs}{logfile};
 388    if ( $logfile )
 389    {
 390        $log->setfile($logfile);
 391    } else {
 392        $log->nofile();
 393    }
 394
 395    return 1;
 396}
 397
 398# Global_option option \n
 399#     Response expected: no. Transmit one of the global options `-q', `-Q',
 400#     `-l', `-t', `-r', or `-n'. option must be one of those strings, no
 401#     variations (such as combining of options) are allowed. For graceful
 402#     handling of valid-requests, it is probably better to make new global
 403#     options separate requests, rather than trying to add them to this
 404#     request.
 405sub req_Globaloption
 406{
 407    my ( $cmd, $data ) = @_;
 408    $log->debug("req_Globaloption : $data");
 409    $state->{globaloptions}{$data} = 1;
 410}
 411
 412# Valid-responses request-list \n
 413#     Response expected: no. Tell the server what responses the client will
 414#     accept. request-list is a space separated list of tokens.
 415sub req_Validresponses
 416{
 417    my ( $cmd, $data ) = @_;
 418    $log->debug("req_Validresponses : $data");
 419
 420    # TODO : re-enable this, currently it's not particularly useful
 421    #$state->{validresponses} = [ split /\s+/, $data ];
 422}
 423
 424# valid-requests \n
 425#     Response expected: yes. Ask the server to send back a Valid-requests
 426#     response.
 427sub req_validrequests
 428{
 429    my ( $cmd, $data ) = @_;
 430
 431    $log->debug("req_validrequests");
 432
 433    $log->debug("SEND : Valid-requests " . join(" ",keys %$methods));
 434    $log->debug("SEND : ok");
 435
 436    print "Valid-requests " . join(" ",keys %$methods) . "\n";
 437    print "ok\n";
 438}
 439
 440# Directory local-directory \n
 441#     Additional data: repository \n. Response expected: no. Tell the server
 442#     what directory to use. The repository should be a directory name from a
 443#     previous server response. Note that this both gives a default for Entry
 444#     and Modified and also for ci and the other commands; normal usage is to
 445#     send Directory for each directory in which there will be an Entry or
 446#     Modified, and then a final Directory for the original directory, then the
 447#     command. The local-directory is relative to the top level at which the
 448#     command is occurring (i.e. the last Directory which is sent before the
 449#     command); to indicate that top level, `.' should be sent for
 450#     local-directory.
 451sub req_Directory
 452{
 453    my ( $cmd, $data ) = @_;
 454
 455    my $repository = <STDIN>;
 456    chomp $repository;
 457
 458
 459    $state->{localdir} = $data;
 460    $state->{repository} = $repository;
 461    $state->{path} = $repository;
 462    $state->{path} =~ s/^\Q$state->{CVSROOT}\E\///;
 463    $state->{module} = $1 if ($state->{path} =~ s/^(.*?)(\/|$)//);
 464    $state->{path} .= "/" if ( $state->{path} =~ /\S/ );
 465
 466    $state->{directory} = $state->{localdir};
 467    $state->{directory} = "" if ( $state->{directory} eq "." );
 468    $state->{directory} .= "/" if ( $state->{directory} =~ /\S/ );
 469
 470    if ( (not defined($state->{prependdir}) or $state->{prependdir} eq '') and $state->{localdir} eq "." and $state->{path} =~ /\S/ )
 471    {
 472        $log->info("Setting prepend to '$state->{path}'");
 473        $state->{prependdir} = $state->{path};
 474        my %entries;
 475        foreach my $entry ( keys %{$state->{entries}} )
 476        {
 477            $entries{$state->{prependdir} . $entry} = $state->{entries}{$entry};
 478        }
 479        $state->{entries}=\%entries;
 480
 481        my %dirMap;
 482        foreach my $dir ( keys %{$state->{dirMap}} )
 483        {
 484            $dirMap{$state->{prependdir} . $dir} = $state->{dirMap}{$dir};
 485        }
 486        $state->{dirMap}=\%dirMap;
 487    }
 488
 489    if ( defined ( $state->{prependdir} ) )
 490    {
 491        $log->debug("Prepending '$state->{prependdir}' to state|directory");
 492        $state->{directory} = $state->{prependdir} . $state->{directory}
 493    }
 494
 495    if ( ! defined($state->{dirMap}{$state->{directory}}) )
 496    {
 497        $state->{dirMap}{$state->{directory}} =
 498            {
 499                'names' => {}
 500                #'tagspec' => undef
 501            };
 502    }
 503
 504    $log->debug("req_Directory : localdir=$data repository=$repository path=$state->{path} directory=$state->{directory} module=$state->{module}");
 505}
 506
 507# Sticky tagspec \n
 508#     Response expected: no. Tell the server that the directory most
 509#     recently specified with Directory has a sticky tag or date
 510#     tagspec. The first character of tagspec is T for a tag, D for
 511#     a date, or some other character supplied by a Set-sticky
 512#     response from a previous request to the server. The remainder
 513#     of tagspec contains the actual tag or date, again as supplied
 514#     by Set-sticky.
 515#          The server should remember Static-directory and Sticky requests
 516#     for a particular directory; the client need not resend them each
 517#     time it sends a Directory request for a given directory. However,
 518#     the server is not obliged to remember them beyond the context
 519#     of a single command.
 520sub req_Sticky
 521{
 522    my ( $cmd, $tagspec ) = @_;
 523
 524    my ( $stickyInfo );
 525    if($tagspec eq "")
 526    {
 527        # nothing
 528    }
 529    elsif($tagspec=~/^T([^ ]+)\s*$/)
 530    {
 531        $stickyInfo = { 'tag' => $1 };
 532    }
 533    elsif($tagspec=~/^D([0-9.]+)\s*$/)
 534    {
 535        $stickyInfo= { 'date' => $1 };
 536    }
 537    else
 538    {
 539        die "Unknown tag_or_date format\n";
 540    }
 541    $state->{dirMap}{$state->{directory}}{stickyInfo}=$stickyInfo;
 542
 543    $log->debug("req_Sticky : tagspec=$tagspec repository=$state->{repository}"
 544                . " path=$state->{path} directory=$state->{directory}"
 545                . " module=$state->{module}");
 546}
 547
 548# Entry entry-line \n
 549#     Response expected: no. Tell the server what version of a file is on the
 550#     local machine. The name in entry-line is a name relative to the directory
 551#     most recently specified with Directory. If the user is operating on only
 552#     some files in a directory, Entry requests for only those files need be
 553#     included. If an Entry request is sent without Modified, Is-modified, or
 554#     Unchanged, it means the file is lost (does not exist in the working
 555#     directory). If both Entry and one of Modified, Is-modified, or Unchanged
 556#     are sent for the same file, Entry must be sent first. For a given file,
 557#     one can send Modified, Is-modified, or Unchanged, but not more than one
 558#     of these three.
 559sub req_Entry
 560{
 561    my ( $cmd, $data ) = @_;
 562
 563    #$log->debug("req_Entry : $data");
 564
 565    my @data = split(/\//, $data, -1);
 566
 567    $state->{entries}{$state->{directory}.$data[1]} = {
 568        revision    => $data[2],
 569        conflict    => $data[3],
 570        options     => $data[4],
 571        tag_or_date => $data[5],
 572    };
 573
 574    $state->{dirMap}{$state->{directory}}{names}{$data[1]} = 'F';
 575
 576    $log->info("Received entry line '$data' => '" . $state->{directory} . $data[1] . "'");
 577}
 578
 579# Questionable filename \n
 580#     Response expected: no. Additional data: no. Tell the server to check
 581#     whether filename should be ignored, and if not, next time the server
 582#     sends responses, send (in a M response) `?' followed by the directory and
 583#     filename. filename must not contain `/'; it needs to be a file in the
 584#     directory named by the most recent Directory request.
 585sub req_Questionable
 586{
 587    my ( $cmd, $data ) = @_;
 588
 589    $log->debug("req_Questionable : $data");
 590    $state->{entries}{$state->{directory}.$data}{questionable} = 1;
 591}
 592
 593# add \n
 594#     Response expected: yes. Add a file or directory. This uses any previous
 595#     Argument, Directory, Entry, or Modified requests, if they have been sent.
 596#     The last Directory sent specifies the working directory at the time of
 597#     the operation. To add a directory, send the directory to be added using
 598#     Directory and Argument requests.
 599sub req_add
 600{
 601    my ( $cmd, $data ) = @_;
 602
 603    argsplit("add");
 604
 605    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
 606    $updater->update();
 607
 608    my $addcount = 0;
 609
 610    foreach my $filename ( @{$state->{args}} )
 611    {
 612        $filename = filecleanup($filename);
 613
 614        my $meta = $updater->getmeta($filename);
 615        my $wrev = revparse($filename);
 616
 617        if ($wrev && $meta && ($wrev=~/^-/))
 618        {
 619            # previously removed file, add back
 620            $log->info("added file $filename was previously removed, send $meta->{revision}");
 621
 622            print "MT +updated\n";
 623            print "MT text U \n";
 624            print "MT fname $filename\n";
 625            print "MT newline\n";
 626            print "MT -updated\n";
 627
 628            unless ( $state->{globaloptions}{-n} )
 629            {
 630                my ( $filepart, $dirpart ) = filenamesplit($filename,1);
 631
 632                print "Created $dirpart\n";
 633                print $state->{CVSROOT} . "/$state->{module}/$filename\n";
 634
 635                # this is an "entries" line
 636                my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
 637                $log->debug("/$filepart/$meta->{revision}//$kopts/");
 638                print "/$filepart/$meta->{revision}//$kopts/\n";
 639                # permissions
 640                $log->debug("SEND : u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}");
 641                print "u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}\n";
 642                # transmit file
 643                transmitfile($meta->{filehash});
 644            }
 645
 646            next;
 647        }
 648
 649        unless ( defined ( $state->{entries}{$filename}{modified_filename} ) )
 650        {
 651            print "E cvs add: nothing known about `$filename'\n";
 652            next;
 653        }
 654        # TODO : check we're not squashing an already existing file
 655        if ( defined ( $state->{entries}{$filename}{revision} ) )
 656        {
 657            print "E cvs add: `$filename' has already been entered\n";
 658            next;
 659        }
 660
 661        my ( $filepart, $dirpart ) = filenamesplit($filename, 1);
 662
 663        print "E cvs add: scheduling file `$filename' for addition\n";
 664
 665        print "Checked-in $dirpart\n";
 666        print "$filename\n";
 667        my $kopts = kopts_from_path($filename,"file",
 668                        $state->{entries}{$filename}{modified_filename});
 669        print "/$filepart/0//$kopts/\n";
 670
 671        my $requestedKopts = $state->{opt}{k};
 672        if(defined($requestedKopts))
 673        {
 674            $requestedKopts = "-k$requestedKopts";
 675        }
 676        else
 677        {
 678            $requestedKopts = "";
 679        }
 680        if( $kopts ne $requestedKopts )
 681        {
 682            $log->warn("Ignoring requested -k='$requestedKopts'"
 683                        . " for '$filename'; detected -k='$kopts' instead");
 684            #TODO: Also have option to send warning to user?
 685        }
 686
 687        $addcount++;
 688    }
 689
 690    if ( $addcount == 1 )
 691    {
 692        print "E cvs add: use `cvs commit' to add this file permanently\n";
 693    }
 694    elsif ( $addcount > 1 )
 695    {
 696        print "E cvs add: use `cvs commit' to add these files permanently\n";
 697    }
 698
 699    print "ok\n";
 700}
 701
 702# remove \n
 703#     Response expected: yes. Remove a file. This uses any previous Argument,
 704#     Directory, Entry, or Modified requests, if they have been sent. The last
 705#     Directory sent specifies the working directory at the time of the
 706#     operation. Note that this request does not actually do anything to the
 707#     repository; the only effect of a successful remove request is to supply
 708#     the client with a new entries line containing `-' to indicate a removed
 709#     file. In fact, the client probably could perform this operation without
 710#     contacting the server, although using remove may cause the server to
 711#     perform a few more checks. The client sends a subsequent ci request to
 712#     actually record the removal in the repository.
 713sub req_remove
 714{
 715    my ( $cmd, $data ) = @_;
 716
 717    argsplit("remove");
 718
 719    # Grab a handle to the SQLite db and do any necessary updates
 720    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
 721    $updater->update();
 722
 723    #$log->debug("add state : " . Dumper($state));
 724
 725    my $rmcount = 0;
 726
 727    foreach my $filename ( @{$state->{args}} )
 728    {
 729        $filename = filecleanup($filename);
 730
 731        if ( defined ( $state->{entries}{$filename}{unchanged} ) or defined ( $state->{entries}{$filename}{modified_filename} ) )
 732        {
 733            print "E cvs remove: file `$filename' still in working directory\n";
 734            next;
 735        }
 736
 737        my $meta = $updater->getmeta($filename);
 738        my $wrev = revparse($filename);
 739
 740        unless ( defined ( $wrev ) )
 741        {
 742            print "E cvs remove: nothing known about `$filename'\n";
 743            next;
 744        }
 745
 746        if ( defined($wrev) and ($wrev=~/^-/) )
 747        {
 748            print "E cvs remove: file `$filename' already scheduled for removal\n";
 749            next;
 750        }
 751
 752        unless ( $wrev eq $meta->{revision} )
 753        {
 754            # TODO : not sure if the format of this message is quite correct.
 755            print "E cvs remove: Up to date check failed for `$filename'\n";
 756            next;
 757        }
 758
 759
 760        my ( $filepart, $dirpart ) = filenamesplit($filename, 1);
 761
 762        print "E cvs remove: scheduling `$filename' for removal\n";
 763
 764        print "Checked-in $dirpart\n";
 765        print "$filename\n";
 766        my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
 767        print "/$filepart/-$wrev//$kopts/\n";
 768
 769        $rmcount++;
 770    }
 771
 772    if ( $rmcount == 1 )
 773    {
 774        print "E cvs remove: use `cvs commit' to remove this file permanently\n";
 775    }
 776    elsif ( $rmcount > 1 )
 777    {
 778        print "E cvs remove: use `cvs commit' to remove these files permanently\n";
 779    }
 780
 781    print "ok\n";
 782}
 783
 784# Modified filename \n
 785#     Response expected: no. Additional data: mode, \n, file transmission. Send
 786#     the server a copy of one locally modified file. filename is a file within
 787#     the most recent directory sent with Directory; it must not contain `/'.
 788#     If the user is operating on only some files in a directory, only those
 789#     files need to be included. This can also be sent without Entry, if there
 790#     is no entry for the file.
 791sub req_Modified
 792{
 793    my ( $cmd, $data ) = @_;
 794
 795    my $mode = <STDIN>;
 796    defined $mode
 797        or (print "E end of file reading mode for $data\n"), return;
 798    chomp $mode;
 799    my $size = <STDIN>;
 800    defined $size
 801        or (print "E end of file reading size of $data\n"), return;
 802    chomp $size;
 803
 804    # Grab config information
 805    my $blocksize = 8192;
 806    my $bytesleft = $size;
 807    my $tmp;
 808
 809    # Get a filehandle/name to write it to
 810    my ( $fh, $filename ) = tempfile( DIR => $TEMP_DIR );
 811
 812    # Loop over file data writing out to temporary file.
 813    while ( $bytesleft )
 814    {
 815        $blocksize = $bytesleft if ( $bytesleft < $blocksize );
 816        read STDIN, $tmp, $blocksize;
 817        print $fh $tmp;
 818        $bytesleft -= $blocksize;
 819    }
 820
 821    close $fh
 822        or (print "E failed to write temporary, $filename: $!\n"), return;
 823
 824    # Ensure we have something sensible for the file mode
 825    if ( $mode =~ /u=(\w+)/ )
 826    {
 827        $mode = $1;
 828    } else {
 829        $mode = "rw";
 830    }
 831
 832    # Save the file data in $state
 833    $state->{entries}{$state->{directory}.$data}{modified_filename} = $filename;
 834    $state->{entries}{$state->{directory}.$data}{modified_mode} = $mode;
 835    $state->{entries}{$state->{directory}.$data}{modified_hash} = `git hash-object $filename`;
 836    $state->{entries}{$state->{directory}.$data}{modified_hash} =~ s/\s.*$//s;
 837
 838    #$log->debug("req_Modified : file=$data mode=$mode size=$size");
 839}
 840
 841# Unchanged filename \n
 842#     Response expected: no. Tell the server that filename has not been
 843#     modified in the checked out directory. The filename is a file within the
 844#     most recent directory sent with Directory; it must not contain `/'.
 845sub req_Unchanged
 846{
 847    my ( $cmd, $data ) = @_;
 848
 849    $state->{entries}{$state->{directory}.$data}{unchanged} = 1;
 850
 851    #$log->debug("req_Unchanged : $data");
 852}
 853
 854# Argument text \n
 855#     Response expected: no. Save argument for use in a subsequent command.
 856#     Arguments accumulate until an argument-using command is given, at which
 857#     point they are forgotten.
 858# Argumentx text \n
 859#     Response expected: no. Append \n followed by text to the current argument
 860#     being saved.
 861sub req_Argument
 862{
 863    my ( $cmd, $data ) = @_;
 864
 865    # Argumentx means: append to last Argument (with a newline in front)
 866
 867    $log->debug("$cmd : $data");
 868
 869    if ( $cmd eq 'Argumentx') {
 870        ${$state->{arguments}}[$#{$state->{arguments}}] .= "\n" . $data;
 871    } else {
 872        push @{$state->{arguments}}, $data;
 873    }
 874}
 875
 876# expand-modules \n
 877#     Response expected: yes. Expand the modules which are specified in the
 878#     arguments. Returns the data in Module-expansion responses. Note that the
 879#     server can assume that this is checkout or export, not rtag or rdiff; the
 880#     latter do not access the working directory and thus have no need to
 881#     expand modules on the client side. Expand may not be the best word for
 882#     what this request does. It does not necessarily tell you all the files
 883#     contained in a module, for example. Basically it is a way of telling you
 884#     which working directories the server needs to know about in order to
 885#     handle a checkout of the specified modules. For example, suppose that the
 886#     server has a module defined by
 887#   aliasmodule -a 1dir
 888#     That is, one can check out aliasmodule and it will take 1dir in the
 889#     repository and check it out to 1dir in the working directory. Now suppose
 890#     the client already has this module checked out and is planning on using
 891#     the co request to update it. Without using expand-modules, the client
 892#     would have two bad choices: it could either send information about all
 893#     working directories under the current directory, which could be
 894#     unnecessarily slow, or it could be ignorant of the fact that aliasmodule
 895#     stands for 1dir, and neglect to send information for 1dir, which would
 896#     lead to incorrect operation. With expand-modules, the client would first
 897#     ask for the module to be expanded:
 898sub req_expandmodules
 899{
 900    my ( $cmd, $data ) = @_;
 901
 902    argsplit();
 903
 904    $log->debug("req_expandmodules : " . ( defined($data) ? $data : "[NULL]" ) );
 905
 906    unless ( ref $state->{arguments} eq "ARRAY" )
 907    {
 908        print "ok\n";
 909        return;
 910    }
 911
 912    foreach my $module ( @{$state->{arguments}} )
 913    {
 914        $log->debug("SEND : Module-expansion $module");
 915        print "Module-expansion $module\n";
 916    }
 917
 918    print "ok\n";
 919    statecleanup();
 920}
 921
 922# co \n
 923#     Response expected: yes. Get files from the repository. This uses any
 924#     previous Argument, Directory, Entry, or Modified requests, if they have
 925#     been sent. Arguments to this command are module names; the client cannot
 926#     know what directories they correspond to except by (1) just sending the
 927#     co request, and then seeing what directory names the server sends back in
 928#     its responses, and (2) the expand-modules request.
 929sub req_co
 930{
 931    my ( $cmd, $data ) = @_;
 932
 933    argsplit("co");
 934
 935    # Provide list of modules, if -c was used.
 936    if (exists $state->{opt}{c}) {
 937        my $showref = `git show-ref --heads`;
 938        for my $line (split '\n', $showref) {
 939            if ( $line =~ m% refs/heads/(.*)$% ) {
 940                print "M $1\t$1\n";
 941            }
 942        }
 943        print "ok\n";
 944        return 1;
 945    }
 946
 947    my $module = $state->{args}[0];
 948    $state->{module} = $module;
 949    my $checkout_path = $module;
 950
 951    # use the user specified directory if we're given it
 952    $checkout_path = $state->{opt}{d} if ( exists ( $state->{opt}{d} ) );
 953
 954    $log->debug("req_co : " . ( defined($data) ? $data : "[NULL]" ) );
 955
 956    $log->info("Checking out module '$module' ($state->{CVSROOT}) to '$checkout_path'");
 957
 958    $ENV{GIT_DIR} = $state->{CVSROOT} . "/";
 959
 960    # Grab a handle to the SQLite db and do any necessary updates
 961    my $updater = GITCVS::updater->new($state->{CVSROOT}, $module, $log);
 962    $updater->update();
 963
 964    $checkout_path =~ s|/$||; # get rid of trailing slashes
 965
 966    # Eclipse seems to need the Clear-sticky command
 967    # to prepare the 'Entries' file for the new directory.
 968    print "Clear-sticky $checkout_path/\n";
 969    print $state->{CVSROOT} . "/$module/\n";
 970    print "Clear-static-directory $checkout_path/\n";
 971    print $state->{CVSROOT} . "/$module/\n";
 972    print "Clear-sticky $checkout_path/\n"; # yes, twice
 973    print $state->{CVSROOT} . "/$module/\n";
 974    print "Template $checkout_path/\n";
 975    print $state->{CVSROOT} . "/$module/\n";
 976    print "0\n";
 977
 978    # instruct the client that we're checking out to $checkout_path
 979    print "E cvs checkout: Updating $checkout_path\n";
 980
 981    my %seendirs = ();
 982    my $lastdir ='';
 983
 984    # recursive
 985    sub prepdir {
 986       my ($dir, $repodir, $remotedir, $seendirs) = @_;
 987       my $parent = dirname($dir);
 988       $dir       =~ s|/+$||;
 989       $repodir   =~ s|/+$||;
 990       $remotedir =~ s|/+$||;
 991       $parent    =~ s|/+$||;
 992       $log->debug("announcedir $dir, $repodir, $remotedir" );
 993
 994       if ($parent eq '.' || $parent eq './') {
 995           $parent = '';
 996       }
 997       # recurse to announce unseen parents first
 998       if (length($parent) && !exists($seendirs->{$parent})) {
 999           prepdir($parent, $repodir, $remotedir, $seendirs);
1000       }
1001       # Announce that we are going to modify at the parent level
1002       if ($parent) {
1003           print "E cvs checkout: Updating $remotedir/$parent\n";
1004       } else {
1005           print "E cvs checkout: Updating $remotedir\n";
1006       }
1007       print "Clear-sticky $remotedir/$parent/\n";
1008       print "$repodir/$parent/\n";
1009
1010       print "Clear-static-directory $remotedir/$dir/\n";
1011       print "$repodir/$dir/\n";
1012       print "Clear-sticky $remotedir/$parent/\n"; # yes, twice
1013       print "$repodir/$parent/\n";
1014       print "Template $remotedir/$dir/\n";
1015       print "$repodir/$dir/\n";
1016       print "0\n";
1017
1018       $seendirs->{$dir} = 1;
1019    }
1020
1021    foreach my $git ( @{$updater->gethead} )
1022    {
1023        # Don't want to check out deleted files
1024        next if ( $git->{filehash} eq "deleted" );
1025
1026        my $fullName = $git->{name};
1027        ( $git->{name}, $git->{dir} ) = filenamesplit($git->{name});
1028
1029       if (length($git->{dir}) && $git->{dir} ne './'
1030           && $git->{dir} ne $lastdir ) {
1031           unless (exists($seendirs{$git->{dir}})) {
1032               prepdir($git->{dir}, $state->{CVSROOT} . "/$module/",
1033                       $checkout_path, \%seendirs);
1034               $lastdir = $git->{dir};
1035               $seendirs{$git->{dir}} = 1;
1036           }
1037           print "E cvs checkout: Updating /$checkout_path/$git->{dir}\n";
1038       }
1039
1040        # modification time of this file
1041        print "Mod-time $git->{modified}\n";
1042
1043        # print some information to the client
1044        if ( defined ( $git->{dir} ) and $git->{dir} ne "./" )
1045        {
1046            print "M U $checkout_path/$git->{dir}$git->{name}\n";
1047        } else {
1048            print "M U $checkout_path/$git->{name}\n";
1049        }
1050
1051       # instruct client we're sending a file to put in this path
1052       print "Created $checkout_path/" . ( defined ( $git->{dir} ) and $git->{dir} ne "./" ? $git->{dir} . "/" : "" ) . "\n";
1053
1054       print $state->{CVSROOT} . "/$module/" . ( defined ( $git->{dir} ) and $git->{dir} ne "./" ? $git->{dir} . "/" : "" ) . "$git->{name}\n";
1055
1056        # this is an "entries" line
1057        my $kopts = kopts_from_path($fullName,"sha1",$git->{filehash});
1058        print "/$git->{name}/$git->{revision}//$kopts/\n";
1059        # permissions
1060        print "u=$git->{mode},g=$git->{mode},o=$git->{mode}\n";
1061
1062        # transmit file
1063        transmitfile($git->{filehash});
1064    }
1065
1066    print "ok\n";
1067
1068    statecleanup();
1069}
1070
1071# update \n
1072#     Response expected: yes. Actually do a cvs update command. This uses any
1073#     previous Argument, Directory, Entry, or Modified requests, if they have
1074#     been sent. The last Directory sent specifies the working directory at the
1075#     time of the operation. The -I option is not used--files which the client
1076#     can decide whether to ignore are not mentioned and the client sends the
1077#     Questionable request for others.
1078sub req_update
1079{
1080    my ( $cmd, $data ) = @_;
1081
1082    $log->debug("req_update : " . ( defined($data) ? $data : "[NULL]" ));
1083
1084    argsplit("update");
1085
1086    #
1087    # It may just be a client exploring the available heads/modules
1088    # in that case, list them as top level directories and leave it
1089    # at that. Eclipse uses this technique to offer you a list of
1090    # projects (heads in this case) to checkout.
1091    #
1092    if ($state->{module} eq '') {
1093        my $showref = `git show-ref --heads`;
1094        print "E cvs update: Updating .\n";
1095        for my $line (split '\n', $showref) {
1096            if ( $line =~ m% refs/heads/(.*)$% ) {
1097                print "E cvs update: New directory `$1'\n";
1098            }
1099        }
1100        print "ok\n";
1101        return 1;
1102    }
1103
1104
1105    # Grab a handle to the SQLite db and do any necessary updates
1106    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
1107
1108    $updater->update();
1109
1110    argsfromdir($updater);
1111
1112    #$log->debug("update state : " . Dumper($state));
1113
1114    my $last_dirname = "///";
1115
1116    # foreach file specified on the command line ...
1117    foreach my $filename ( @{$state->{args}} )
1118    {
1119        $filename = filecleanup($filename);
1120
1121        $log->debug("Processing file $filename");
1122
1123        unless ( $state->{globaloptions}{-Q} || $state->{globaloptions}{-q} )
1124        {
1125            my $cur_dirname = dirname($filename);
1126            if ( $cur_dirname ne $last_dirname )
1127            {
1128                $last_dirname = $cur_dirname;
1129                if ( $cur_dirname eq "" )
1130                {
1131                    $cur_dirname = ".";
1132                }
1133                print "E cvs update: Updating $cur_dirname\n";
1134            }
1135        }
1136
1137        # if we have a -C we should pretend we never saw modified stuff
1138        if ( exists ( $state->{opt}{C} ) )
1139        {
1140            delete $state->{entries}{$filename}{modified_hash};
1141            delete $state->{entries}{$filename}{modified_filename};
1142            $state->{entries}{$filename}{unchanged} = 1;
1143        }
1144
1145        my $meta;
1146        if ( defined($state->{opt}{r}) and $state->{opt}{r} =~ /^(1\.\d+)$/ )
1147        {
1148            $meta = $updater->getmeta($filename, $1);
1149        } else {
1150            $meta = $updater->getmeta($filename);
1151        }
1152
1153        # If -p was given, "print" the contents of the requested revision.
1154        if ( exists ( $state->{opt}{p} ) ) {
1155            if ( defined ( $meta->{revision} ) ) {
1156                $log->info("Printing '$filename' revision " . $meta->{revision});
1157
1158                transmitfile($meta->{filehash}, { print => 1 });
1159            }
1160
1161            next;
1162        }
1163
1164        if ( ! defined $meta )
1165        {
1166            $meta = {
1167                name => $filename,
1168                revision => '0',
1169                filehash => 'added'
1170            };
1171        }
1172
1173        my $oldmeta = $meta;
1174
1175        my $wrev = revparse($filename);
1176
1177        # If the working copy is an old revision, lets get that version too for comparison.
1178        if ( defined($wrev) and $wrev ne $meta->{revision} )
1179        {
1180            $oldmeta = $updater->getmeta($filename, $wrev);
1181        }
1182
1183        #$log->debug("Target revision is $meta->{revision}, current working revision is $wrev");
1184
1185        # Files are up to date if the working copy and repo copy have the same revision,
1186        # and the working copy is unmodified _and_ the user hasn't specified -C
1187        next if ( defined ( $wrev )
1188                  and defined($meta->{revision})
1189                  and $wrev eq $meta->{revision}
1190                  and $state->{entries}{$filename}{unchanged}
1191                  and not exists ( $state->{opt}{C} ) );
1192
1193        # If the working copy and repo copy have the same revision,
1194        # but the working copy is modified, tell the client it's modified
1195        if ( defined ( $wrev )
1196             and defined($meta->{revision})
1197             and $wrev eq $meta->{revision}
1198             and defined($state->{entries}{$filename}{modified_hash})
1199             and not exists ( $state->{opt}{C} ) )
1200        {
1201            $log->info("Tell the client the file is modified");
1202            print "MT text M \n";
1203            print "MT fname $filename\n";
1204            print "MT newline\n";
1205            next;
1206        }
1207
1208        if ( $meta->{filehash} eq "deleted" )
1209        {
1210            # TODO: If it has been modified in the sandbox, error out
1211            #   with the appropriate message, rather than deleting a modified
1212            #   file.
1213
1214            my ( $filepart, $dirpart ) = filenamesplit($filename,1);
1215
1216            $log->info("Removing '$filename' from working copy (no longer in the repo)");
1217
1218            print "E cvs update: `$filename' is no longer in the repository\n";
1219            # Don't want to actually _DO_ the update if -n specified
1220            unless ( $state->{globaloptions}{-n} ) {
1221                print "Removed $dirpart\n";
1222                print "$filepart\n";
1223            }
1224        }
1225        elsif ( not defined ( $state->{entries}{$filename}{modified_hash} )
1226                or $state->{entries}{$filename}{modified_hash} eq $oldmeta->{filehash}
1227                or $meta->{filehash} eq 'added' )
1228        {
1229            # normal update, just send the new revision (either U=Update,
1230            # or A=Add, or R=Remove)
1231            if ( defined($wrev) && ($wrev=~/^-/) )
1232            {
1233                $log->info("Tell the client the file is scheduled for removal");
1234                print "MT text R \n";
1235                print "MT fname $filename\n";
1236                print "MT newline\n";
1237                next;
1238            }
1239            elsif ( (!defined($wrev) || $wrev eq '0') &&
1240                    (!defined($meta->{revision}) || $meta->{revision} eq '0') )
1241            {
1242                $log->info("Tell the client the file is scheduled for addition");
1243                print "MT text A \n";
1244                print "MT fname $filename\n";
1245                print "MT newline\n";
1246                next;
1247
1248            }
1249            else {
1250                $log->info("UpdatingX3 '$filename' to ".$meta->{revision});
1251                print "MT +updated\n";
1252                print "MT text U \n";
1253                print "MT fname $filename\n";
1254                print "MT newline\n";
1255                print "MT -updated\n";
1256            }
1257
1258            my ( $filepart, $dirpart ) = filenamesplit($filename,1);
1259
1260            # Don't want to actually _DO_ the update if -n specified
1261            unless ( $state->{globaloptions}{-n} )
1262            {
1263                if ( defined ( $wrev ) )
1264                {
1265                    # instruct client we're sending a file to put in this path as a replacement
1266                    print "Update-existing $dirpart\n";
1267                    $log->debug("Updating existing file 'Update-existing $dirpart'");
1268                } else {
1269                    # instruct client we're sending a file to put in this path as a new file
1270                    print "Clear-static-directory $dirpart\n";
1271                    print $state->{CVSROOT} . "/$state->{module}/$dirpart\n";
1272                    print "Clear-sticky $dirpart\n";
1273                    print $state->{CVSROOT} . "/$state->{module}/$dirpart\n";
1274
1275                    $log->debug("Creating new file 'Created $dirpart'");
1276                    print "Created $dirpart\n";
1277                }
1278                print $state->{CVSROOT} . "/$state->{module}/$filename\n";
1279
1280                # this is an "entries" line
1281                my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
1282                $log->debug("/$filepart/$meta->{revision}//$kopts/");
1283                print "/$filepart/$meta->{revision}//$kopts/\n";
1284
1285                # permissions
1286                $log->debug("SEND : u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}");
1287                print "u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}\n";
1288
1289                # transmit file
1290                transmitfile($meta->{filehash});
1291            }
1292        } else {
1293            my ( $filepart, $dirpart ) = filenamesplit($meta->{name},1);
1294
1295            my $mergeDir = setupTmpDir();
1296
1297            my $file_local = $filepart . ".mine";
1298            my $mergedFile = "$mergeDir/$file_local";
1299            system("ln","-s",$state->{entries}{$filename}{modified_filename}, $file_local);
1300            my $file_old = $filepart . "." . $oldmeta->{revision};
1301            transmitfile($oldmeta->{filehash}, { targetfile => $file_old });
1302            my $file_new = $filepart . "." . $meta->{revision};
1303            transmitfile($meta->{filehash}, { targetfile => $file_new });
1304
1305            # we need to merge with the local changes ( M=successful merge, C=conflict merge )
1306            $log->info("Merging $file_local, $file_old, $file_new");
1307            print "M Merging differences between $oldmeta->{revision} and $meta->{revision} into $filename\n";
1308
1309            $log->debug("Temporary directory for merge is $mergeDir");
1310
1311            my $return = system("git", "merge-file", $file_local, $file_old, $file_new);
1312            $return >>= 8;
1313
1314            cleanupTmpDir();
1315
1316            if ( $return == 0 )
1317            {
1318                $log->info("Merged successfully");
1319                print "M M $filename\n";
1320                $log->debug("Merged $dirpart");
1321
1322                # Don't want to actually _DO_ the update if -n specified
1323                unless ( $state->{globaloptions}{-n} )
1324                {
1325                    print "Merged $dirpart\n";
1326                    $log->debug($state->{CVSROOT} . "/$state->{module}/$filename");
1327                    print $state->{CVSROOT} . "/$state->{module}/$filename\n";
1328                    my $kopts = kopts_from_path("$dirpart/$filepart",
1329                                                "file",$mergedFile);
1330                    $log->debug("/$filepart/$meta->{revision}//$kopts/");
1331                    print "/$filepart/$meta->{revision}//$kopts/\n";
1332                }
1333            }
1334            elsif ( $return == 1 )
1335            {
1336                $log->info("Merged with conflicts");
1337                print "E cvs update: conflicts found in $filename\n";
1338                print "M C $filename\n";
1339
1340                # Don't want to actually _DO_ the update if -n specified
1341                unless ( $state->{globaloptions}{-n} )
1342                {
1343                    print "Merged $dirpart\n";
1344                    print $state->{CVSROOT} . "/$state->{module}/$filename\n";
1345                    my $kopts = kopts_from_path("$dirpart/$filepart",
1346                                                "file",$mergedFile);
1347                    print "/$filepart/$meta->{revision}/+/$kopts/\n";
1348                }
1349            }
1350            else
1351            {
1352                $log->warn("Merge failed");
1353                next;
1354            }
1355
1356            # Don't want to actually _DO_ the update if -n specified
1357            unless ( $state->{globaloptions}{-n} )
1358            {
1359                # permissions
1360                $log->debug("SEND : u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}");
1361                print "u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}\n";
1362
1363                # transmit file, format is single integer on a line by itself (file
1364                # size) followed by the file contents
1365                # TODO : we should copy files in blocks
1366                my $data = `cat $mergedFile`;
1367                $log->debug("File size : " . length($data));
1368                print length($data) . "\n";
1369                print $data;
1370            }
1371        }
1372
1373    }
1374
1375    print "ok\n";
1376}
1377
1378sub req_ci
1379{
1380    my ( $cmd, $data ) = @_;
1381
1382    argsplit("ci");
1383
1384    #$log->debug("State : " . Dumper($state));
1385
1386    $log->info("req_ci : " . ( defined($data) ? $data : "[NULL]" ));
1387
1388    if ( $state->{method} eq 'pserver' and $state->{user} eq 'anonymous' )
1389    {
1390        print "error 1 anonymous user cannot commit via pserver\n";
1391        cleanupWorkTree();
1392        exit;
1393    }
1394
1395    if ( -e $state->{CVSROOT} . "/index" )
1396    {
1397        $log->warn("file 'index' already exists in the git repository");
1398        print "error 1 Index already exists in git repo\n";
1399        cleanupWorkTree();
1400        exit;
1401    }
1402
1403    # Grab a handle to the SQLite db and do any necessary updates
1404    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
1405    $updater->update();
1406
1407    # Remember where the head was at the beginning.
1408    my $parenthash = `git show-ref -s refs/heads/$state->{module}`;
1409    chomp $parenthash;
1410    if ($parenthash !~ /^[0-9a-f]{40}$/) {
1411            print "error 1 pserver cannot find the current HEAD of module";
1412            cleanupWorkTree();
1413            exit;
1414    }
1415
1416    setupWorkTree($parenthash);
1417
1418    $log->info("Lockless commit start, basing commit on '$work->{workDir}', index file is '$work->{index}'");
1419
1420    $log->info("Created index '$work->{index}' for head $state->{module} - exit status $?");
1421
1422    my @committedfiles = ();
1423    my %oldmeta;
1424
1425    # foreach file specified on the command line ...
1426    foreach my $filename ( @{$state->{args}} )
1427    {
1428        my $committedfile = $filename;
1429        $filename = filecleanup($filename);
1430
1431        next unless ( exists $state->{entries}{$filename}{modified_filename} or not $state->{entries}{$filename}{unchanged} );
1432
1433        my $meta = $updater->getmeta($filename);
1434        $oldmeta{$filename} = $meta;
1435
1436        my $wrev = revparse($filename);
1437
1438        my ( $filepart, $dirpart ) = filenamesplit($filename);
1439
1440        # do a checkout of the file if it is part of this tree
1441        if ($wrev) {
1442            system('git', 'checkout-index', '-f', '-u', $filename);
1443            unless ($? == 0) {
1444                die "Error running git-checkout-index -f -u $filename : $!";
1445            }
1446        }
1447
1448        my $addflag = 0;
1449        my $rmflag = 0;
1450        $rmflag = 1 if ( defined($wrev) and ($wrev=~/^-/) );
1451        $addflag = 1 unless ( -e $filename );
1452
1453        # Do up to date checking
1454        unless ( $addflag or $wrev eq $meta->{revision} or
1455                 ( $rmflag and $wrev eq "-$meta->{revision}" ) )
1456        {
1457            # fail everything if an up to date check fails
1458            print "error 1 Up to date check failed for $filename\n";
1459            cleanupWorkTree();
1460            exit;
1461        }
1462
1463        push @committedfiles, $committedfile;
1464        $log->info("Committing $filename");
1465
1466        system("mkdir","-p",$dirpart) unless ( -d $dirpart );
1467
1468        unless ( $rmflag )
1469        {
1470            $log->debug("rename $state->{entries}{$filename}{modified_filename} $filename");
1471            rename $state->{entries}{$filename}{modified_filename},$filename;
1472
1473            # Calculate modes to remove
1474            my $invmode = "";
1475            foreach ( qw (r w x) ) { $invmode .= $_ unless ( $state->{entries}{$filename}{modified_mode} =~ /$_/ ); }
1476
1477            $log->debug("chmod u+" . $state->{entries}{$filename}{modified_mode} . "-" . $invmode . " $filename");
1478            system("chmod","u+" .  $state->{entries}{$filename}{modified_mode} . "-" . $invmode, $filename);
1479        }
1480
1481        if ( $rmflag )
1482        {
1483            $log->info("Removing file '$filename'");
1484            unlink($filename);
1485            system("git", "update-index", "--remove", $filename);
1486        }
1487        elsif ( $addflag )
1488        {
1489            $log->info("Adding file '$filename'");
1490            system("git", "update-index", "--add", $filename);
1491        } else {
1492            $log->info("UpdatingX2 file '$filename'");
1493            system("git", "update-index", $filename);
1494        }
1495    }
1496
1497    unless ( scalar(@committedfiles) > 0 )
1498    {
1499        print "E No files to commit\n";
1500        print "ok\n";
1501        cleanupWorkTree();
1502        return;
1503    }
1504
1505    my $treehash = `git write-tree`;
1506    chomp $treehash;
1507
1508    $log->debug("Treehash : $treehash, Parenthash : $parenthash");
1509
1510    # write our commit message out if we have one ...
1511    my ( $msg_fh, $msg_filename ) = tempfile( DIR => $TEMP_DIR );
1512    print $msg_fh $state->{opt}{m};# if ( exists ( $state->{opt}{m} ) );
1513    if ( defined ( $cfg->{gitcvs}{commitmsgannotation} ) ) {
1514        if ($cfg->{gitcvs}{commitmsgannotation} !~ /^\s*$/ ) {
1515            print $msg_fh "\n\n".$cfg->{gitcvs}{commitmsgannotation}."\n"
1516        }
1517    } else {
1518        print $msg_fh "\n\nvia git-CVS emulator\n";
1519    }
1520    close $msg_fh;
1521
1522    my $commithash = `git commit-tree $treehash -p $parenthash < $msg_filename`;
1523    chomp($commithash);
1524    $log->info("Commit hash : $commithash");
1525
1526    unless ( $commithash =~ /[a-zA-Z0-9]{40}/ )
1527    {
1528        $log->warn("Commit failed (Invalid commit hash)");
1529        print "error 1 Commit failed (unknown reason)\n";
1530        cleanupWorkTree();
1531        exit;
1532    }
1533
1534        ### Emulate git-receive-pack by running hooks/update
1535        my @hook = ( $ENV{GIT_DIR}.'hooks/update', "refs/heads/$state->{module}",
1536                        $parenthash, $commithash );
1537        if( -x $hook[0] ) {
1538                unless( system( @hook ) == 0 )
1539                {
1540                        $log->warn("Commit failed (update hook declined to update ref)");
1541                        print "error 1 Commit failed (update hook declined)\n";
1542                        cleanupWorkTree();
1543                        exit;
1544                }
1545        }
1546
1547        ### Update the ref
1548        if (system(qw(git update-ref -m), "cvsserver ci",
1549                        "refs/heads/$state->{module}", $commithash, $parenthash)) {
1550                $log->warn("update-ref for $state->{module} failed.");
1551                print "error 1 Cannot commit -- update first\n";
1552                cleanupWorkTree();
1553                exit;
1554        }
1555
1556        ### Emulate git-receive-pack by running hooks/post-receive
1557        my $hook = $ENV{GIT_DIR}.'hooks/post-receive';
1558        if( -x $hook ) {
1559                open(my $pipe, "| $hook") || die "can't fork $!";
1560
1561                local $SIG{PIPE} = sub { die 'pipe broke' };
1562
1563                print $pipe "$parenthash $commithash refs/heads/$state->{module}\n";
1564
1565                close $pipe || die "bad pipe: $! $?";
1566        }
1567
1568    $updater->update();
1569
1570        ### Then hooks/post-update
1571        $hook = $ENV{GIT_DIR}.'hooks/post-update';
1572        if (-x $hook) {
1573                system($hook, "refs/heads/$state->{module}");
1574        }
1575
1576    # foreach file specified on the command line ...
1577    foreach my $filename ( @committedfiles )
1578    {
1579        $filename = filecleanup($filename);
1580
1581        my $meta = $updater->getmeta($filename);
1582        unless (defined $meta->{revision}) {
1583          $meta->{revision} = "1.1";
1584        }
1585
1586        my ( $filepart, $dirpart ) = filenamesplit($filename, 1);
1587
1588        $log->debug("Checked-in $dirpart : $filename");
1589
1590        print "M $state->{CVSROOT}/$state->{module}/$filename,v  <--  $dirpart$filepart\n";
1591        if ( defined $meta->{filehash} && $meta->{filehash} eq "deleted" )
1592        {
1593            print "M new revision: delete; previous revision: $oldmeta{$filename}{revision}\n";
1594            print "Remove-entry $dirpart\n";
1595            print "$filename\n";
1596        } else {
1597            if ($meta->{revision} eq "1.1") {
1598                print "M initial revision: 1.1\n";
1599            } else {
1600                print "M new revision: $meta->{revision}; previous revision: $oldmeta{$filename}{revision}\n";
1601            }
1602            print "Checked-in $dirpart\n";
1603            print "$filename\n";
1604            my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
1605            print "/$filepart/$meta->{revision}//$kopts/\n";
1606        }
1607    }
1608
1609    cleanupWorkTree();
1610    print "ok\n";
1611}
1612
1613sub req_status
1614{
1615    my ( $cmd, $data ) = @_;
1616
1617    argsplit("status");
1618
1619    $log->info("req_status : " . ( defined($data) ? $data : "[NULL]" ));
1620    #$log->debug("status state : " . Dumper($state));
1621
1622    # Grab a handle to the SQLite db and do any necessary updates
1623    my $updater;
1624    $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
1625    $updater->update();
1626
1627    # if no files were specified, we need to work out what files we should
1628    # be providing status on ...
1629    argsfromdir($updater);
1630
1631    # foreach file specified on the command line ...
1632    foreach my $filename ( @{$state->{args}} )
1633    {
1634        $filename = filecleanup($filename);
1635
1636        if ( exists($state->{opt}{l}) &&
1637             index($filename, '/', length($state->{prependdir})) >= 0 )
1638        {
1639           next;
1640        }
1641
1642        my $meta = $updater->getmeta($filename);
1643        my $oldmeta = $meta;
1644
1645        my $wrev = revparse($filename);
1646
1647        # If the working copy is an old revision, lets get that
1648        # version too for comparison.
1649        if ( defined($wrev) and $wrev ne $meta->{revision} )
1650        {
1651            $oldmeta = $updater->getmeta($filename, $wrev);
1652        }
1653
1654        # TODO : All possible statuses aren't yet implemented
1655        my $status;
1656        # Files are up to date if the working copy and repo copy have
1657        # the same revision, and the working copy is unmodified
1658        if ( defined ( $wrev ) and defined($meta->{revision}) and
1659             $wrev eq $meta->{revision} and
1660             ( ( $state->{entries}{$filename}{unchanged} and
1661                 ( not defined ( $state->{entries}{$filename}{conflict} ) or
1662                   $state->{entries}{$filename}{conflict} !~ /^\+=/ ) ) or
1663               ( defined($state->{entries}{$filename}{modified_hash}) and
1664                 $state->{entries}{$filename}{modified_hash} eq
1665                        $meta->{filehash} ) ) )
1666        {
1667            $status = "Up-to-date"
1668        }
1669
1670        # Need checkout if the working copy has a different (usually
1671        # older) revision than the repo copy, and the working copy is
1672        # unmodified
1673        if ( defined ( $wrev ) and defined ( $meta->{revision} ) and
1674             $meta->{revision} ne $wrev and
1675             ( $state->{entries}{$filename}{unchanged} or
1676               ( defined($state->{entries}{$filename}{modified_hash}) and
1677                 $state->{entries}{$filename}{modified_hash} eq
1678                                $oldmeta->{filehash} ) ) )
1679        {
1680            $status ||= "Needs Checkout";
1681        }
1682
1683        # Need checkout if it exists in the repo but doesn't have a working
1684        # copy
1685        if ( not defined ( $wrev ) and defined ( $meta->{revision} ) )
1686        {
1687            $status ||= "Needs Checkout";
1688        }
1689
1690        # Locally modified if working copy and repo copy have the
1691        # same revision but there are local changes
1692        if ( defined ( $wrev ) and defined($meta->{revision}) and
1693             $wrev eq $meta->{revision} and
1694             $state->{entries}{$filename}{modified_filename} )
1695        {
1696            $status ||= "Locally Modified";
1697        }
1698
1699        # Needs Merge if working copy revision is different
1700        # (usually older) than repo copy and there are local changes
1701        if ( defined ( $wrev ) and defined ( $meta->{revision} ) and
1702             $meta->{revision} ne $wrev and
1703             $state->{entries}{$filename}{modified_filename} )
1704        {
1705            $status ||= "Needs Merge";
1706        }
1707
1708        if ( defined ( $state->{entries}{$filename}{revision} ) and
1709             not defined ( $meta->{revision} ) )
1710        {
1711            $status ||= "Locally Added";
1712        }
1713        if ( defined ( $wrev ) and defined ( $meta->{revision} ) and
1714             $wrev eq "-$meta->{revision}" )
1715        {
1716            $status ||= "Locally Removed";
1717        }
1718        if ( defined ( $state->{entries}{$filename}{conflict} ) and
1719             $state->{entries}{$filename}{conflict} =~ /^\+=/ )
1720        {
1721            $status ||= "Unresolved Conflict";
1722        }
1723        if ( 0 )
1724        {
1725            $status ||= "File had conflicts on merge";
1726        }
1727
1728        $status ||= "Unknown";
1729
1730        my ($filepart) = filenamesplit($filename);
1731
1732        print "M =======" . ( "=" x 60 ) . "\n";
1733        print "M File: $filepart\tStatus: $status\n";
1734        if ( defined($state->{entries}{$filename}{revision}) )
1735        {
1736            print "M Working revision:\t" .
1737                  $state->{entries}{$filename}{revision} . "\n";
1738        } else {
1739            print "M Working revision:\tNo entry for $filename\n";
1740        }
1741        if ( defined($meta->{revision}) )
1742        {
1743            print "M Repository revision:\t" .
1744                   $meta->{revision} .
1745                   "\t$state->{CVSROOT}/$state->{module}/$filename,v\n";
1746            my($tagOrDate)=$state->{entries}{$filename}{tag_or_date};
1747            my($tag)=($tagOrDate=~m/^T(.+)$/);
1748            if( !defined($tag) )
1749            {
1750                $tag="(none)";
1751            }
1752            print "M Sticky Tag:\t\t$tag\n";
1753            my($date)=($tagOrDate=~m/^D(.+)$/);
1754            if( !defined($date) )
1755            {
1756                $date="(none)";
1757            }
1758            print "M Sticky Date:\t\t$date\n";
1759            my($options)=$state->{entries}{$filename}{options};
1760            if( $options eq "" )
1761            {
1762                $options="(none)";
1763            }
1764            print "M Sticky Options:\t\t$options\n";
1765        } else {
1766            print "M Repository revision:\tNo revision control file\n";
1767        }
1768        print "M\n";
1769    }
1770
1771    print "ok\n";
1772}
1773
1774sub req_diff
1775{
1776    my ( $cmd, $data ) = @_;
1777
1778    argsplit("diff");
1779
1780    $log->debug("req_diff : " . ( defined($data) ? $data : "[NULL]" ));
1781    #$log->debug("status state : " . Dumper($state));
1782
1783    my ($revision1, $revision2);
1784    if ( defined ( $state->{opt}{r} ) and ref $state->{opt}{r} eq "ARRAY" )
1785    {
1786        $revision1 = $state->{opt}{r}[0];
1787        $revision2 = $state->{opt}{r}[1];
1788    } else {
1789        $revision1 = $state->{opt}{r};
1790    }
1791
1792    $log->debug("Diffing revisions " .
1793                ( defined($revision1) ? $revision1 : "[NULL]" ) .
1794                " and " . ( defined($revision2) ? $revision2 : "[NULL]" ) );
1795
1796    # Grab a handle to the SQLite db and do any necessary updates
1797    my $updater;
1798    $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
1799    $updater->update();
1800
1801    # if no files were specified, we need to work out what files we should
1802    # be providing status on ...
1803    argsfromdir($updater);
1804
1805    # foreach file specified on the command line ...
1806    foreach my $filename ( @{$state->{args}} )
1807    {
1808        $filename = filecleanup($filename);
1809
1810        my ( $fh, $file1, $file2, $meta1, $meta2, $filediff );
1811
1812        my $wrev = revparse($filename);
1813
1814        # We need _something_ to diff against
1815        next unless ( defined ( $wrev ) );
1816
1817        # if we have a -r switch, use it
1818        if ( defined ( $revision1 ) )
1819        {
1820            ( undef, $file1 ) = tempfile( DIR => $TEMP_DIR, OPEN => 0 );
1821            $meta1 = $updater->getmeta($filename, $revision1);
1822            unless ( defined ( $meta1 ) and $meta1->{filehash} ne "deleted" )
1823            {
1824                print "E File $filename at revision $revision1 doesn't exist\n";
1825                next;
1826            }
1827            transmitfile($meta1->{filehash}, { targetfile => $file1 });
1828        }
1829        # otherwise we just use the working copy revision
1830        else
1831        {
1832            ( undef, $file1 ) = tempfile( DIR => $TEMP_DIR, OPEN => 0 );
1833            $meta1 = $updater->getmeta($filename, $wrev);
1834            transmitfile($meta1->{filehash}, { targetfile => $file1 });
1835        }
1836
1837        # if we have a second -r switch, use it too
1838        if ( defined ( $revision2 ) )
1839        {
1840            ( undef, $file2 ) = tempfile( DIR => $TEMP_DIR, OPEN => 0 );
1841            $meta2 = $updater->getmeta($filename, $revision2);
1842
1843            unless ( defined ( $meta2 ) and $meta2->{filehash} ne "deleted" )
1844            {
1845                print "E File $filename at revision $revision2 doesn't exist\n";
1846                next;
1847            }
1848
1849            transmitfile($meta2->{filehash}, { targetfile => $file2 });
1850        }
1851        # otherwise we just use the working copy
1852        else
1853        {
1854            $file2 = $state->{entries}{$filename}{modified_filename};
1855        }
1856
1857        # if we have been given -r, and we don't have a $file2 yet, lets
1858        # get one
1859        if ( defined ( $revision1 ) and not defined ( $file2 ) )
1860        {
1861            ( undef, $file2 ) = tempfile( DIR => $TEMP_DIR, OPEN => 0 );
1862            $meta2 = $updater->getmeta($filename, $wrev);
1863            transmitfile($meta2->{filehash}, { targetfile => $file2 });
1864        }
1865
1866        # We need to have retrieved something useful
1867        next unless ( defined ( $meta1 ) );
1868
1869        # Files to date if the working copy and repo copy have the same
1870        # revision, and the working copy is unmodified
1871        if ( not defined ( $meta2 ) and $wrev eq $meta1->{revision} and
1872             ( ( $state->{entries}{$filename}{unchanged} and
1873                 ( not defined ( $state->{entries}{$filename}{conflict} ) or
1874                   $state->{entries}{$filename}{conflict} !~ /^\+=/ ) ) or
1875               ( defined($state->{entries}{$filename}{modified_hash}) and
1876                 $state->{entries}{$filename}{modified_hash} eq
1877                        $meta1->{filehash} ) ) )
1878        {
1879            next;
1880        }
1881
1882        # Apparently we only show diffs for locally modified files
1883        unless ( defined($meta2) or
1884                 defined ( $state->{entries}{$filename}{modified_filename} ) )
1885        {
1886            next;
1887        }
1888
1889        print "M Index: $filename\n";
1890        print "M =======" . ( "=" x 60 ) . "\n";
1891        print "M RCS file: $state->{CVSROOT}/$state->{module}/$filename,v\n";
1892        if ( defined ( $meta1 ) )
1893        {
1894            print "M retrieving revision $meta1->{revision}\n"
1895        }
1896        if ( defined ( $meta2 ) )
1897        {
1898            print "M retrieving revision $meta2->{revision}\n"
1899        }
1900        print "M diff ";
1901        foreach my $opt ( keys %{$state->{opt}} )
1902        {
1903            if ( ref $state->{opt}{$opt} eq "ARRAY" )
1904            {
1905                foreach my $value ( @{$state->{opt}{$opt}} )
1906                {
1907                    print "-$opt $value ";
1908                }
1909            } else {
1910                print "-$opt ";
1911                if ( defined ( $state->{opt}{$opt} ) )
1912                {
1913                    print "$state->{opt}{$opt} "
1914                }
1915            }
1916        }
1917        print "$filename\n";
1918
1919        $log->info("Diffing $filename -r $meta1->{revision} -r " .
1920                   ( $meta2->{revision} or "workingcopy" ));
1921
1922        ( $fh, $filediff ) = tempfile ( DIR => $TEMP_DIR );
1923
1924        if ( exists $state->{opt}{u} )
1925        {
1926            system("diff -u -L '$filename revision $meta1->{revision}'" .
1927                        " -L '$filename " .
1928                        ( defined($meta2->{revision}) ?
1929                                "revision $meta2->{revision}" :
1930                                "working copy" ) .
1931                        "' $file1 $file2 > $filediff" );
1932        } else {
1933            system("diff $file1 $file2 > $filediff");
1934        }
1935
1936        while ( <$fh> )
1937        {
1938            print "M $_";
1939        }
1940        close $fh;
1941    }
1942
1943    print "ok\n";
1944}
1945
1946sub req_log
1947{
1948    my ( $cmd, $data ) = @_;
1949
1950    argsplit("log");
1951
1952    $log->debug("req_log : " . ( defined($data) ? $data : "[NULL]" ));
1953    #$log->debug("log state : " . Dumper($state));
1954
1955    my ( $revFilter );
1956    if ( defined ( $state->{opt}{r} ) )
1957    {
1958        $revFilter = $state->{opt}{r};
1959    }
1960
1961    # Grab a handle to the SQLite db and do any necessary updates
1962    my $updater;
1963    $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
1964    $updater->update();
1965
1966    # if no files were specified, we need to work out what files we
1967    # should be providing status on ...
1968    argsfromdir($updater);
1969
1970    # foreach file specified on the command line ...
1971    foreach my $filename ( @{$state->{args}} )
1972    {
1973        $filename = filecleanup($filename);
1974
1975        my $headmeta = $updater->getmeta($filename);
1976
1977        my ($revisions,$totalrevisions) = $updater->getlog($filename,
1978                                                           $revFilter);
1979
1980        next unless ( scalar(@$revisions) );
1981
1982        print "M \n";
1983        print "M RCS file: $state->{CVSROOT}/$state->{module}/$filename,v\n";
1984        print "M Working file: $filename\n";
1985        print "M head: $headmeta->{revision}\n";
1986        print "M branch:\n";
1987        print "M locks: strict\n";
1988        print "M access list:\n";
1989        print "M symbolic names:\n";
1990        print "M keyword substitution: kv\n";
1991        print "M total revisions: $totalrevisions;\tselected revisions: " .
1992              scalar(@$revisions) . "\n";
1993        print "M description:\n";
1994
1995        foreach my $revision ( @$revisions )
1996        {
1997            print "M ----------------------------\n";
1998            print "M revision $revision->{revision}\n";
1999            # reformat the date for log output
2000            if ( $revision->{modified} =~ /(\d+)\s+(\w+)\s+(\d+)\s+(\S+)/ and
2001                 defined($DATE_LIST->{$2}) )
2002            {
2003                $revision->{modified} = sprintf('%04d/%02d/%02d %s',
2004                                            $3, $DATE_LIST->{$2}, $1, $4 );
2005            }
2006            $revision->{author} = cvs_author($revision->{author});
2007            print "M date: $revision->{modified};" .
2008                  "  author: $revision->{author};  state: " .
2009                  ( $revision->{filehash} eq "deleted" ? "dead" : "Exp" ) .
2010                  ";  lines: +2 -3\n";
2011            my $commitmessage;
2012            $commitmessage = $updater->commitmessage($revision->{commithash});
2013            $commitmessage =~ s/^/M /mg;
2014            print $commitmessage . "\n";
2015        }
2016        print "M =======" . ( "=" x 70 ) . "\n";
2017    }
2018
2019    print "ok\n";
2020}
2021
2022sub req_annotate
2023{
2024    my ( $cmd, $data ) = @_;
2025
2026    argsplit("annotate");
2027
2028    $log->info("req_annotate : " . ( defined($data) ? $data : "[NULL]" ));
2029    #$log->debug("status state : " . Dumper($state));
2030
2031    # Grab a handle to the SQLite db and do any necessary updates
2032    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
2033    $updater->update();
2034
2035    # if no files were specified, we need to work out what files we should be providing annotate on ...
2036    argsfromdir($updater);
2037
2038    # we'll need a temporary checkout dir
2039    setupWorkTree();
2040
2041    $log->info("Temp checkoutdir creation successful, basing annotate session work on '$work->{workDir}', index file is '$ENV{GIT_INDEX_FILE}'");
2042
2043    # foreach file specified on the command line ...
2044    foreach my $filename ( @{$state->{args}} )
2045    {
2046        $filename = filecleanup($filename);
2047
2048        my $meta = $updater->getmeta($filename);
2049
2050        next unless ( $meta->{revision} );
2051
2052        # get all the commits that this file was in
2053        # in dense format -- aka skip dead revisions
2054        my $revisions   = $updater->gethistorydense($filename);
2055        my $lastseenin  = $revisions->[0][2];
2056
2057        # populate the temporary index based on the latest commit were we saw
2058        # the file -- but do it cheaply without checking out any files
2059        # TODO: if we got a revision from the client, use that instead
2060        # to look up the commithash in sqlite (still good to default to
2061        # the current head as we do now)
2062        system("git", "read-tree", $lastseenin);
2063        unless ($? == 0)
2064        {
2065            print "E error running git-read-tree $lastseenin $ENV{GIT_INDEX_FILE} $!\n";
2066            return;
2067        }
2068        $log->info("Created index '$ENV{GIT_INDEX_FILE}' with commit $lastseenin - exit status $?");
2069
2070        # do a checkout of the file
2071        system('git', 'checkout-index', '-f', '-u', $filename);
2072        unless ($? == 0) {
2073            print "E error running git-checkout-index -f -u $filename : $!\n";
2074            return;
2075        }
2076
2077        $log->info("Annotate $filename");
2078
2079        # Prepare a file with the commits from the linearized
2080        # history that annotate should know about. This prevents
2081        # git-jsannotate telling us about commits we are hiding
2082        # from the client.
2083
2084        my $a_hints = "$work->{workDir}/.annotate_hints";
2085        if (!open(ANNOTATEHINTS, '>', $a_hints)) {
2086            print "E failed to open '$a_hints' for writing: $!\n";
2087            return;
2088        }
2089        for (my $i=0; $i < @$revisions; $i++)
2090        {
2091            print ANNOTATEHINTS $revisions->[$i][2];
2092            if ($i+1 < @$revisions) { # have we got a parent?
2093                print ANNOTATEHINTS ' ' . $revisions->[$i+1][2];
2094            }
2095            print ANNOTATEHINTS "\n";
2096        }
2097
2098        print ANNOTATEHINTS "\n";
2099        close ANNOTATEHINTS
2100            or (print "E failed to write $a_hints: $!\n"), return;
2101
2102        my @cmd = (qw(git annotate -l -S), $a_hints, $filename);
2103        if (!open(ANNOTATE, "-|", @cmd)) {
2104            print "E error invoking ". join(' ',@cmd) .": $!\n";
2105            return;
2106        }
2107        my $metadata = {};
2108        print "E Annotations for $filename\n";
2109        print "E ***************\n";
2110        while ( <ANNOTATE> )
2111        {
2112            if (m/^([a-zA-Z0-9]{40})\t\([^\)]*\)(.*)$/i)
2113            {
2114                my $commithash = $1;
2115                my $data = $2;
2116                unless ( defined ( $metadata->{$commithash} ) )
2117                {
2118                    $metadata->{$commithash} = $updater->getmeta($filename, $commithash);
2119                    $metadata->{$commithash}{author} = cvs_author($metadata->{$commithash}{author});
2120                    $metadata->{$commithash}{modified} = sprintf("%02d-%s-%02d", $1, $2, $3) if ( $metadata->{$commithash}{modified} =~ /^(\d+)\s(\w+)\s\d\d(\d\d)/ );
2121                }
2122                printf("M %-7s      (%-8s %10s): %s\n",
2123                    $metadata->{$commithash}{revision},
2124                    $metadata->{$commithash}{author},
2125                    $metadata->{$commithash}{modified},
2126                    $data
2127                );
2128            } else {
2129                $log->warn("Error in annotate output! LINE: $_");
2130                print "E Annotate error \n";
2131                next;
2132            }
2133        }
2134        close ANNOTATE;
2135    }
2136
2137    # done; get out of the tempdir
2138    cleanupWorkTree();
2139
2140    print "ok\n";
2141
2142}
2143
2144# This method takes the state->{arguments} array and produces two new arrays.
2145# The first is $state->{args} which is everything before the '--' argument, and
2146# the second is $state->{files} which is everything after it.
2147sub argsplit
2148{
2149    $state->{args} = [];
2150    $state->{files} = [];
2151    $state->{opt} = {};
2152
2153    return unless( defined($state->{arguments}) and ref $state->{arguments} eq "ARRAY" );
2154
2155    my $type = shift;
2156
2157    if ( defined($type) )
2158    {
2159        my $opt = {};
2160        $opt = { A => 0, N => 0, P => 0, R => 0, c => 0, f => 0, l => 0, n => 0, p => 0, s => 0, r => 1, D => 1, d => 1, k => 1, j => 1, } if ( $type eq "co" );
2161        $opt = { v => 0, l => 0, R => 0 } if ( $type eq "status" );
2162        $opt = { A => 0, P => 0, C => 0, d => 0, f => 0, l => 0, R => 0, p => 0, k => 1, r => 1, D => 1, j => 1, I => 1, W => 1 } if ( $type eq "update" );
2163        $opt = { l => 0, R => 0, k => 1, D => 1, D => 1, r => 2 } if ( $type eq "diff" );
2164        $opt = { c => 0, R => 0, l => 0, f => 0, F => 1, m => 1, r => 1 } if ( $type eq "ci" );
2165        $opt = { k => 1, m => 1 } if ( $type eq "add" );
2166        $opt = { f => 0, l => 0, R => 0 } if ( $type eq "remove" );
2167        $opt = { l => 0, b => 0, h => 0, R => 0, t => 0, N => 0, S => 0, r => 1, d => 1, s => 1, w => 1 } if ( $type eq "log" );
2168
2169
2170        while ( scalar ( @{$state->{arguments}} ) > 0 )
2171        {
2172            my $arg = shift @{$state->{arguments}};
2173
2174            next if ( $arg eq "--" );
2175            next unless ( $arg =~ /\S/ );
2176
2177            # if the argument looks like a switch
2178            if ( $arg =~ /^-(\w)(.*)/ )
2179            {
2180                # if it's a switch that takes an argument
2181                if ( $opt->{$1} )
2182                {
2183                    # If this switch has already been provided
2184                    if ( $opt->{$1} > 1 and exists ( $state->{opt}{$1} ) )
2185                    {
2186                        $state->{opt}{$1} = [ $state->{opt}{$1} ];
2187                        if ( length($2) > 0 )
2188                        {
2189                            push @{$state->{opt}{$1}},$2;
2190                        } else {
2191                            push @{$state->{opt}{$1}}, shift @{$state->{arguments}};
2192                        }
2193                    } else {
2194                        # if there's extra data in the arg, use that as the argument for the switch
2195                        if ( length($2) > 0 )
2196                        {
2197                            $state->{opt}{$1} = $2;
2198                        } else {
2199                            $state->{opt}{$1} = shift @{$state->{arguments}};
2200                        }
2201                    }
2202                } else {
2203                    $state->{opt}{$1} = undef;
2204                }
2205            }
2206            else
2207            {
2208                push @{$state->{args}}, $arg;
2209            }
2210        }
2211    }
2212    else
2213    {
2214        my $mode = 0;
2215
2216        foreach my $value ( @{$state->{arguments}} )
2217        {
2218            if ( $value eq "--" )
2219            {
2220                $mode++;
2221                next;
2222            }
2223            push @{$state->{args}}, $value if ( $mode == 0 );
2224            push @{$state->{files}}, $value if ( $mode == 1 );
2225        }
2226    }
2227}
2228
2229# This method uses $state->{directory} to populate $state->{args} with a list of filenames
2230sub argsfromdir
2231{
2232    my $updater = shift;
2233
2234    $state->{args} = [] if ( scalar(@{$state->{args}}) == 1 and $state->{args}[0] eq "." );
2235
2236    return if ( scalar ( @{$state->{args}} ) > 1 );
2237
2238    my @gethead = @{$updater->gethead};
2239
2240    # push added files
2241    foreach my $file (keys %{$state->{entries}}) {
2242        if ( exists $state->{entries}{$file}{revision} &&
2243                $state->{entries}{$file}{revision} eq '0' )
2244        {
2245            push @gethead, { name => $file, filehash => 'added' };
2246        }
2247    }
2248
2249    if ( scalar(@{$state->{args}}) == 1 )
2250    {
2251        my $arg = $state->{args}[0];
2252        $arg .= $state->{prependdir} if ( defined ( $state->{prependdir} ) );
2253
2254        $log->info("Only one arg specified, checking for directory expansion on '$arg'");
2255
2256        foreach my $file ( @gethead )
2257        {
2258            next if ( $file->{filehash} eq "deleted" and not defined ( $state->{entries}{$file->{name}} ) );
2259            next unless ( $file->{name} =~ /^$arg\// or $file->{name} eq $arg  );
2260            push @{$state->{args}}, $file->{name};
2261        }
2262
2263        shift @{$state->{args}} if ( scalar(@{$state->{args}}) > 1 );
2264    } else {
2265        $log->info("Only one arg specified, populating file list automatically");
2266
2267        $state->{args} = [];
2268
2269        foreach my $file ( @gethead )
2270        {
2271            next if ( $file->{filehash} eq "deleted" and not defined ( $state->{entries}{$file->{name}} ) );
2272            next unless ( $file->{name} =~ s/^$state->{prependdir}// );
2273            push @{$state->{args}}, $file->{name};
2274        }
2275    }
2276}
2277
2278
2279## look up directory sticky tag, of either fullPath or a parent:
2280sub getDirStickyInfo
2281{
2282    my($fullPath)=@_;
2283
2284    $fullPath=~s%/+$%%;
2285    while($fullPath ne "" && !defined($state->{dirMap}{"$fullPath/"}))
2286    {
2287        $fullPath=~s%/?[^/]*$%%;
2288    }
2289
2290    if( !defined($state->{dirMap}{"$fullPath/"}) &&
2291        ( $fullPath eq "" ||
2292          $fullPath eq "." ) )
2293    {
2294        return $state->{dirMap}{""}{stickyInfo};
2295    }
2296    else
2297    {
2298        return $state->{dirMap}{"$fullPath/"}{stickyInfo};
2299    }
2300}
2301
2302# Resolve precedence of various ways of specifying which version of
2303# a file you want.  Returns undef (for default head), or a ref to a hash
2304# that contains "tag" and/or "date" keys.
2305sub resolveStickyInfo
2306{
2307    my($filename,$stickyTag,$stickyDate,$reset) = @_;
2308
2309    # Order of precedence of sticky tags:
2310    #    -A       [head]
2311    #    -r /tag/
2312    #    [file entry sticky tag]
2313    #    [the tag specified in dir req_Sticky]
2314    #    [the tag specified in a parent dir req_Sticky]
2315    #    [head]
2316
2317    my $result;
2318    if($reset)
2319    {
2320        # $result=undef;
2321    }
2322    elsif( defined($stickyTag) && $stickyTag ne "" )
2323           # || ( defined($stickyDate) && $stickyDate ne "" )   # TODO
2324    {
2325        $result={ 'tag' => (defined($stickyTag)?$stickyTag:undef) };
2326
2327        # TODO: Convert -D value into the form 2011.04.10.04.46.57,
2328        #   similar to an entry line's sticky date, without the D prefix.
2329        #   It sometimes (always?) arrives as something more like
2330        #   '10 Apr 2011 04:46:57 -0000'...
2331        # $result={ 'date' => (defined($stickyDate)?$stickyDate:undef) };
2332    }
2333    elsif( defined($state->{entries}{$filename}) &&
2334           defined($state->{entries}{$filename}{tag_or_date}) &&
2335           $state->{entries}{$filename}{tag_or_date} ne "" )
2336    {
2337        my($tagOrDate)=$state->{entries}{$filename}{tag_or_date};
2338        if($tagOrDate=~/^T([^ ]+)\s*$/)
2339        {
2340            $result = { 'tag' => $1 };
2341        }
2342        elsif($tagOrDate=~/^D([0-9.]+)\s*$/)
2343        {
2344            $result= { 'date' => $1 };
2345        }
2346        else
2347        {
2348            die "Unknown tag_or_date format\n";
2349        }
2350    }
2351    else
2352    {
2353        $result=getDirStickyInfo($filename);
2354    }
2355
2356    return $result;
2357}
2358
2359# Convert a stickyInfo (ref to a hash) as returned by resolveStickyInfo into
2360# a form appropriate for the sticky tag field of an Entries
2361# line (field index 5, 0-based).
2362sub getStickyTagOrDate
2363{
2364    my($stickyInfo)=@_;
2365
2366    my $result;
2367    if(defined($stickyInfo) && defined($stickyInfo->{tag}))
2368    {
2369        $result="T$stickyInfo->{tag}";
2370    }
2371    # TODO: When/if we actually pick versions by {date} properly,
2372    #   also handle it here:
2373    #   "D$stickyInfo->{date}" (example: "D2011.04.13.20.37.07").
2374    else
2375    {
2376        $result="";
2377    }
2378
2379    return $result;
2380}
2381
2382# This method cleans up the $state variable after a command that uses arguments has run
2383sub statecleanup
2384{
2385    $state->{files} = [];
2386    $state->{args} = [];
2387    $state->{arguments} = [];
2388    $state->{entries} = {};
2389    $state->{dirMap} = {};
2390}
2391
2392# Return working directory CVS revision "1.X" out
2393# of the the working directory "entries" state, for the given filename.
2394# This is prefixed with a dash if the file is scheduled for removal
2395# when it is committed.
2396sub revparse
2397{
2398    my $filename = shift;
2399
2400    return $state->{entries}{$filename}{revision};
2401}
2402
2403# This method takes a file hash and does a CVS "file transfer".  Its
2404# exact behaviour depends on a second, optional hash table argument:
2405# - If $options->{targetfile}, dump the contents to that file;
2406# - If $options->{print}, use M/MT to transmit the contents one line
2407#   at a time;
2408# - Otherwise, transmit the size of the file, followed by the file
2409#   contents.
2410sub transmitfile
2411{
2412    my $filehash = shift;
2413    my $options = shift;
2414
2415    if ( defined ( $filehash ) and $filehash eq "deleted" )
2416    {
2417        $log->warn("filehash is 'deleted'");
2418        return;
2419    }
2420
2421    die "Need filehash" unless ( defined ( $filehash ) and $filehash =~ /^[a-zA-Z0-9]{40}$/ );
2422
2423    my $type = `git cat-file -t $filehash`;
2424    chomp $type;
2425
2426    die ( "Invalid type '$type' (expected 'blob')" ) unless ( defined ( $type ) and $type eq "blob" );
2427
2428    my $size = `git cat-file -s $filehash`;
2429    chomp $size;
2430
2431    $log->debug("transmitfile($filehash) size=$size, type=$type");
2432
2433    if ( open my $fh, '-|', "git", "cat-file", "blob", $filehash )
2434    {
2435        if ( defined ( $options->{targetfile} ) )
2436        {
2437            my $targetfile = $options->{targetfile};
2438            open NEWFILE, ">", $targetfile or die("Couldn't open '$targetfile' for writing : $!");
2439            print NEWFILE $_ while ( <$fh> );
2440            close NEWFILE or die("Failed to write '$targetfile': $!");
2441        } elsif ( defined ( $options->{print} ) && $options->{print} ) {
2442            while ( <$fh> ) {
2443                if( /\n\z/ ) {
2444                    print 'M ', $_;
2445                } else {
2446                    print 'MT text ', $_, "\n";
2447                }
2448            }
2449        } else {
2450            print "$size\n";
2451            print while ( <$fh> );
2452        }
2453        close $fh or die ("Couldn't close filehandle for transmitfile(): $!");
2454    } else {
2455        die("Couldn't execute git-cat-file");
2456    }
2457}
2458
2459# This method takes a file name, and returns ( $dirpart, $filepart ) which
2460# refers to the directory portion and the file portion of the filename
2461# respectively
2462sub filenamesplit
2463{
2464    my $filename = shift;
2465    my $fixforlocaldir = shift;
2466
2467    my ( $filepart, $dirpart ) = ( $filename, "." );
2468    ( $filepart, $dirpart ) = ( $2, $1 ) if ( $filename =~ /(.*)\/(.*)/ );
2469    $dirpart .= "/";
2470
2471    if ( $fixforlocaldir )
2472    {
2473        $dirpart =~ s/^$state->{prependdir}//;
2474    }
2475
2476    return ( $filepart, $dirpart );
2477}
2478
2479# Cleanup various junk in filename (try to canonicalize it), and
2480# add prependdir to accomodate running CVS client from a
2481# subdirectory (so the output is relative to top directory of the project).
2482sub filecleanup
2483{
2484    my $filename = shift;
2485
2486    return undef unless(defined($filename));
2487    if ( $filename =~ /^\// )
2488    {
2489        print "E absolute filenames '$filename' not supported by server\n";
2490        return undef;
2491    }
2492
2493    if($filename eq ".")
2494    {
2495        $filename="";
2496    }
2497    $filename =~ s/^\.\///g;
2498    $filename =~ s%/+%/%g;
2499    $filename = $state->{prependdir} . $filename;
2500    $filename =~ s%/$%%;
2501    return $filename;
2502}
2503
2504# Remove prependdir from the path, so that is is relative to the directory
2505# the CVS client was started from, rather than the top of the project.
2506# Essentially the inverse of filecleanup().
2507sub remove_prependdir
2508{
2509    my($path) = @_;
2510    if(defined($state->{prependdir}) && $state->{prependdir} ne "")
2511    {
2512        my($pre)=$state->{prependdir};
2513        $pre=~s%/$%%;
2514        if(!($path=~s%^\Q$pre\E/?%%))
2515        {
2516            $log->fatal("internal error missing prependdir");
2517            die("internal error missing prependdir");
2518        }
2519    }
2520    return $path;
2521}
2522
2523sub validateGitDir
2524{
2525    if( !defined($state->{CVSROOT}) )
2526    {
2527        print "error 1 CVSROOT not specified\n";
2528        cleanupWorkTree();
2529        exit;
2530    }
2531    if( $ENV{GIT_DIR} ne ($state->{CVSROOT} . '/') )
2532    {
2533        print "error 1 Internally inconsistent CVSROOT\n";
2534        cleanupWorkTree();
2535        exit;
2536    }
2537}
2538
2539# Setup working directory in a work tree with the requested version
2540# loaded in the index.
2541sub setupWorkTree
2542{
2543    my ($ver) = @_;
2544
2545    validateGitDir();
2546
2547    if( ( defined($work->{state}) && $work->{state} != 1 ) ||
2548        defined($work->{tmpDir}) )
2549    {
2550        $log->warn("Bad work tree state management");
2551        print "error 1 Internal setup multiple work trees without cleanup\n";
2552        cleanupWorkTree();
2553        exit;
2554    }
2555
2556    $work->{workDir} = tempdir ( DIR => $TEMP_DIR );
2557
2558    if( !defined($work->{index}) )
2559    {
2560        (undef, $work->{index}) = tempfile ( DIR => $TEMP_DIR, OPEN => 0 );
2561    }
2562
2563    chdir $work->{workDir} or
2564        die "Unable to chdir to $work->{workDir}\n";
2565
2566    $log->info("Setting up GIT_WORK_TREE as '.' in '$work->{workDir}', index file is '$work->{index}'");
2567
2568    $ENV{GIT_WORK_TREE} = ".";
2569    $ENV{GIT_INDEX_FILE} = $work->{index};
2570    $work->{state} = 2;
2571
2572    if($ver)
2573    {
2574        system("git","read-tree",$ver);
2575        unless ($? == 0)
2576        {
2577            $log->warn("Error running git-read-tree");
2578            die "Error running git-read-tree $ver in $work->{workDir} $!\n";
2579        }
2580    }
2581    # else # req_annotate reads tree for each file
2582}
2583
2584# Ensure current directory is in some kind of working directory,
2585# with a recent version loaded in the index.
2586sub ensureWorkTree
2587{
2588    if( defined($work->{tmpDir}) )
2589    {
2590        $log->warn("Bad work tree state management [ensureWorkTree()]");
2591        print "error 1 Internal setup multiple dirs without cleanup\n";
2592        cleanupWorkTree();
2593        exit;
2594    }
2595    if( $work->{state} )
2596    {
2597        return;
2598    }
2599
2600    validateGitDir();
2601
2602    if( !defined($work->{emptyDir}) )
2603    {
2604        $work->{emptyDir} = tempdir ( DIR => $TEMP_DIR, OPEN => 0);
2605    }
2606    chdir $work->{emptyDir} or
2607        die "Unable to chdir to $work->{emptyDir}\n";
2608
2609    my $ver = `git show-ref -s refs/heads/$state->{module}`;
2610    chomp $ver;
2611    if ($ver !~ /^[0-9a-f]{40}$/)
2612    {
2613        $log->warn("Error from git show-ref -s refs/head$state->{module}");
2614        print "error 1 cannot find the current HEAD of module";
2615        cleanupWorkTree();
2616        exit;
2617    }
2618
2619    if( !defined($work->{index}) )
2620    {
2621        (undef, $work->{index}) = tempfile ( DIR => $TEMP_DIR, OPEN => 0 );
2622    }
2623
2624    $ENV{GIT_WORK_TREE} = ".";
2625    $ENV{GIT_INDEX_FILE} = $work->{index};
2626    $work->{state} = 1;
2627
2628    system("git","read-tree",$ver);
2629    unless ($? == 0)
2630    {
2631        die "Error running git-read-tree $ver $!\n";
2632    }
2633}
2634
2635# Cleanup working directory that is not needed any longer.
2636sub cleanupWorkTree
2637{
2638    if( ! $work->{state} )
2639    {
2640        return;
2641    }
2642
2643    chdir "/" or die "Unable to chdir '/'\n";
2644
2645    if( defined($work->{workDir}) )
2646    {
2647        rmtree( $work->{workDir} );
2648        undef $work->{workDir};
2649    }
2650    undef $work->{state};
2651}
2652
2653# Setup a temporary directory (not a working tree), typically for
2654# merging dirty state as in req_update.
2655sub setupTmpDir
2656{
2657    $work->{tmpDir} = tempdir ( DIR => $TEMP_DIR );
2658    chdir $work->{tmpDir} or die "Unable to chdir $work->{tmpDir}\n";
2659
2660    return $work->{tmpDir};
2661}
2662
2663# Clean up a previously setupTmpDir.  Restore previous work tree if
2664# appropriate.
2665sub cleanupTmpDir
2666{
2667    if ( !defined($work->{tmpDir}) )
2668    {
2669        $log->warn("cleanup tmpdir that has not been setup");
2670        die "Cleanup tmpDir that has not been setup\n";
2671    }
2672    if( defined($work->{state}) )
2673    {
2674        if( $work->{state} == 1 )
2675        {
2676            chdir $work->{emptyDir} or
2677                die "Unable to chdir to $work->{emptyDir}\n";
2678        }
2679        elsif( $work->{state} == 2 )
2680        {
2681            chdir $work->{workDir} or
2682                die "Unable to chdir to $work->{emptyDir}\n";
2683        }
2684        else
2685        {
2686            $log->warn("Inconsistent work dir state");
2687            die "Inconsistent work dir state\n";
2688        }
2689    }
2690    else
2691    {
2692        chdir "/" or die "Unable to chdir '/'\n";
2693    }
2694}
2695
2696# Given a path, this function returns a string containing the kopts
2697# that should go into that path's Entries line.  For example, a binary
2698# file should get -kb.
2699sub kopts_from_path
2700{
2701    my ($path, $srcType, $name) = @_;
2702
2703    if ( defined ( $cfg->{gitcvs}{usecrlfattr} ) and
2704         $cfg->{gitcvs}{usecrlfattr} =~ /\s*(1|true|yes)\s*$/i )
2705    {
2706        my ($val) = check_attr( "text", $path );
2707        if ( $val eq "unspecified" )
2708        {
2709            $val = check_attr( "crlf", $path );
2710        }
2711        if ( $val eq "unset" )
2712        {
2713            return "-kb"
2714        }
2715        elsif ( check_attr( "eol", $path ) ne "unspecified" ||
2716                $val eq "set" || $val eq "input" )
2717        {
2718            return "";
2719        }
2720        else
2721        {
2722            $log->info("Unrecognized check_attr crlf $path : $val");
2723        }
2724    }
2725
2726    if ( defined ( $cfg->{gitcvs}{allbinary} ) )
2727    {
2728        if( ($cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i) )
2729        {
2730            return "-kb";
2731        }
2732        elsif( ($cfg->{gitcvs}{allbinary} =~ /^\s*guess\s*$/i) )
2733        {
2734            if( is_binary($srcType,$name) )
2735            {
2736                $log->debug("... as binary");
2737                return "-kb";
2738            }
2739            else
2740            {
2741                $log->debug("... as text");
2742            }
2743        }
2744    }
2745    # Return "" to give no special treatment to any path
2746    return "";
2747}
2748
2749sub check_attr
2750{
2751    my ($attr,$path) = @_;
2752    ensureWorkTree();
2753    if ( open my $fh, '-|', "git", "check-attr", $attr, "--", $path )
2754    {
2755        my $val = <$fh>;
2756        close $fh;
2757        $val =~ s/.*: ([^:\r\n]*)\s*$/$1/;
2758        return $val;
2759    }
2760    else
2761    {
2762        return undef;
2763    }
2764}
2765
2766# This should have the same heuristics as convert.c:is_binary() and related.
2767# Note that the bare CR test is done by callers in convert.c.
2768sub is_binary
2769{
2770    my ($srcType,$name) = @_;
2771    $log->debug("is_binary($srcType,$name)");
2772
2773    # Minimize amount of interpreted code run in the inner per-character
2774    # loop for large files, by totalling each character value and
2775    # then analyzing the totals.
2776    my @counts;
2777    my $i;
2778    for($i=0;$i<256;$i++)
2779    {
2780        $counts[$i]=0;
2781    }
2782
2783    my $fh = open_blob_or_die($srcType,$name);
2784    my $line;
2785    while( defined($line=<$fh>) )
2786    {
2787        # Any '\0' and bare CR are considered binary.
2788        if( $line =~ /\0|(\r[^\n])/ )
2789        {
2790            close($fh);
2791            return 1;
2792        }
2793
2794        # Count up each character in the line:
2795        my $len=length($line);
2796        for($i=0;$i<$len;$i++)
2797        {
2798            $counts[ord(substr($line,$i,1))]++;
2799        }
2800    }
2801    close $fh;
2802
2803    # Don't count CR and LF as either printable/nonprintable
2804    $counts[ord("\n")]=0;
2805    $counts[ord("\r")]=0;
2806
2807    # Categorize individual character count into printable and nonprintable:
2808    my $printable=0;
2809    my $nonprintable=0;
2810    for($i=0;$i<256;$i++)
2811    {
2812        if( $i < 32 &&
2813            $i != ord("\b") &&
2814            $i != ord("\t") &&
2815            $i != 033 &&       # ESC
2816            $i != 014 )        # FF
2817        {
2818            $nonprintable+=$counts[$i];
2819        }
2820        elsif( $i==127 )  # DEL
2821        {
2822            $nonprintable+=$counts[$i];
2823        }
2824        else
2825        {
2826            $printable+=$counts[$i];
2827        }
2828    }
2829
2830    return ($printable >> 7) < $nonprintable;
2831}
2832
2833# Returns open file handle.  Possible invocations:
2834#  - open_blob_or_die("file",$filename);
2835#  - open_blob_or_die("sha1",$filehash);
2836sub open_blob_or_die
2837{
2838    my ($srcType,$name) = @_;
2839    my ($fh);
2840    if( $srcType eq "file" )
2841    {
2842        if( !open $fh,"<",$name )
2843        {
2844            $log->warn("Unable to open file $name: $!");
2845            die "Unable to open file $name: $!\n";
2846        }
2847    }
2848    elsif( $srcType eq "sha1" )
2849    {
2850        unless ( defined ( $name ) and $name =~ /^[a-zA-Z0-9]{40}$/ )
2851        {
2852            $log->warn("Need filehash");
2853            die "Need filehash\n";
2854        }
2855
2856        my $type = `git cat-file -t $name`;
2857        chomp $type;
2858
2859        unless ( defined ( $type ) and $type eq "blob" )
2860        {
2861            $log->warn("Invalid type '$type' for '$name'");
2862            die ( "Invalid type '$type' (expected 'blob')" )
2863        }
2864
2865        my $size = `git cat-file -s $name`;
2866        chomp $size;
2867
2868        $log->debug("open_blob_or_die($name) size=$size, type=$type");
2869
2870        unless( open $fh, '-|', "git", "cat-file", "blob", $name )
2871        {
2872            $log->warn("Unable to open sha1 $name");
2873            die "Unable to open sha1 $name\n";
2874        }
2875    }
2876    else
2877    {
2878        $log->warn("Unknown type of blob source: $srcType");
2879        die "Unknown type of blob source: $srcType\n";
2880    }
2881    return $fh;
2882}
2883
2884# Generate a CVS author name from Git author information, by taking the local
2885# part of the email address and replacing characters not in the Portable
2886# Filename Character Set (see IEEE Std 1003.1-2001, 3.276) by underscores. CVS
2887# Login names are Unix login names, which should be restricted to this
2888# character set.
2889sub cvs_author
2890{
2891    my $author_line = shift;
2892    (my $author) = $author_line =~ /<([^@>]*)/;
2893
2894    $author =~ s/[^-a-zA-Z0-9_.]/_/g;
2895    $author =~ s/^-/_/;
2896
2897    $author;
2898}
2899
2900
2901sub descramble
2902{
2903    # This table is from src/scramble.c in the CVS source
2904    my @SHIFTS = (
2905        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
2906        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2907        114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
2908        111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
2909        41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
2910        125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
2911        36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
2912        58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
2913        225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
2914        199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
2915        174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
2916        207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
2917        192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
2918        227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
2919        182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
2920        243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
2921    );
2922    my ($str) = @_;
2923
2924    # This should never happen, the same password format (A) has been
2925    # used by CVS since the beginning of time
2926    {
2927        my $fmt = substr($str, 0, 1);
2928        die "invalid password format `$fmt'" unless $fmt eq 'A';
2929    }
2930
2931    my @str = unpack "C*", substr($str, 1);
2932    my $ret = join '', map { chr $SHIFTS[$_] } @str;
2933    return $ret;
2934}
2935
2936
2937package GITCVS::log;
2938
2939####
2940#### Copyright The Open University UK - 2006.
2941####
2942#### Authors: Martyn Smith    <martyn@catalyst.net.nz>
2943####          Martin Langhoff <martin@laptop.org>
2944####
2945####
2946
2947use strict;
2948use warnings;
2949
2950=head1 NAME
2951
2952GITCVS::log
2953
2954=head1 DESCRIPTION
2955
2956This module provides very crude logging with a similar interface to
2957Log::Log4perl
2958
2959=head1 METHODS
2960
2961=cut
2962
2963=head2 new
2964
2965Creates a new log object, optionally you can specify a filename here to
2966indicate the file to log to. If no log file is specified, you can specify one
2967later with method setfile, or indicate you no longer want logging with method
2968nofile.
2969
2970Until one of these methods is called, all log calls will buffer messages ready
2971to write out.
2972
2973=cut
2974sub new
2975{
2976    my $class = shift;
2977    my $filename = shift;
2978
2979    my $self = {};
2980
2981    bless $self, $class;
2982
2983    if ( defined ( $filename ) )
2984    {
2985        open $self->{fh}, ">>", $filename or die("Couldn't open '$filename' for writing : $!");
2986    }
2987
2988    return $self;
2989}
2990
2991=head2 setfile
2992
2993This methods takes a filename, and attempts to open that file as the log file.
2994If successful, all buffered data is written out to the file, and any further
2995logging is written directly to the file.
2996
2997=cut
2998sub setfile
2999{
3000    my $self = shift;
3001    my $filename = shift;
3002
3003    if ( defined ( $filename ) )
3004    {
3005        open $self->{fh}, ">>", $filename or die("Couldn't open '$filename' for writing : $!");
3006    }
3007
3008    return unless ( defined ( $self->{buffer} ) and ref $self->{buffer} eq "ARRAY" );
3009
3010    while ( my $line = shift @{$self->{buffer}} )
3011    {
3012        print {$self->{fh}} $line;
3013    }
3014}
3015
3016=head2 nofile
3017
3018This method indicates no logging is going to be used. It flushes any entries in
3019the internal buffer, and sets a flag to ensure no further data is put there.
3020
3021=cut
3022sub nofile
3023{
3024    my $self = shift;
3025
3026    $self->{nolog} = 1;
3027
3028    return unless ( defined ( $self->{buffer} ) and ref $self->{buffer} eq "ARRAY" );
3029
3030    $self->{buffer} = [];
3031}
3032
3033=head2 _logopen
3034
3035Internal method. Returns true if the log file is open, false otherwise.
3036
3037=cut
3038sub _logopen
3039{
3040    my $self = shift;
3041
3042    return 1 if ( defined ( $self->{fh} ) and ref $self->{fh} eq "GLOB" );
3043    return 0;
3044}
3045
3046=head2 debug info warn fatal
3047
3048These four methods are wrappers to _log. They provide the actual interface for
3049logging data.
3050
3051=cut
3052sub debug { my $self = shift; $self->_log("debug", @_); }
3053sub info  { my $self = shift; $self->_log("info" , @_); }
3054sub warn  { my $self = shift; $self->_log("warn" , @_); }
3055sub fatal { my $self = shift; $self->_log("fatal", @_); }
3056
3057=head2 _log
3058
3059This is an internal method called by the logging functions. It generates a
3060timestamp and pushes the logged line either to file, or internal buffer.
3061
3062=cut
3063sub _log
3064{
3065    my $self = shift;
3066    my $level = shift;
3067
3068    return if ( $self->{nolog} );
3069
3070    my @time = localtime;
3071    my $timestring = sprintf("%4d-%02d-%02d %02d:%02d:%02d : %-5s",
3072        $time[5] + 1900,
3073        $time[4] + 1,
3074        $time[3],
3075        $time[2],
3076        $time[1],
3077        $time[0],
3078        uc $level,
3079    );
3080
3081    if ( $self->_logopen )
3082    {
3083        print {$self->{fh}} $timestring . " - " . join(" ",@_) . "\n";
3084    } else {
3085        push @{$self->{buffer}}, $timestring . " - " . join(" ",@_) . "\n";
3086    }
3087}
3088
3089=head2 DESTROY
3090
3091This method simply closes the file handle if one is open
3092
3093=cut
3094sub DESTROY
3095{
3096    my $self = shift;
3097
3098    if ( $self->_logopen )
3099    {
3100        close $self->{fh};
3101    }
3102}
3103
3104package GITCVS::updater;
3105
3106####
3107#### Copyright The Open University UK - 2006.
3108####
3109#### Authors: Martyn Smith    <martyn@catalyst.net.nz>
3110####          Martin Langhoff <martin@laptop.org>
3111####
3112####
3113
3114use strict;
3115use warnings;
3116use DBI;
3117
3118=head1 METHODS
3119
3120=cut
3121
3122=head2 new
3123
3124=cut
3125sub new
3126{
3127    my $class = shift;
3128    my $config = shift;
3129    my $module = shift;
3130    my $log = shift;
3131
3132    die "Need to specify a git repository" unless ( defined($config) and -d $config );
3133    die "Need to specify a module" unless ( defined($module) );
3134
3135    $class = ref($class) || $class;
3136
3137    my $self = {};
3138
3139    bless $self, $class;
3140
3141    $self->{valid_tables} = {'revision' => 1,
3142                             'revision_ix1' => 1,
3143                             'revision_ix2' => 1,
3144                             'head' => 1,
3145                             'head_ix1' => 1,
3146                             'properties' => 1,
3147                             'commitmsgs' => 1};
3148
3149    $self->{module} = $module;
3150    $self->{git_path} = $config . "/";
3151
3152    $self->{log} = $log;
3153
3154    die "Git repo '$self->{git_path}' doesn't exist" unless ( -d $self->{git_path} );
3155
3156    # Stores full sha1's for various branch/tag names, abbreviations, etc:
3157    $self->{commitRefCache} = {};
3158
3159    $self->{dbdriver} = $cfg->{gitcvs}{$state->{method}}{dbdriver} ||
3160        $cfg->{gitcvs}{dbdriver} || "SQLite";
3161    $self->{dbname} = $cfg->{gitcvs}{$state->{method}}{dbname} ||
3162        $cfg->{gitcvs}{dbname} || "%Ggitcvs.%m.sqlite";
3163    $self->{dbuser} = $cfg->{gitcvs}{$state->{method}}{dbuser} ||
3164        $cfg->{gitcvs}{dbuser} || "";
3165    $self->{dbpass} = $cfg->{gitcvs}{$state->{method}}{dbpass} ||
3166        $cfg->{gitcvs}{dbpass} || "";
3167    $self->{dbtablenameprefix} = $cfg->{gitcvs}{$state->{method}}{dbtablenameprefix} ||
3168        $cfg->{gitcvs}{dbtablenameprefix} || "";
3169    my %mapping = ( m => $module,
3170                    a => $state->{method},
3171                    u => getlogin || getpwuid($<) || $<,
3172                    G => $self->{git_path},
3173                    g => mangle_dirname($self->{git_path}),
3174                    );
3175    $self->{dbname} =~ s/%([mauGg])/$mapping{$1}/eg;
3176    $self->{dbuser} =~ s/%([mauGg])/$mapping{$1}/eg;
3177    $self->{dbtablenameprefix} =~ s/%([mauGg])/$mapping{$1}/eg;
3178    $self->{dbtablenameprefix} = mangle_tablename($self->{dbtablenameprefix});
3179
3180    die "Invalid char ':' in dbdriver" if $self->{dbdriver} =~ /:/;
3181    die "Invalid char ';' in dbname" if $self->{dbname} =~ /;/;
3182    $self->{dbh} = DBI->connect("dbi:$self->{dbdriver}:dbname=$self->{dbname}",
3183                                $self->{dbuser},
3184                                $self->{dbpass});
3185    die "Error connecting to database\n" unless defined $self->{dbh};
3186
3187    $self->{tables} = {};
3188    foreach my $table ( keys %{$self->{dbh}->table_info(undef,undef,undef,'TABLE')->fetchall_hashref('TABLE_NAME')} )
3189    {
3190        $self->{tables}{$table} = 1;
3191    }
3192
3193    # Construct the revision table if required
3194    # The revision table stores an entry for each file, each time that file
3195    # changes.
3196    #   numberOfRecords = O( numCommits * averageNumChangedFilesPerCommit )
3197    # This is not sufficient to support "-r {commithash}" for any
3198    # files except files that were modified by that commit (also,
3199    # some places in the code ignore/effectively strip out -r in
3200    # some cases, before it gets passed to getmeta()).
3201    # The "filehash" field typically has a git blob hash, but can also
3202    # be set to "dead" to indicate that the given version of the file
3203    # should not exist in the sandbox.
3204    unless ( $self->{tables}{$self->tablename("revision")} )
3205    {
3206        my $tablename = $self->tablename("revision");
3207        my $ix1name = $self->tablename("revision_ix1");
3208        my $ix2name = $self->tablename("revision_ix2");
3209        $self->{dbh}->do("
3210            CREATE TABLE $tablename (
3211                name       TEXT NOT NULL,
3212                revision   INTEGER NOT NULL,
3213                filehash   TEXT NOT NULL,
3214                commithash TEXT NOT NULL,
3215                author     TEXT NOT NULL,
3216                modified   TEXT NOT NULL,
3217                mode       TEXT NOT NULL
3218            )
3219        ");
3220        $self->{dbh}->do("
3221            CREATE INDEX $ix1name
3222            ON $tablename (name,revision)
3223        ");
3224        $self->{dbh}->do("
3225            CREATE INDEX $ix2name
3226            ON $tablename (name,commithash)
3227        ");
3228    }
3229
3230    # Construct the head table if required
3231    # The head table (along with the "last_commit" entry in the property
3232    # table) is the persisted working state of the "sub update" subroutine.
3233    # All of it's data is read entirely first, and completely recreated
3234    # last, every time "sub update" runs.
3235    # This is also used by "sub getmeta" when it is asked for the latest
3236    # version of a file (as opposed to some specific version).
3237    # Another way of thinking about it is as a single slice out of
3238    # "revisions", giving just the most recent revision information for
3239    # each file.
3240    unless ( $self->{tables}{$self->tablename("head")} )
3241    {
3242        my $tablename = $self->tablename("head");
3243        my $ix1name = $self->tablename("head_ix1");
3244        $self->{dbh}->do("
3245            CREATE TABLE $tablename (
3246                name       TEXT NOT NULL,
3247                revision   INTEGER NOT NULL,
3248                filehash   TEXT NOT NULL,
3249                commithash TEXT NOT NULL,
3250                author     TEXT NOT NULL,
3251                modified   TEXT NOT NULL,
3252                mode       TEXT NOT NULL
3253            )
3254        ");
3255        $self->{dbh}->do("
3256            CREATE INDEX $ix1name
3257            ON $tablename (name)
3258        ");
3259    }
3260
3261    # Construct the properties table if required
3262    #  - "last_commit" - Used by "sub update".
3263    unless ( $self->{tables}{$self->tablename("properties")} )
3264    {
3265        my $tablename = $self->tablename("properties");
3266        $self->{dbh}->do("
3267            CREATE TABLE $tablename (
3268                key        TEXT NOT NULL PRIMARY KEY,
3269                value      TEXT
3270            )
3271        ");
3272    }
3273
3274    # Construct the commitmsgs table if required
3275    # The commitmsgs table is only used for merge commits, since
3276    # "sub update" will only keep one branch of parents.  Shortlogs
3277    # for ignored commits (i.e. not on the chosen branch) will be used
3278    # to construct a replacement "collapsed" merge commit message,
3279    # which will be stored in this table.  See also "sub commitmessage".
3280    unless ( $self->{tables}{$self->tablename("commitmsgs")} )
3281    {
3282        my $tablename = $self->tablename("commitmsgs");
3283        $self->{dbh}->do("
3284            CREATE TABLE $tablename (
3285                key        TEXT NOT NULL PRIMARY KEY,
3286                value      TEXT
3287            )
3288        ");
3289    }
3290
3291    return $self;
3292}
3293
3294=head2 tablename
3295
3296=cut
3297sub tablename
3298{
3299    my $self = shift;
3300    my $name = shift;
3301
3302    if (exists $self->{valid_tables}{$name}) {
3303        return $self->{dbtablenameprefix} . $name;
3304    } else {
3305        return undef;
3306    }
3307}
3308
3309=head2 update
3310
3311Bring the database up to date with the latest changes from
3312the git repository.
3313
3314Internal working state is read out of the "head" table and the
3315"last_commit" property, then it updates "revisions" based on that, and
3316finally it writes the new internal state back to the "head" table
3317so it can be used as a starting point the next time update is called.
3318
3319=cut
3320sub update
3321{
3322    my $self = shift;
3323
3324    # first lets get the commit list
3325    $ENV{GIT_DIR} = $self->{git_path};
3326
3327    my $commitsha1 = `git rev-parse $self->{module}`;
3328    chomp $commitsha1;
3329
3330    my $commitinfo = `git cat-file commit $self->{module} 2>&1`;
3331    unless ( $commitinfo =~ /tree\s+[a-zA-Z0-9]{40}/ )
3332    {
3333        die("Invalid module '$self->{module}'");
3334    }
3335
3336
3337    my $git_log;
3338    my $lastcommit = $self->_get_prop("last_commit");
3339
3340    if (defined $lastcommit && $lastcommit eq $commitsha1) { # up-to-date
3341         return 1;
3342    }
3343
3344    # Start exclusive lock here...
3345    $self->{dbh}->begin_work() or die "Cannot lock database for BEGIN";
3346
3347    # TODO: log processing is memory bound
3348    # if we can parse into a 2nd file that is in reverse order
3349    # we can probably do something really efficient
3350    my @git_log_params = ('--pretty', '--parents', '--topo-order');
3351
3352    if (defined $lastcommit) {
3353        push @git_log_params, "$lastcommit..$self->{module}";
3354    } else {
3355        push @git_log_params, $self->{module};
3356    }
3357    # git-rev-list is the backend / plumbing version of git-log
3358    open(my $gitLogPipe, '-|', 'git', 'rev-list', @git_log_params)
3359                or die "Cannot call git-rev-list: $!";
3360    my @commits=readCommits($gitLogPipe);
3361    close $gitLogPipe;
3362
3363    # Now all the commits are in the @commits bucket
3364    # ordered by time DESC. for each commit that needs processing,
3365    # determine whether it's following the last head we've seen or if
3366    # it's on its own branch, grab a file list, and add whatever's changed
3367    # NOTE: $lastcommit refers to the last commit from previous run
3368    #       $lastpicked is the last commit we picked in this run
3369    my $lastpicked;
3370    my $head = {};
3371    if (defined $lastcommit) {
3372        $lastpicked = $lastcommit;
3373    }
3374
3375    my $committotal = scalar(@commits);
3376    my $commitcount = 0;
3377
3378    # Load the head table into $head (for cached lookups during the update process)
3379    foreach my $file ( @{$self->gethead(1)} )
3380    {
3381        $head->{$file->{name}} = $file;
3382    }
3383
3384    foreach my $commit ( @commits )
3385    {
3386        $self->{log}->debug("GITCVS::updater - Processing commit $commit->{hash} (" . (++$commitcount) . " of $committotal)");
3387        if (defined $lastpicked)
3388        {
3389            if (!in_array($lastpicked, @{$commit->{parents}}))
3390            {
3391                # skip, we'll see this delta
3392                # as part of a merge later
3393                # warn "skipping off-track  $commit->{hash}\n";
3394                next;
3395            } elsif (@{$commit->{parents}} > 1) {
3396                # it is a merge commit, for each parent that is
3397                # not $lastpicked (not given a CVS revision number),
3398                # see if we can get a log
3399                # from the merge-base to that parent to put it
3400                # in the message as a merge summary.
3401                my @parents = @{$commit->{parents}};
3402                foreach my $parent (@parents) {
3403                    if ($parent eq $lastpicked) {
3404                        next;
3405                    }
3406                    # git-merge-base can potentially (but rarely) throw
3407                    # several candidate merge bases. let's assume
3408                    # that the first one is the best one.
3409                    my $base = eval {
3410                            safe_pipe_capture('git', 'merge-base',
3411                                                 $lastpicked, $parent);
3412                    };
3413                    # The two branches may not be related at all,
3414                    # in which case merge base simply fails to find
3415                    # any, but that's Ok.
3416                    next if ($@);
3417
3418                    chomp $base;
3419                    if ($base) {
3420                        my @merged;
3421                        # print "want to log between  $base $parent \n";
3422                        open(GITLOG, '-|', 'git', 'log', '--pretty=medium', "$base..$parent")
3423                          or die "Cannot call git-log: $!";
3424                        my $mergedhash;
3425                        while (<GITLOG>) {
3426                            chomp;
3427                            if (!defined $mergedhash) {
3428                                if (m/^commit\s+(.+)$/) {
3429                                    $mergedhash = $1;
3430                                } else {
3431                                    next;
3432                                }
3433                            } else {
3434                                # grab the first line that looks non-rfc822
3435                                # aka has content after leading space
3436                                if (m/^\s+(\S.*)$/) {
3437                                    my $title = $1;
3438                                    $title = substr($title,0,100); # truncate
3439                                    unshift @merged, "$mergedhash $title";
3440                                    undef $mergedhash;
3441                                }
3442                            }
3443                        }
3444                        close GITLOG;
3445                        if (@merged) {
3446                            $commit->{mergemsg} = $commit->{message};
3447                            $commit->{mergemsg} .= "\nSummary of merged commits:\n\n";
3448                            foreach my $summary (@merged) {
3449                                $commit->{mergemsg} .= "\t$summary\n";
3450                            }
3451                            $commit->{mergemsg} .= "\n\n";
3452                            # print "Message for $commit->{hash} \n$commit->{mergemsg}";
3453                        }
3454                    }
3455                }
3456            }
3457        }
3458
3459        # convert the date to CVS-happy format
3460        my $cvsDate = convertToCvsDate($commit->{date});
3461
3462        if ( defined ( $lastpicked ) )
3463        {
3464            my $filepipe = open(FILELIST, '-|', 'git', 'diff-tree', '-z', '-r', $lastpicked, $commit->{hash}) or die("Cannot call git-diff-tree : $!");
3465            local ($/) = "\0";
3466            while ( <FILELIST> )
3467            {
3468                chomp;
3469                unless ( /^:\d{6}\s+([0-7]{6})\s+[a-f0-9]{40}\s+([a-f0-9]{40})\s+(\w)$/o )
3470                {
3471                    die("Couldn't process git-diff-tree line : $_");
3472                }
3473                my ($mode, $hash, $change) = ($1, $2, $3);
3474                my $name = <FILELIST>;
3475                chomp($name);
3476
3477                # $log->debug("File mode=$mode, hash=$hash, change=$change, name=$name");
3478
3479                my $dbMode = convertToDbMode($mode);
3480
3481                if ( $change eq "D" )
3482                {
3483                    #$log->debug("DELETE   $name");
3484                    $head->{$name} = {
3485                        name => $name,
3486                        revision => $head->{$name}{revision} + 1,
3487                        filehash => "deleted",
3488                        commithash => $commit->{hash},
3489                        modified => $cvsDate,
3490                        author => $commit->{author},
3491                        mode => $dbMode,
3492                    };
3493                    $self->insert_rev($name, $head->{$name}{revision}, $hash, $commit->{hash}, $cvsDate, $commit->{author}, $dbMode);
3494                }
3495                elsif ( $change eq "M" || $change eq "T" )
3496                {
3497                    #$log->debug("MODIFIED $name");
3498                    $head->{$name} = {
3499                        name => $name,
3500                        revision => $head->{$name}{revision} + 1,
3501                        filehash => $hash,
3502                        commithash => $commit->{hash},
3503                        modified => $cvsDate,
3504                        author => $commit->{author},
3505                        mode => $dbMode,
3506                    };
3507                    $self->insert_rev($name, $head->{$name}{revision}, $hash, $commit->{hash}, $cvsDate, $commit->{author}, $dbMode);
3508                }
3509                elsif ( $change eq "A" )
3510                {
3511                    #$log->debug("ADDED    $name");
3512                    $head->{$name} = {
3513                        name => $name,
3514                        revision => $head->{$name}{revision} ? $head->{$name}{revision}+1 : 1,
3515                        filehash => $hash,
3516                        commithash => $commit->{hash},
3517                        modified => $cvsDate,
3518                        author => $commit->{author},
3519                        mode => $dbMode,
3520                    };
3521                    $self->insert_rev($name, $head->{$name}{revision}, $hash, $commit->{hash}, $cvsDate, $commit->{author}, $dbMode);
3522                }
3523                else
3524                {
3525                    $log->warn("UNKNOWN FILE CHANGE mode=$mode, hash=$hash, change=$change, name=$name");
3526                    die;
3527                }
3528            }
3529            close FILELIST;
3530        } else {
3531            # this is used to detect files removed from the repo
3532            my $seen_files = {};
3533
3534            my $filepipe = open(FILELIST, '-|', 'git', 'ls-tree', '-z', '-r', $commit->{hash}) or die("Cannot call git-ls-tree : $!");
3535            local $/ = "\0";
3536            while ( <FILELIST> )
3537            {
3538                chomp;
3539                unless ( /^(\d+)\s+(\w+)\s+([a-zA-Z0-9]+)\t(.*)$/o )
3540                {
3541                    die("Couldn't process git-ls-tree line : $_");
3542                }
3543
3544                my ( $mode, $git_type, $git_hash, $git_filename ) = ( $1, $2, $3, $4 );
3545
3546                $seen_files->{$git_filename} = 1;
3547
3548                my ( $oldhash, $oldrevision, $oldmode ) = (
3549                    $head->{$git_filename}{filehash},
3550                    $head->{$git_filename}{revision},
3551                    $head->{$git_filename}{mode}
3552                );
3553
3554                my $dbMode = convertToDbMode($mode);
3555
3556                # unless the file exists with the same hash, we need to update it ...
3557                unless ( defined($oldhash) and $oldhash eq $git_hash and defined($oldmode) and $oldmode eq $dbMode )
3558                {
3559                    my $newrevision = ( $oldrevision or 0 ) + 1;
3560
3561                    $head->{$git_filename} = {
3562                        name => $git_filename,
3563                        revision => $newrevision,
3564                        filehash => $git_hash,
3565                        commithash => $commit->{hash},
3566                        modified => $cvsDate,
3567                        author => $commit->{author},
3568                        mode => $dbMode,
3569                    };
3570
3571
3572                    $self->insert_rev($git_filename, $newrevision, $git_hash, $commit->{hash}, $cvsDate, $commit->{author}, $dbMode);
3573                }
3574            }
3575            close FILELIST;
3576
3577            # Detect deleted files
3578            foreach my $file ( keys %$head )
3579            {
3580                unless ( exists $seen_files->{$file} or $head->{$file}{filehash} eq "deleted" )
3581                {
3582                    $head->{$file}{revision}++;
3583                    $head->{$file}{filehash} = "deleted";
3584                    $head->{$file}{commithash} = $commit->{hash};
3585                    $head->{$file}{modified} = $cvsDate;
3586                    $head->{$file}{author} = $commit->{author};
3587
3588                    $self->insert_rev($file, $head->{$file}{revision}, $head->{$file}{filehash}, $commit->{hash}, $cvsDate, $commit->{author}, $head->{$file}{mode});
3589                }
3590            }
3591            # END : "Detect deleted files"
3592        }
3593
3594
3595        if (exists $commit->{mergemsg})
3596        {
3597            $self->insert_mergelog($commit->{hash}, $commit->{mergemsg});
3598        }
3599
3600        $lastpicked = $commit->{hash};
3601
3602        $self->_set_prop("last_commit", $commit->{hash});
3603    }
3604
3605    $self->delete_head();
3606    foreach my $file ( keys %$head )
3607    {
3608        $self->insert_head(
3609            $file,
3610            $head->{$file}{revision},
3611            $head->{$file}{filehash},
3612            $head->{$file}{commithash},
3613            $head->{$file}{modified},
3614            $head->{$file}{author},
3615            $head->{$file}{mode},
3616        );
3617    }
3618    # invalidate the gethead cache
3619    $self->clearCommitRefCaches();
3620
3621
3622    # Ending exclusive lock here
3623    $self->{dbh}->commit() or die "Failed to commit changes to SQLite";
3624}
3625
3626sub readCommits
3627{
3628    my $pipeHandle = shift;
3629    my @commits;
3630
3631    my %commit = ();
3632
3633    while ( <$pipeHandle> )
3634    {
3635        chomp;
3636        if (m/^commit\s+(.*)$/) {
3637            # on ^commit lines put the just seen commit in the stack
3638            # and prime things for the next one
3639            if (keys %commit) {
3640                my %copy = %commit;
3641                unshift @commits, \%copy;
3642                %commit = ();
3643            }
3644            my @parents = split(m/\s+/, $1);
3645            $commit{hash} = shift @parents;
3646            $commit{parents} = \@parents;
3647        } elsif (m/^(\w+?):\s+(.*)$/ && !exists($commit{message})) {
3648            # on rfc822-like lines seen before we see any message,
3649            # lowercase the entry and put it in the hash as key-value
3650            $commit{lc($1)} = $2;
3651        } else {
3652            # message lines - skip initial empty line
3653            # and trim whitespace
3654            if (!exists($commit{message}) && m/^\s*$/) {
3655                # define it to mark the end of headers
3656                $commit{message} = '';
3657                next;
3658            }
3659            s/^\s+//; s/\s+$//; # trim ws
3660            $commit{message} .= $_ . "\n";
3661        }
3662    }
3663
3664    unshift @commits, \%commit if ( keys %commit );
3665
3666    return @commits;
3667}
3668
3669sub convertToCvsDate
3670{
3671    my $date = shift;
3672    # Convert from: "git rev-list --pretty" formatted date
3673    # Convert to: "the format specified by RFC822 as modified by RFC1123."
3674    # Example: 26 May 1997 13:01:40 -0400
3675    if( $date =~ /^\w+\s+(\w+)\s+(\d+)\s+(\d+:\d+:\d+)\s+(\d+)\s+([+-]\d+)$/ )
3676    {
3677        $date = "$2 $1 $4 $3 $5";
3678    }
3679
3680    return $date;
3681}
3682
3683sub convertToDbMode
3684{
3685    my $mode = shift;
3686
3687    # NOTE: The CVS protocol uses a string similar "u=rw,g=rw,o=rw",
3688    #  but the database "mode" column historically (and currently)
3689    #  only stores the "rw" (for user) part of the string.
3690    #    FUTURE: It might make more sense to persist the raw
3691    #  octal mode (or perhaps the final full CVS form) instead of
3692    #  this half-converted form, but it isn't currently worth the
3693    #  backwards compatibility headaches.
3694
3695    $mode=~/^\d\d(\d)\d{3}$/;
3696    my $userBits=$1;
3697
3698    my $dbMode = "";
3699    $dbMode .= "r" if ( $userBits & 4 );
3700    $dbMode .= "w" if ( $userBits & 2 );
3701    $dbMode .= "x" if ( $userBits & 1 );
3702    $dbMode = "rw" if ( $dbMode eq "" );
3703
3704    return $dbMode;
3705}
3706
3707sub insert_rev
3708{
3709    my $self = shift;
3710    my $name = shift;
3711    my $revision = shift;
3712    my $filehash = shift;
3713    my $commithash = shift;
3714    my $modified = shift;
3715    my $author = shift;
3716    my $mode = shift;
3717    my $tablename = $self->tablename("revision");
3718
3719    my $insert_rev = $self->{dbh}->prepare_cached("INSERT INTO $tablename (name, revision, filehash, commithash, modified, author, mode) VALUES (?,?,?,?,?,?,?)",{},1);
3720    $insert_rev->execute($name, $revision, $filehash, $commithash, $modified, $author, $mode);
3721}
3722
3723sub insert_mergelog
3724{
3725    my $self = shift;
3726    my $key = shift;
3727    my $value = shift;
3728    my $tablename = $self->tablename("commitmsgs");
3729
3730    my $insert_mergelog = $self->{dbh}->prepare_cached("INSERT INTO $tablename (key, value) VALUES (?,?)",{},1);
3731    $insert_mergelog->execute($key, $value);
3732}
3733
3734sub delete_head
3735{
3736    my $self = shift;
3737    my $tablename = $self->tablename("head");
3738
3739    my $delete_head = $self->{dbh}->prepare_cached("DELETE FROM $tablename",{},1);
3740    $delete_head->execute();
3741}
3742
3743sub insert_head
3744{
3745    my $self = shift;
3746    my $name = shift;
3747    my $revision = shift;
3748    my $filehash = shift;
3749    my $commithash = shift;
3750    my $modified = shift;
3751    my $author = shift;
3752    my $mode = shift;
3753    my $tablename = $self->tablename("head");
3754
3755    my $insert_head = $self->{dbh}->prepare_cached("INSERT INTO $tablename (name, revision, filehash, commithash, modified, author, mode) VALUES (?,?,?,?,?,?,?)",{},1);
3756    $insert_head->execute($name, $revision, $filehash, $commithash, $modified, $author, $mode);
3757}
3758
3759sub _get_prop
3760{
3761    my $self = shift;
3762    my $key = shift;
3763    my $tablename = $self->tablename("properties");
3764
3765    my $db_query = $self->{dbh}->prepare_cached("SELECT value FROM $tablename WHERE key=?",{},1);
3766    $db_query->execute($key);
3767    my ( $value ) = $db_query->fetchrow_array;
3768
3769    return $value;
3770}
3771
3772sub _set_prop
3773{
3774    my $self = shift;
3775    my $key = shift;
3776    my $value = shift;
3777    my $tablename = $self->tablename("properties");
3778
3779    my $db_query = $self->{dbh}->prepare_cached("UPDATE $tablename SET value=? WHERE key=?",{},1);
3780    $db_query->execute($value, $key);
3781
3782    unless ( $db_query->rows )
3783    {
3784        $db_query = $self->{dbh}->prepare_cached("INSERT INTO $tablename (key, value) VALUES (?,?)",{},1);
3785        $db_query->execute($key, $value);
3786    }
3787
3788    return $value;
3789}
3790
3791=head2 gethead
3792
3793=cut
3794
3795sub gethead
3796{
3797    my $self = shift;
3798    my $intRev = shift;
3799    my $tablename = $self->tablename("head");
3800
3801    return $self->{gethead_cache} if ( defined ( $self->{gethead_cache} ) );
3802
3803    my $db_query = $self->{dbh}->prepare_cached("SELECT name, filehash, mode, revision, modified, commithash, author FROM $tablename ORDER BY name ASC",{},1);
3804    $db_query->execute();
3805
3806    my $tree = [];
3807    while ( my $file = $db_query->fetchrow_hashref )
3808    {
3809        if(!$intRev)
3810        {
3811            $file->{revision} = "1.$file->{revision}"
3812        }
3813        push @$tree, $file;
3814    }
3815
3816    $self->{gethead_cache} = $tree;
3817
3818    return $tree;
3819}
3820
3821=head2 getAnyHead
3822
3823Returns a reference to an array of getmeta structures, one
3824per file in the specified tree hash.
3825
3826=cut
3827
3828sub getAnyHead
3829{
3830    my ($self,$hash) = @_;
3831
3832    if(!defined($hash))
3833    {
3834        return $self->gethead();
3835    }
3836
3837    my @files;
3838    {
3839        open(my $filePipe, '-|', 'git', 'ls-tree', '-z', '-r', $hash)
3840                or die("Cannot call git-ls-tree : $!");
3841        local $/ = "\0";
3842        @files=<$filePipe>;
3843        close $filePipe;
3844    }
3845
3846    my $tree=[];
3847    my($line);
3848    foreach $line (@files)
3849    {
3850        $line=~s/\0$//;
3851        unless ( $line=~/^(\d+)\s+(\w+)\s+([a-zA-Z0-9]+)\t(.*)$/o )
3852        {
3853            die("Couldn't process git-ls-tree line : $_");
3854        }
3855
3856        my($mode, $git_type, $git_hash, $git_filename) = ($1, $2, $3, $4);
3857        push @$tree, $self->getMetaFromCommithash($git_filename,$hash);
3858    }
3859
3860    return $tree;
3861}
3862
3863=head2 getRevisionDirMap
3864
3865A "revision dir map" contains all the plain-file filenames associated
3866with a particular revision (treeish), organized by directory:
3867
3868  $type = $out->{$dir}{$fullName}
3869
3870The type of each is "F" (for ordinary file) or "D" (for directory,
3871for which the map $out->{$fullName} will also exist).
3872
3873=cut
3874
3875sub getRevisionDirMap
3876{
3877    my ($self,$ver)=@_;
3878
3879    if(!defined($self->{revisionDirMapCache}))
3880    {
3881        $self->{revisionDirMapCache}={};
3882    }
3883
3884        # Get file list (previously cached results are dependent on HEAD,
3885        # but are early in each case):
3886    my $cacheKey;
3887    my (@fileList);
3888    if( !defined($ver) || $ver eq "" )
3889    {
3890        $cacheKey="";
3891        if( defined($self->{revisionDirMapCache}{$cacheKey}) )
3892        {
3893            return $self->{revisionDirMapCache}{$cacheKey};
3894        }
3895
3896        my @head = @{$self->gethead()};
3897        foreach my $file ( @head )
3898        {
3899            next if ( $file->{filehash} eq "deleted" );
3900
3901            push @fileList,$file->{name};
3902        }
3903    }
3904    else
3905    {
3906        my ($hash)=$self->lookupCommitRef($ver);
3907        if( !defined($hash) )
3908        {
3909            return undef;
3910        }
3911
3912        $cacheKey=$hash;
3913        if( defined($self->{revisionDirMapCache}{$cacheKey}) )
3914        {
3915            return $self->{revisionDirMapCache}{$cacheKey};
3916        }
3917
3918        open(my $filePipe, '-|', 'git', 'ls-tree', '-z', '-r', $hash)
3919                or die("Cannot call git-ls-tree : $!");
3920        local $/ = "\0";
3921        while ( <$filePipe> )
3922        {
3923            chomp;
3924            unless ( /^(\d+)\s+(\w+)\s+([a-zA-Z0-9]+)\t(.*)$/o )
3925            {
3926                die("Couldn't process git-ls-tree line : $_");
3927            }
3928
3929            my($mode, $git_type, $git_hash, $git_filename) = ($1, $2, $3, $4);
3930
3931            push @fileList, $git_filename;
3932        }
3933        close $filePipe;
3934    }
3935
3936        # Convert to normalized form:
3937    my %revMap;
3938    my $file;
3939    foreach $file (@fileList)
3940    {
3941        my($dir) = ($file=~m%^(?:(.*)/)?([^/]*)$%);
3942        $dir='' if(!defined($dir));
3943
3944            # parent directories:
3945            # ... create empty dir maps for parent dirs:
3946        my($td)=$dir;
3947        while(!defined($revMap{$td}))
3948        {
3949            $revMap{$td}={};
3950
3951            my($tp)=($td=~m%^(?:(.*)/)?([^/]*)$%);
3952            $tp='' if(!defined($tp));
3953            $td=$tp;
3954        }
3955            # ... add children to parent maps (now that they exist):
3956        $td=$dir;
3957        while($td ne "")
3958        {
3959            my($tp)=($td=~m%^(?:(.*)/)?([^/]*)$%);
3960            $tp='' if(!defined($tp));
3961
3962            if(defined($revMap{$tp}{$td}))
3963            {
3964                if($revMap{$tp}{$td} ne 'D')
3965                {
3966                    die "Weird file/directory inconsistency in $cacheKey";
3967                }
3968                last;   # loop exit
3969            }
3970            $revMap{$tp}{$td}='D';
3971
3972            $td=$tp;
3973        }
3974
3975            # file
3976        $revMap{$dir}{$file}='F';
3977    }
3978
3979        # Save in cache:
3980    $self->{revisionDirMapCache}{$cacheKey}=\%revMap;
3981    return $self->{revisionDirMapCache}{$cacheKey};
3982}
3983
3984=head2 getlog
3985
3986See also gethistorydense().
3987
3988=cut
3989
3990sub getlog
3991{
3992    my $self = shift;
3993    my $filename = shift;
3994    my $revFilter = shift;
3995
3996    my $tablename = $self->tablename("revision");
3997
3998    # Filters:
3999    # TODO: date, state, or by specific logins filters?
4000    # TODO: Handle comma-separated list of revFilter items, each item
4001    #   can be a range [only case currently handled] or individual
4002    #   rev or branch or "branch.".
4003    # TODO: Adjust $db_query WHERE clause based on revFilter, instead of
4004    #   manually filtering the results of the query?
4005    my ( $minrev, $maxrev );
4006    if( defined($revFilter) and
4007        $state->{opt}{r} =~ /^(1.(\d+))?(::?)(1.(\d.+))?$/ )
4008    {
4009        my $control = $3;
4010        $minrev = $2;
4011        $maxrev = $5;
4012        $minrev++ if ( defined($minrev) and $control eq "::" );
4013    }
4014
4015    my $db_query = $self->{dbh}->prepare_cached("SELECT name, filehash, author, mode, revision, modified, commithash FROM $tablename WHERE name=? ORDER BY revision DESC",{},1);
4016    $db_query->execute($filename);
4017
4018    my $totalRevs=0;
4019    my $tree = [];
4020    while ( my $file = $db_query->fetchrow_hashref )
4021    {
4022        $totalRevs++;
4023        if( defined($minrev) and $file->{revision} < $minrev )
4024        {
4025            next;
4026        }
4027        if( defined($maxrev) and $file->{revision} > $maxrev )
4028        {
4029            next;
4030        }
4031
4032        $file->{revision} = "1." . $file->{revision};
4033        push @$tree, $file;
4034    }
4035
4036    return ($tree,$totalRevs);
4037}
4038
4039=head2 getmeta
4040
4041This function takes a filename (with path) argument and returns a hashref of
4042metadata for that file.
4043
4044There are several ways $revision can be specified:
4045
4046   - A reference to hash that contains a "tag" that is the
4047     actual revision (one of the below).  TODO: Also allow it to
4048     specify a "date" in the hash.
4049   - undef, to refer to the latest version on the main branch.
4050   - Full CVS client revision number (mapped to integer in DB, without the
4051     "1." prefix),
4052   - Complex CVS-compatible "special" revision number for
4053     non-linear history (see comment below)
4054   - git commit sha1 hash
4055   - branch or tag name
4056
4057=cut
4058
4059sub getmeta
4060{
4061    my $self = shift;
4062    my $filename = shift;
4063    my $revision = shift;
4064    my $tablename_rev = $self->tablename("revision");
4065    my $tablename_head = $self->tablename("head");
4066
4067    if ( ref($revision) eq "HASH" )
4068    {
4069        $revision = $revision->{tag};
4070    }
4071
4072    # Overview of CVS revision numbers:
4073    #
4074    # General CVS numbering scheme:
4075    #   - Basic mainline branch numbers: "1.1", "1.2", "1.3", etc.
4076    #   - Result of "cvs checkin -r" (possible, but not really
4077    #     recommended): "2.1", "2.2", etc
4078    #   - Branch tag: "1.2.0.n", where "1.2" is revision it was branched
4079    #     from, "0" is a magic placeholder that identifies it as a
4080    #     branch tag instead of a version tag, and n is 2 times the
4081    #     branch number off of "1.2", starting with "2".
4082    #   - Version on a branch: "1.2.n.x", where "1.2" is branch-from, "n"
4083    #     is branch number off of "1.2" (like n above), and "x" is
4084    #     the version number on the branch.
4085    #   - Branches can branch off of branches: "1.3.2.7.4.1" (even number
4086    #     of components).
4087    #   - Odd "n"s are used by "vendor branches" that result
4088    #     from "cvs import".  Vendor branches have additional
4089    #     strangeness in the sense that the main rcs "head" of the main
4090    #     branch will (temporarily until first normal commit) point
4091    #     to the version on the vendor branch, rather than the actual
4092    #     main branch.  (FUTURE: This may provide an opportunity
4093    #     to use "strange" revision numbers for fast-forward-merged
4094    #     branch tip when CVS client is asking for the main branch.)
4095    #
4096    # git-cvsserver CVS-compatible special numbering schemes:
4097    #   - Currently git-cvsserver only tries to be identical to CVS for
4098    #     simple "1.x" numbers on the "main" branch (as identified
4099    #     by the module name that was originally cvs checkout'ed).
4100    #   - The database only stores the "x" part, for historical reasons.
4101    #     But most of the rest of the cvsserver preserves
4102    #     and thinks using the full revision number.
4103    #   - To handle non-linear history, it uses a version of the form
4104    #     "2.1.1.2000.b.b.b."..., where the 2.1.1.2000 is to help uniquely
4105    #     identify this as a special revision number, and there are
4106    #     20 b's that together encode the sha1 git commit from which
4107    #     this version of this file originated.  Each b is
4108    #     the numerical value of the corresponding byte plus
4109    #     100.
4110    #      - "plus 100" avoids "0"s, and also reduces the
4111    #        likelyhood of a collision in the case that someone someday
4112    #        writes an import tool that tries to preserve original
4113    #        CVS revision numbers, and the original CVS data had done
4114    #        lots of branches off of branches and other strangeness to
4115    #        end up with a real version number that just happens to look
4116    #        like this special revision number form.  Also, if needed
4117    #        there are several ways to extend/identify alternative encodings
4118    #        within the "2.1.1.2000" part if necessary.
4119    #      - Unlike real CVS revisions, you can't really reconstruct what
4120    #        relation a revision of this form has to other revisions.
4121    #   - FUTURE: TODO: Rework database somehow to make up and remember
4122    #     fully-CVS-compatible branches and branch version numbers.
4123
4124    my $meta;
4125    if ( defined($revision) )
4126    {
4127        if ( $revision =~ /^1\.(\d+)$/ )
4128        {
4129            my ($intRev) = $1;
4130            my $db_query;
4131            $db_query = $self->{dbh}->prepare_cached(
4132                "SELECT * FROM $tablename_rev WHERE name=? AND revision=?",
4133                {},1);
4134            $db_query->execute($filename, $intRev);
4135            $meta = $db_query->fetchrow_hashref;
4136        }
4137        elsif ( $revision =~ /^2\.1\.1\.2000(\.[1-3][0-9][0-9]){20}$/ )
4138        {
4139            my ($commitHash)=($revision=~/^2\.1\.1\.2000(.*)$/);
4140            $commitHash=~s/\.([0-9]+)/sprintf("%02x",$1-100)/eg;
4141            if($commitHash=~/^[0-9a-f]{40}$/)
4142            {
4143                return $self->getMetaFromCommithash($filename,$commitHash);
4144            }
4145
4146            # error recovery: fall back on head version below
4147            print "E Failed to find $filename version=$revision or commit=$commitHash\n";
4148            $log->warning("failed get $revision with commithash=$commitHash");
4149            undef $revision;
4150        }
4151        elsif ( $revision =~ /^[0-9a-f]{40}$/ )
4152        {
4153            # Try DB first.  This is mostly only useful for req_annotate(),
4154            # which only calls this for stuff that should already be in
4155            # the DB.  It is fairly likely to be a waste of time
4156            # in most other cases [unless the file happened to be
4157            # modified in $revision specifically], but
4158            # it is probably in the noise compared to how long
4159            # getMetaFromCommithash() will take.
4160            my $db_query;
4161            $db_query = $self->{dbh}->prepare_cached(
4162                "SELECT * FROM $tablename_rev WHERE name=? AND commithash=?",
4163                {},1);
4164            $db_query->execute($filename, $revision);
4165            $meta = $db_query->fetchrow_hashref;
4166
4167            if(! $meta)
4168            {
4169                my($revCommit)=$self->lookupCommitRef($revision);
4170                if($revCommit=~/^[0-9a-f]{40}$/)
4171                {
4172                    return $self->getMetaFromCommithash($filename,$revCommit);
4173                }
4174
4175                # error recovery: nothing found:
4176                print "E Failed to find $filename version=$revision\n";
4177                $log->warning("failed get $revision");
4178                return $meta;
4179            }
4180        }
4181        else
4182        {
4183            my($revCommit)=$self->lookupCommitRef($revision);
4184            if($revCommit=~/^[0-9a-f]{40}$/)
4185            {
4186                return $self->getMetaFromCommithash($filename,$revCommit);
4187            }
4188
4189            # error recovery: fall back on head version below
4190            print "E Failed to find $filename version=$revision\n";
4191            $log->warning("failed get $revision");
4192            undef $revision;  # Allow fallback
4193        }
4194    }
4195
4196    if(!defined($revision))
4197    {
4198        my $db_query;
4199        $db_query = $self->{dbh}->prepare_cached(
4200                "SELECT * FROM $tablename_head WHERE name=?",{},1);
4201        $db_query->execute($filename);
4202        $meta = $db_query->fetchrow_hashref;
4203    }
4204
4205    if($meta)
4206    {
4207        $meta->{revision} = "1.$meta->{revision}";
4208    }
4209    return $meta;
4210}
4211
4212sub getMetaFromCommithash
4213{
4214    my $self = shift;
4215    my $filename = shift;
4216    my $revCommit = shift;
4217
4218    # NOTE: This function doesn't scale well (lots of forks), especially
4219    #   if you have many files that have not been modified for many commits
4220    #   (each git-rev-parse redoes a lot of work for each file
4221    #   that theoretically could be done in parallel by smarter
4222    #   graph traversal).
4223    #
4224    # TODO: Possible optimization strategies:
4225    #   - Solve the issue of assigning and remembering "real" CVS
4226    #     revision numbers for branches, and ensure the
4227    #     data structure can do this efficiently.  Perhaps something
4228    #     similar to "git notes", and carefully structured to take
4229    #     advantage same-sha1-is-same-contents, to roll the same
4230    #     unmodified subdirectory data onto multiple commits?
4231    #   - Write and use a C tool that is like git-blame, but
4232    #     operates on multiple files with file granularity, instead
4233    #     of one file with line granularity.  Cache
4234    #     most-recently-modified in $self->{commitRefCache}{$revCommit}.
4235    #     Try to be intelligent about how many files we do with
4236    #     one fork (perhaps one directory at a time, without recursion,
4237    #     and/or include directory as one line item, recurse from here
4238    #     instead of in C tool?).
4239    #   - Perhaps we could ask the DB for (filename,fileHash),
4240    #     and just guess that it is correct (that the file hadn't
4241    #     changed between $revCommit and the found commit, then
4242    #     changed back, confusing anything trying to interpret
4243    #     history).  Probably need to add another index to revisions
4244    #     DB table for this.
4245    #   - NOTE: Trying to store all (commit,file) keys in DB [to
4246    #     find "lastModfiedCommit] (instead of
4247    #     just files that changed in each commit as we do now) is
4248    #     probably not practical from a disk space perspective.
4249
4250        # Does the file exist in $revCommit?
4251    # TODO: Include file hash in dirmap cache.
4252    my($dirMap)=$self->getRevisionDirMap($revCommit);
4253    my($dir,$file)=($filename=~m%^(?:(.*)/)?([^/]*$)%);
4254    if(!defined($dir))
4255    {
4256        $dir="";
4257    }
4258    if( !defined($dirMap->{$dir}) ||
4259        !defined($dirMap->{$dir}{$filename}) )
4260    {
4261        my($fileHash)="deleted";
4262
4263        my($retVal)={};
4264        $retVal->{name}=$filename;
4265        $retVal->{filehash}=$fileHash;
4266
4267            # not needed and difficult to compute:
4268        $retVal->{revision}="0";  # $revision;
4269        $retVal->{commithash}=$revCommit;
4270        #$retVal->{author}=$commit->{author};
4271        #$retVal->{modified}=convertToCvsDate($commit->{date});
4272        #$retVal->{mode}=convertToDbMode($mode);
4273
4274        return $retVal;
4275    }
4276
4277    my($fileHash)=safe_pipe_capture("git","rev-parse","$revCommit:$filename");
4278    chomp $fileHash;
4279    if(!($fileHash=~/^[0-9a-f]{40}$/))
4280    {
4281        die "Invalid fileHash '$fileHash' looking up"
4282                    ." '$revCommit:$filename'\n";
4283    }
4284
4285    # information about most recent commit to modify $filename:
4286    open(my $gitLogPipe, '-|', 'git', 'rev-list',
4287         '--max-count=1', '--pretty', '--parents',
4288         $revCommit, '--', $filename)
4289                or die "Cannot call git-rev-list: $!";
4290    my @commits=readCommits($gitLogPipe);
4291    close $gitLogPipe;
4292    if(scalar(@commits)!=1)
4293    {
4294        die "Can't find most recent commit changing $filename\n";
4295    }
4296    my($commit)=$commits[0];
4297    if( !defined($commit) || !defined($commit->{hash}) )
4298    {
4299        return undef;
4300    }
4301
4302    # does this (commit,file) have a real assigned CVS revision number?
4303    my $tablename_rev = $self->tablename("revision");
4304    my $db_query;
4305    $db_query = $self->{dbh}->prepare_cached(
4306        "SELECT * FROM $tablename_rev WHERE name=? AND commithash=?",
4307        {},1);
4308    $db_query->execute($filename, $commit->{hash});
4309    my($meta)=$db_query->fetchrow_hashref;
4310    if($meta)
4311    {
4312        $meta->{revision} = "1.$meta->{revision}";
4313        return $meta;
4314    }
4315
4316    # fall back on special revision number
4317    my($revision)=$commit->{hash};
4318    $revision=~s/(..)/'.' . (hex($1)+100)/eg;
4319    $revision="2.1.1.2000$revision";
4320
4321    # meta data about $filename:
4322    open(my $filePipe, '-|', 'git', 'ls-tree', '-z',
4323                $commit->{hash}, '--', $filename)
4324            or die("Cannot call git-ls-tree : $!");
4325    local $/ = "\0";
4326    my $line;
4327    $line=<$filePipe>;
4328    if(defined(<$filePipe>))
4329    {
4330        die "Expected only a single file for git-ls-tree $filename\n";
4331    }
4332    close $filePipe;
4333
4334    chomp $line;
4335    unless ( $line=~m/^(\d+)\s+(\w+)\s+([a-zA-Z0-9]+)\t(.*)$/o )
4336    {
4337        die("Couldn't process git-ls-tree line : $line\n");
4338    }
4339    my ( $mode, $git_type, $git_hash, $git_filename ) = ( $1, $2, $3, $4 );
4340
4341    # save result:
4342    my($retVal)={};
4343    $retVal->{name}=$filename;
4344    $retVal->{revision}=$revision;
4345    $retVal->{filehash}=$fileHash;
4346    $retVal->{commithash}=$revCommit;
4347    $retVal->{author}=$commit->{author};
4348    $retVal->{modified}=convertToCvsDate($commit->{date});
4349    $retVal->{mode}=convertToDbMode($mode);
4350
4351    return $retVal;
4352}
4353
4354=head2 lookupCommitRef
4355
4356Convert tag/branch/abbreviation/etc into a commit sha1 hash.  Caches
4357the result so looking it up again is fast.
4358
4359=cut
4360
4361sub lookupCommitRef
4362{
4363    my $self = shift;
4364    my $ref = shift;
4365
4366    my $commitHash = $self->{commitRefCache}{$ref};
4367    if(defined($commitHash))
4368    {
4369        return $commitHash;
4370    }
4371
4372    $commitHash=safe_pipe_capture("git","rev-parse","--verify","--quiet",
4373                                  $self->unescapeRefName($ref));
4374    $commitHash=~s/\s*$//;
4375    if(!($commitHash=~/^[0-9a-f]{40}$/))
4376    {
4377        $commitHash=undef;
4378    }
4379
4380    if( defined($commitHash) )
4381    {
4382        my $type=safe_pipe_capture("git","cat-file","-t",$commitHash);
4383        if( ! ($type=~/^commit\s*$/ ) )
4384        {
4385            $commitHash=undef;
4386        }
4387    }
4388    if(defined($commitHash))
4389    {
4390        $self->{commitRefCache}{$ref}=$commitHash;
4391    }
4392    return $commitHash;
4393}
4394
4395=head2 clearCommitRefCaches
4396
4397Clears cached commit cache (sha1's for various tags/abbeviations/etc),
4398and related caches.
4399
4400=cut
4401
4402sub clearCommitRefCaches
4403{
4404    my $self = shift;
4405    $self->{commitRefCache} = {};
4406    $self->{revisionDirMapCache} = undef;
4407    $self->{gethead_cache} = undef;
4408}
4409
4410=head2 commitmessage
4411
4412this function takes a commithash and returns the commit message for that commit
4413
4414=cut
4415sub commitmessage
4416{
4417    my $self = shift;
4418    my $commithash = shift;
4419    my $tablename = $self->tablename("commitmsgs");
4420
4421    die("Need commithash") unless ( defined($commithash) and $commithash =~ /^[a-zA-Z0-9]{40}$/ );
4422
4423    my $db_query;
4424    $db_query = $self->{dbh}->prepare_cached("SELECT value FROM $tablename WHERE key=?",{},1);
4425    $db_query->execute($commithash);
4426
4427    my ( $message ) = $db_query->fetchrow_array;
4428
4429    if ( defined ( $message ) )
4430    {
4431        $message .= " " if ( $message =~ /\n$/ );
4432        return $message;
4433    }
4434
4435    my @lines = safe_pipe_capture("git", "cat-file", "commit", $commithash);
4436    shift @lines while ( $lines[0] =~ /\S/ );
4437    $message = join("",@lines);
4438    $message .= " " if ( $message =~ /\n$/ );
4439    return $message;
4440}
4441
4442=head2 gethistorydense
4443
4444This function takes a filename (with path) argument and returns an arrayofarrays
4445containing revision,filehash,commithash ordered by revision descending.
4446
4447This version of gethistory skips deleted entries -- so it is useful for annotate.
4448The 'dense' part is a reference to a '--dense' option available for git-rev-list
4449and other git tools that depend on it.
4450
4451See also getlog().
4452
4453=cut
4454sub gethistorydense
4455{
4456    my $self = shift;
4457    my $filename = shift;
4458    my $tablename = $self->tablename("revision");
4459
4460    my $db_query;
4461    $db_query = $self->{dbh}->prepare_cached("SELECT revision, filehash, commithash FROM $tablename WHERE name=? AND filehash!='deleted' ORDER BY revision DESC",{},1);
4462    $db_query->execute($filename);
4463
4464    my $result = $db_query->fetchall_arrayref;
4465
4466    my $i;
4467    for($i=0 ; $i<scalar(@$result) ; $i++)
4468    {
4469        $result->[$i][0]="1." . $result->[$i][0];
4470    }
4471
4472    return $result;
4473}
4474
4475=head2 escapeRefName
4476
4477Apply an escape mechanism to compensate for characters that
4478git ref names can have that CVS tags can not.
4479
4480=cut
4481sub escapeRefName
4482{
4483    my($self,$refName)=@_;
4484
4485    # CVS officially only allows [-_A-Za-z0-9] in tag names (or in
4486    # many contexts it can also be a CVS revision number).
4487    #
4488    # Git tags commonly use '/' and '.' as well, but also handle
4489    # anything else just in case:
4490    #
4491    #   = "_-s-"  For '/'.
4492    #   = "_-p-"  For '.'.
4493    #   = "_-u-"  For underscore, in case someone wants a literal "_-" in
4494    #     a tag name.
4495    #   = "_-xx-" Where "xx" is the hexadecimal representation of the
4496    #     desired ASCII character byte. (for anything else)
4497
4498    if(! $refName=~/^[1-9][0-9]*(\.[1-9][0-9]*)*$/)
4499    {
4500        $refName=~s/_-/_-u--/g;
4501        $refName=~s/\./_-p-/g;
4502        $refName=~s%/%_-s-%g;
4503        $refName=~s/[^-_a-zA-Z0-9]/sprintf("_-%02x-",$1)/eg;
4504    }
4505}
4506
4507=head2 unescapeRefName
4508
4509Undo an escape mechanism to compensate for characters that
4510git ref names can have that CVS tags can not.
4511
4512=cut
4513sub unescapeRefName
4514{
4515    my($self,$refName)=@_;
4516
4517    # see escapeRefName() for description of escape mechanism.
4518
4519    $refName=~s/_-([spu]|[0-9a-f][0-9a-f])-/unescapeRefNameChar($1)/eg;
4520
4521    # allowed tag names
4522    # TODO: Perhaps use git check-ref-format, with an in-process cache of
4523    #  validated names?
4524    if( !( $refName=~m%^[^-][-a-zA-Z0-9_/.]*$% ) ||
4525        ( $refName=~m%[/.]$% ) ||
4526        ( $refName=~/\.lock$/ ) ||
4527        ( $refName=~m%\.\.|/\.|[[\\:?*~]|\@\{% ) )  # matching }
4528    {
4529        # Error:
4530        $log->warn("illegal refName: $refName");
4531        $refName=undef;
4532    }
4533    return $refName;
4534}
4535
4536sub unescapeRefNameChar
4537{
4538    my($char)=@_;
4539
4540    if($char eq "s")
4541    {
4542        $char="/";
4543    }
4544    elsif($char eq "p")
4545    {
4546        $char=".";
4547    }
4548    elsif($char eq "u")
4549    {
4550        $char="_";
4551    }
4552    elsif($char=~/^[0-9a-f][0-9a-f]$/)
4553    {
4554        $char=chr(hex($char));
4555    }
4556    else
4557    {
4558        # Error case: Maybe it has come straight from user, and
4559        # wasn't supposed to be escaped?  Restore it the way we got it:
4560        $char="_-$char-";
4561    }
4562
4563    return $char;
4564}
4565
4566=head2 in_array()
4567
4568from Array::PAT - mimics the in_array() function
4569found in PHP. Yuck but works for small arrays.
4570
4571=cut
4572sub in_array
4573{
4574    my ($check, @array) = @_;
4575    my $retval = 0;
4576    foreach my $test (@array){
4577        if($check eq $test){
4578            $retval =  1;
4579        }
4580    }
4581    return $retval;
4582}
4583
4584=head2 safe_pipe_capture
4585
4586an alternative to `command` that allows input to be passed as an array
4587to work around shell problems with weird characters in arguments
4588
4589=cut
4590sub safe_pipe_capture {
4591
4592    my @output;
4593
4594    if (my $pid = open my $child, '-|') {
4595        @output = (<$child>);
4596        close $child or die join(' ',@_).": $! $?";
4597    } else {
4598        exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
4599    }
4600    return wantarray ? @output : join('',@output);
4601}
4602
4603=head2 mangle_dirname
4604
4605create a string from a directory name that is suitable to use as
4606part of a filename, mainly by converting all chars except \w.- to _
4607
4608=cut
4609sub mangle_dirname {
4610    my $dirname = shift;
4611    return unless defined $dirname;
4612
4613    $dirname =~ s/[^\w.-]/_/g;
4614
4615    return $dirname;
4616}
4617
4618=head2 mangle_tablename
4619
4620create a string from a that is suitable to use as part of an SQL table
4621name, mainly by converting all chars except \w to _
4622
4623=cut
4624sub mangle_tablename {
4625    my $tablename = shift;
4626    return unless defined $tablename;
4627
4628    $tablename =~ s/[^\w_]/_/g;
4629
4630    return $tablename;
4631}
4632
46331;