perl / Git.pmon commit refactor userdiff textconv code (04427ac)
   1=head1 NAME
   2
   3Git - Perl interface to the Git version control system
   4
   5=cut
   6
   7
   8package Git;
   9
  10use strict;
  11
  12
  13BEGIN {
  14
  15our ($VERSION, @ISA, @EXPORT, @EXPORT_OK);
  16
  17# Totally unstable API.
  18$VERSION = '0.01';
  19
  20
  21=head1 SYNOPSIS
  22
  23  use Git;
  24
  25  my $version = Git::command_oneline('version');
  26
  27  git_cmd_try { Git::command_noisy('update-server-info') }
  28              '%s failed w/ code %d';
  29
  30  my $repo = Git->repository (Directory => '/srv/git/cogito.git');
  31
  32
  33  my @revs = $repo->command('rev-list', '--since=last monday', '--all');
  34
  35  my ($fh, $c) = $repo->command_output_pipe('rev-list', '--since=last monday', '--all');
  36  my $lastrev = <$fh>; chomp $lastrev;
  37  $repo->command_close_pipe($fh, $c);
  38
  39  my $lastrev = $repo->command_oneline( [ 'rev-list', '--all' ],
  40                                        STDERR => 0 );
  41
  42  my $sha1 = $repo->hash_and_insert_object('file.txt');
  43  my $tempfile = tempfile();
  44  my $size = $repo->cat_blob($sha1, $tempfile);
  45
  46=cut
  47
  48
  49require Exporter;
  50
  51@ISA = qw(Exporter);
  52
  53@EXPORT = qw(git_cmd_try);
  54
  55# Methods which can be called as standalone functions as well:
  56@EXPORT_OK = qw(command command_oneline command_noisy
  57                command_output_pipe command_input_pipe command_close_pipe
  58                command_bidi_pipe command_close_bidi_pipe
  59                version exec_path hash_object git_cmd_try
  60                remote_refs
  61                temp_acquire temp_release temp_reset temp_path);
  62
  63
  64=head1 DESCRIPTION
  65
This module provides Perl scripts with an easy way to interface with the Git
version control system. The module offers an easy and well-tested way to call
arbitrary Git commands; in the future, the interface will also provide
specialized methods for easily performing operations which are not totally
trivial to do over the generic command interface.
  71
  72While some commands can be executed outside of any context (e.g. 'version'
  73or 'init'), most operations require a repository context, which in practice
  74means getting an instance of the Git object using the repository() constructor.
  75(In the future, we will also get a new_repository() constructor.) All commands
  76called as methods of the object are then executed in the context of the
  77repository.
  78
  79Part of the "repository state" is also information about path to the attached
  80working copy (unless you work with a bare repository). You can also navigate
  81inside of the working copy using the C<wc_chdir()> method. (Note that
  82the repository object is self-contained and will not change working directory
  83of your process.)
  84
  85TODO: In the future, we might also do
  86
  87        my $remoterepo = $repo->remote_repository (Name => 'cogito', Branch => 'master');
  88        $remoterepo ||= Git->remote_repository ('http://git.or.cz/cogito.git/');
  89        my @refs = $remoterepo->refs();
  90
  91Currently, the module merely wraps calls to external Git tools. In the future,
  92it will provide a much faster way to interact with Git by linking directly
  93to libgit. This should be completely opaque to the user, though (performance
  94increase notwithstanding).
  95
  96=cut
  97
  98
  99use Carp qw(carp croak); # but croak is bad - throw instead
 100use Error qw(:try);
 101use Cwd qw(abs_path);
 102use IPC::Open2 qw(open2);
 103use Fcntl qw(SEEK_SET SEEK_CUR);
 104}
 105
 106
 107=head1 CONSTRUCTORS
 108
 109=over 4
 110
 111=item repository ( OPTIONS )
 112
 113=item repository ( DIRECTORY )
 114
 115=item repository ()
 116
 117Construct a new repository object.
 118C<OPTIONS> are passed in a hash like fashion, using key and value pairs.
 119Possible options are:
 120
 121B<Repository> - Path to the Git repository.
 122
 123B<WorkingCopy> - Path to the associated working copy; not strictly required
 124as many commands will happily crunch on a bare repository.
 125
 126B<WorkingSubdir> - Subdirectory in the working copy to work inside.
 127Just left undefined if you do not want to limit the scope of operations.
 128
 129B<Directory> - Path to the Git working directory in its usual setup.
 130The C<.git> directory is searched in the directory and all the parent
 131directories; if found, C<WorkingCopy> is set to the directory containing
 132it and C<Repository> to the C<.git> directory itself. If no C<.git>
 133directory was found, the C<Directory> is assumed to be a bare repository,
 134C<Repository> is set to point at it and C<WorkingCopy> is left undefined.
 135If the C<$GIT_DIR> environment variable is set, things behave as expected
 136as well.
 137
 138You should not use both C<Directory> and either of C<Repository> and
 139C<WorkingCopy> - the results of that are undefined.
 140
 141Alternatively, a directory path may be passed as a single scalar argument
 142to the constructor; it is equivalent to setting only the C<Directory> option
 143field.
 144
 145Calling the constructor with no options whatsoever is equivalent to
 146calling it with C<< Directory => '.' >>. In general, if you are building
 147a standard porcelain command, simply doing C<< Git->repository() >> should
 148do the right thing and setup the object to reflect exactly where the user
 149is right now.
 150
 151=cut
 152
 153sub repository {
 154        my $class = shift;
 155        my @args = @_;
 156        my %opts = ();
 157        my $self;
 158
 159        if (defined $args[0]) {
 160                if ($#args % 2 != 1) {
 161                        # Not a hash.
 162                        $#args == 0 or throw Error::Simple("bad usage");
 163                        %opts = ( Directory => $args[0] );
 164                } else {
 165                        %opts = @args;
 166                }
 167        }
 168
 169        if (not defined $opts{Repository} and not defined $opts{WorkingCopy}) {
 170                $opts{Directory} ||= '.';
 171        }
 172
 173        if ($opts{Directory}) {
 174                -d $opts{Directory} or throw Error::Simple("Directory not found: $!");
 175
 176                my $search = Git->repository(WorkingCopy => $opts{Directory});
 177                my $dir;
 178                try {
 179                        $dir = $search->command_oneline(['rev-parse', '--git-dir'],
 180                                                        STDERR => 0);
 181                } catch Git::Error::Command with {
 182                        $dir = undef;
 183                };
 184
 185                if ($dir) {
 186                        $dir =~ m#^/# or $dir = $opts{Directory} . '/' . $dir;
 187                        $opts{Repository} = $dir;
 188
 189                        # If --git-dir went ok, this shouldn't die either.
 190                        my $prefix = $search->command_oneline('rev-parse', '--show-prefix');
 191                        $dir = abs_path($opts{Directory}) . '/';
 192                        if ($prefix) {
 193                                if (substr($dir, -length($prefix)) ne $prefix) {
 194                                        throw Error::Simple("rev-parse confused me - $dir does not have trailing $prefix");
 195                                }
 196                                substr($dir, -length($prefix)) = '';
 197                        }
 198                        $opts{WorkingCopy} = $dir;
 199                        $opts{WorkingSubdir} = $prefix;
 200
 201                } else {
 202                        # A bare repository? Let's see...
 203                        $dir = $opts{Directory};
 204
 205                        unless (-d "$dir/refs" and -d "$dir/objects" and -e "$dir/HEAD") {
 206                                # Mimick git-rev-parse --git-dir error message:
 207                                throw Error::Simple('fatal: Not a git repository');
 208                        }
 209                        my $search = Git->repository(Repository => $dir);
 210                        try {
 211                                $search->command('symbolic-ref', 'HEAD');
 212                        } catch Git::Error::Command with {
 213                                # Mimick git-rev-parse --git-dir error message:
 214                                throw Error::Simple('fatal: Not a git repository');
 215                        }
 216
 217                        $opts{Repository} = abs_path($dir);
 218                }
 219
 220                delete $opts{Directory};
 221        }
 222
 223        $self = { opts => \%opts };
 224        bless $self, $class;
 225}
 226
 227=back
 228
 229=head1 METHODS
 230
 231=over 4
 232
 233=item command ( COMMAND [, ARGUMENTS... ] )
 234
 235=item command ( [ COMMAND, ARGUMENTS... ], { Opt => Val ... } )
 236
 237Execute the given Git C<COMMAND> (specify it without the 'git-'
 238prefix), optionally with the specified extra C<ARGUMENTS>.
 239
 240The second more elaborate form can be used if you want to further adjust
 241the command execution. Currently, only one option is supported:
 242
 243B<STDERR> - How to deal with the command's error output. By default (C<undef>)
 244it is delivered to the caller's C<STDERR>. A false value (0 or '') will cause
 245it to be thrown away. If you want to process it, you can get it in a filehandle
 246you specify, but you must be extremely careful; if the error output is not
 247very short and you want to read it in the same process as where you called
 248C<command()>, you are set up for a nice deadlock!
 249
 250The method can be called without any instance or on a specified Git repository
 251(in that case the command will be run in the repository context).
 252
 253In scalar context, it returns all the command output in a single string
 254(verbatim).
 255
 256In array context, it returns an array containing lines printed to the
 257command's stdout (without trailing newlines).
 258
 259In both cases, the command's stdin and stderr are the same as the caller's.
 260
 261=cut
 262
 263sub command {
 264        my ($fh, $ctx) = command_output_pipe(@_);
 265
 266        if (not defined wantarray) {
 267                # Nothing to pepper the possible exception with.
 268                _cmd_close($fh, $ctx);
 269
 270        } elsif (not wantarray) {
 271                local $/;
 272                my $text = <$fh>;
 273                try {
 274                        _cmd_close($fh, $ctx);
 275                } catch Git::Error::Command with {
 276                        # Pepper with the output:
 277                        my $E = shift;
 278                        $E->{'-outputref'} = \$text;
 279                        throw $E;
 280                };
 281                return $text;
 282
 283        } else {
 284                my @lines = <$fh>;
 285                defined and chomp for @lines;
 286                try {
 287                        _cmd_close($fh, $ctx);
 288                } catch Git::Error::Command with {
 289                        my $E = shift;
 290                        $E->{'-outputref'} = \@lines;
 291                        throw $E;
 292                };
 293                return @lines;
 294        }
 295}
 296
 297
 298=item command_oneline ( COMMAND [, ARGUMENTS... ] )
 299
 300=item command_oneline ( [ COMMAND, ARGUMENTS... ], { Opt => Val ... } )
 301
 302Execute the given C<COMMAND> in the same way as command()
 303does but always return a scalar string containing the first line
 304of the command's standard output.
 305
 306=cut
 307
 308sub command_oneline {
 309        my ($fh, $ctx) = command_output_pipe(@_);
 310
 311        my $line = <$fh>;
 312        defined $line and chomp $line;
 313        try {
 314                _cmd_close($fh, $ctx);
 315        } catch Git::Error::Command with {
 316                # Pepper with the output:
 317                my $E = shift;
 318                $E->{'-outputref'} = \$line;
 319                throw $E;
 320        };
 321        return $line;
 322}
 323
 324
 325=item command_output_pipe ( COMMAND [, ARGUMENTS... ] )
 326
 327=item command_output_pipe ( [ COMMAND, ARGUMENTS... ], { Opt => Val ... } )
 328
 329Execute the given C<COMMAND> in the same way as command()
 330does but return a pipe filehandle from which the command output can be
 331read.
 332
 333The function can return C<($pipe, $ctx)> in array context.
 334See C<command_close_pipe()> for details.
 335
 336=cut
 337
 338sub command_output_pipe {
 339        _command_common_pipe('-|', @_);
 340}
 341
 342
 343=item command_input_pipe ( COMMAND [, ARGUMENTS... ] )
 344
 345=item command_input_pipe ( [ COMMAND, ARGUMENTS... ], { Opt => Val ... } )
 346
 347Execute the given C<COMMAND> in the same way as command_output_pipe()
 348does but return an input pipe filehandle instead; the command output
 349is not captured.
 350
 351The function can return C<($pipe, $ctx)> in array context.
 352See C<command_close_pipe()> for details.
 353
 354=cut
 355
 356sub command_input_pipe {
 357        _command_common_pipe('|-', @_);
 358}
 359
 360
 361=item command_close_pipe ( PIPE [, CTX ] )
 362
 363Close the C<PIPE> as returned from C<command_*_pipe()>, checking
 364whether the command finished successfully. The optional C<CTX> argument
 365is required if you want to see the command name in the error message,
 366and it is the second value returned by C<command_*_pipe()> when
 367called in array context. The call idiom is:
 368
 369        my ($fh, $ctx) = $r->command_output_pipe('status');
 370        while (<$fh>) { ... }
 371        $r->command_close_pipe($fh, $ctx);
 372
 373Note that you should not rely on whatever actually is in C<CTX>;
 374currently it is simply the command name but in future the context might
 375have more complicated structure.
 376
 377=cut
 378
 379sub command_close_pipe {
 380        my ($self, $fh, $ctx) = _maybe_self(@_);
 381        $ctx ||= '<unknown>';
 382        _cmd_close($fh, $ctx);
 383}
 384
 385=item command_bidi_pipe ( COMMAND [, ARGUMENTS... ] )
 386
 387Execute the given C<COMMAND> in the same way as command_output_pipe()
 388does but return both an input pipe filehandle and an output pipe filehandle.
 389
The function will return C<($pid, $pipe_in, $pipe_out, $ctx)>.
 391See C<command_close_bidi_pipe()> for details.
 392
 393=cut
 394
 395sub command_bidi_pipe {
 396        my ($pid, $in, $out);
 397        $pid = open2($in, $out, 'git', @_);
 398        return ($pid, $in, $out, join(' ', @_));
 399}
 400
 401=item command_close_bidi_pipe ( PID, PIPE_IN, PIPE_OUT [, CTX] )
 402
 403Close the C<PIPE_IN> and C<PIPE_OUT> as returned from C<command_bidi_pipe()>,
 404checking whether the command finished successfully. The optional C<CTX>
 405argument is required if you want to see the command name in the error message,
 406and it is the fourth value returned by C<command_bidi_pipe()>.  The call idiom
 407is:
 408
        my ($pid, $in, $out, $ctx) = $r->command_bidi_pipe('cat-file', '--batch-check');
        print $out "000000000\n";
 411        while (<$in>) { ... }
 412        $r->command_close_bidi_pipe($pid, $in, $out, $ctx);
 413
 414Note that you should not rely on whatever actually is in C<CTX>;
 415currently it is simply the command name but in future the context might
 416have more complicated structure.
 417
 418=cut
 419
 420sub command_close_bidi_pipe {
 421        local $?;
 422        my ($pid, $in, $out, $ctx) = @_;
 423        foreach my $fh ($in, $out) {
 424                unless (close $fh) {
 425                        if ($!) {
 426                                carp "error closing pipe: $!";
 427                        } elsif ($? >> 8) {
 428                                throw Git::Error::Command($ctx, $? >>8);
 429                        }
 430                }
 431        }
 432
 433        waitpid $pid, 0;
 434
 435        if ($? >> 8) {
 436                throw Git::Error::Command($ctx, $? >>8);
 437        }
 438}
 439
 440
 441=item command_noisy ( COMMAND [, ARGUMENTS... ] )
 442
 443Execute the given C<COMMAND> in the same way as command() does but do not
 444capture the command output - the standard output is not redirected and goes
 445to the standard output of the caller application.
 446
Although the method is called command_noisy(), you may also want to use it
for silent Git commands which you know will never pollute your stdout, simply
to avoid the overhead of the pipe setup when calling them.
 450
 451The function returns only after the command has finished running.
 452
 453=cut
 454
 455sub command_noisy {
 456        my ($self, $cmd, @args) = _maybe_self(@_);
 457        _check_valid_cmd($cmd);
 458
 459        my $pid = fork;
 460        if (not defined $pid) {
 461                throw Error::Simple("fork failed: $!");
 462        } elsif ($pid == 0) {
 463                _cmd_exec($self, $cmd, @args);
 464        }
 465        if (waitpid($pid, 0) > 0 and $?>>8 != 0) {
 466                throw Git::Error::Command(join(' ', $cmd, @args), $? >> 8);
 467        }
 468}
 469
 470
 471=item version ()
 472
 473Return the Git version in use.
 474
 475=cut
 476
 477sub version {
 478        my $verstr = command_oneline('--version');
 479        $verstr =~ s/^git version //;
 480        $verstr;
 481}
 482
 483
 484=item exec_path ()
 485
 486Return path to the Git sub-command executables (the same as
 487C<git --exec-path>). Useful mostly only internally.
 488
 489=cut
 490
 491sub exec_path { command_oneline('--exec-path') }
 492
 493
 494=item repo_path ()
 495
 496Return path to the git repository. Must be called on a repository instance.
 497
 498=cut
 499
 500sub repo_path { $_[0]->{opts}->{Repository} }
 501
 502
 503=item wc_path ()
 504
 505Return path to the working copy. Must be called on a repository instance.
 506
 507=cut
 508
 509sub wc_path { $_[0]->{opts}->{WorkingCopy} }
 510
 511
 512=item wc_subdir ()
 513
 514Return path to the subdirectory inside of a working copy. Must be called
 515on a repository instance.
 516
 517=cut
 518
 519sub wc_subdir { $_[0]->{opts}->{WorkingSubdir} ||= '' }
 520
 521
 522=item wc_chdir ( SUBDIR )
 523
 524Change the working copy subdirectory to work within. The C<SUBDIR> is
 525relative to the working copy root directory (not the current subdirectory).
 526Must be called on a repository instance attached to a working copy
 527and the directory must exist.
 528
 529=cut
 530
 531sub wc_chdir {
 532        my ($self, $subdir) = @_;
 533        $self->wc_path()
 534                or throw Error::Simple("bare repository");
 535
 536        -d $self->wc_path().'/'.$subdir
 537                or throw Error::Simple("subdir not found: $!");
 538        # Of course we will not "hold" the subdirectory so anyone
 539        # can delete it now and we will never know. But at least we tried.
 540
 541        $self->{opts}->{WorkingSubdir} = $subdir;
 542}
 543
 544
 545=item config ( VARIABLE )
 546
Retrieve the configuration C<VARIABLE> in the same manner as C<config>
does. In scalar context, the variable must be set only once (an exception
is thrown otherwise); in array context, the variable may be set multiple
times and all the values are returned.
 551
 552This currently wraps command('config') so it is not so fast.
 553
 554=cut
 555
 556sub config {
 557        my ($self, $var) = _maybe_self(@_);
 558
 559        try {
 560                my @cmd = ('config');
 561                unshift @cmd, $self if $self;
 562                if (wantarray) {
 563                        return command(@cmd, '--get-all', $var);
 564                } else {
 565                        return command_oneline(@cmd, '--get', $var);
 566                }
 567        } catch Git::Error::Command with {
 568                my $E = shift;
 569                if ($E->value() == 1) {
 570                        # Key not found.
 571                        return;
 572                } else {
 573                        throw $E;
 574                }
 575        };
 576}
 577
 578
 579=item config_bool ( VARIABLE )
 580
 581Retrieve the bool configuration C<VARIABLE>. The return value
 582is usable as a boolean in perl (and C<undef> if it's not defined,
 583of course).
 584
 585This currently wraps command('config') so it is not so fast.
 586
 587=cut
 588
 589sub config_bool {
 590        my ($self, $var) = _maybe_self(@_);
 591
 592        try {
 593                my @cmd = ('config', '--bool', '--get', $var);
 594                unshift @cmd, $self if $self;
 595                my $val = command_oneline(@cmd);
 596                return undef unless defined $val;
 597                return $val eq 'true';
 598        } catch Git::Error::Command with {
 599                my $E = shift;
 600                if ($E->value() == 1) {
 601                        # Key not found.
 602                        return undef;
 603                } else {
 604                        throw $E;
 605                }
 606        };
 607}
 608
 609=item config_int ( VARIABLE )
 610
Retrieve the integer configuration C<VARIABLE>. The return value
is a simple decimal number.  An optional value suffix of 'k', 'm',
or 'g' in the config file will cause the value to be multiplied
by 1024, 1048576 (1024^2), or 1073741824 (1024^3) prior to output.
It returns C<undef> if the configuration variable is not defined.
 616
 617This currently wraps command('config') so it is not so fast.
 618
 619=cut
 620
 621sub config_int {
 622        my ($self, $var) = _maybe_self(@_);
 623
 624        try {
 625                my @cmd = ('config', '--int', '--get', $var);
 626                unshift @cmd, $self if $self;
 627                return command_oneline(@cmd);
 628        } catch Git::Error::Command with {
 629                my $E = shift;
 630                if ($E->value() == 1) {
 631                        # Key not found.
 632                        return undef;
 633                } else {
 634                        throw $E;
 635                }
 636        };
 637}
 638
 639=item get_colorbool ( NAME )
 640
 641Finds if color should be used for NAMEd operation from the configuration,
 642and returns boolean (true for "use color", false for "do not use color").
 643
 644=cut
 645
 646sub get_colorbool {
 647        my ($self, $var) = @_;
 648        my $stdout_to_tty = (-t STDOUT) ? "true" : "false";
 649        my $use_color = $self->command_oneline('config', '--get-colorbool',
 650                                               $var, $stdout_to_tty);
 651        return ($use_color eq 'true');
 652}
 653
 654=item get_color ( SLOT, COLOR )
 655
 656Finds color for SLOT from the configuration, while defaulting to COLOR,
 657and returns the ANSI color escape sequence:
 658
 659        print $repo->get_color("color.interactive.prompt", "underline blue white");
 660        print "some text";
 661        print $repo->get_color("", "normal");
 662
 663=cut
 664
 665sub get_color {
 666        my ($self, $slot, $default) = @_;
 667        my $color = $self->command_oneline('config', '--get-color', $slot, $default);
 668        if (!defined $color) {
 669                $color = "";
 670        }
 671        return $color;
 672}
 673
 674=item remote_refs ( REPOSITORY [, GROUPS [, REFGLOBS ] ] )
 675
 676This function returns a hashref of refs stored in a given remote repository.
 677The hash is in the format C<refname =\> hash>. For tags, the C<refname> entry
 678contains the tag object while a C<refname^{}> entry gives the tagged objects.
 679
 680C<REPOSITORY> has the same meaning as the appropriate C<git-ls-remote>
 681argument; either an URL or a remote name (if called on a repository instance).
C<GROUPS> is an optional arrayref that can contain 'tags' to return all the
tags and/or 'heads' to return all the heads. C<REFGLOBS> is an optional arrayref
 684of strings containing a shell-like glob to further limit the refs returned in
 685the hash; the meaning is again the same as the appropriate C<git-ls-remote>
 686argument.
 687
 688This function may or may not be called on a repository instance. In the former
 689case, remote names as defined in the repository are recognized as repository
 690specifiers.
 691
 692=cut
 693
 694sub remote_refs {
 695        my ($self, $repo, $groups, $refglobs) = _maybe_self(@_);
 696        my @args;
 697        if (ref $groups eq 'ARRAY') {
 698                foreach (@$groups) {
 699                        if ($_ eq 'heads') {
 700                                push (@args, '--heads');
 701                        } elsif ($_ eq 'tags') {
 702                                push (@args, '--tags');
 703                        } else {
 704                                # Ignore unknown groups for future
 705                                # compatibility
 706                        }
 707                }
 708        }
 709        push (@args, $repo);
 710        if (ref $refglobs eq 'ARRAY') {
 711                push (@args, @$refglobs);
 712        }
 713
 714        my @self = $self ? ($self) : (); # Ultra trickery
 715        my ($fh, $ctx) = Git::command_output_pipe(@self, 'ls-remote', @args);
 716        my %refs;
 717        while (<$fh>) {
 718                chomp;
 719                my ($hash, $ref) = split(/\t/, $_, 2);
 720                $refs{$ref} = $hash;
 721        }
 722        Git::command_close_pipe(@self, $fh, $ctx);
 723        return \%refs;
 724}
 725
 726
 727=item ident ( TYPE | IDENTSTR )
 728
 729=item ident_person ( TYPE | IDENTSTR | IDENTARRAY )
 730
 731This suite of functions retrieves and parses ident information, as stored
 732in the commit and tag objects or produced by C<var GIT_type_IDENT> (thus
 733C<TYPE> can be either I<author> or I<committer>; case is insignificant).
 734
 735The C<ident> method retrieves the ident information from C<git var>
 736and either returns it as a scalar string or as an array with the fields parsed.
 737Alternatively, it can take a prepared ident string (e.g. from the commit
 738object) and just parse it.
 739
 740C<ident_person> returns the person part of the ident - name and email;
 741it can take the same arguments as C<ident> or the array returned by C<ident>.
 742
 743The synopsis is like:
 744
 745        my ($name, $email, $time_tz) = ident('author');
 746        "$name <$email>" eq ident_person('author');
 747        "$name <$email>" eq ident_person($name);
 748        $time_tz =~ /^\d+ [+-]\d{4}$/;
 749
 750=cut
 751
 752sub ident {
 753        my ($self, $type) = _maybe_self(@_);
 754        my $identstr;
 755        if (lc $type eq lc 'committer' or lc $type eq lc 'author') {
 756                my @cmd = ('var', 'GIT_'.uc($type).'_IDENT');
 757                unshift @cmd, $self if $self;
 758                $identstr = command_oneline(@cmd);
 759        } else {
 760                $identstr = $type;
 761        }
 762        if (wantarray) {
 763                return $identstr =~ /^(.*) <(.*)> (\d+ [+-]\d{4})$/;
 764        } else {
 765                return $identstr;
 766        }
 767}
 768
 769sub ident_person {
 770        my ($self, @ident) = _maybe_self(@_);
 771        $#ident == 0 and @ident = $self ? $self->ident($ident[0]) : ident($ident[0]);
 772        return "$ident[0] <$ident[1]>";
 773}
 774
 775
 776=item hash_object ( TYPE, FILENAME )
 777
 778Compute the SHA1 object id of the given C<FILENAME> considering it is
 779of the C<TYPE> object type (C<blob>, C<commit>, C<tree>).
 780
 781The method can be called without any instance or on a specified Git repository,
 782it makes zero difference.
 783
 784The function returns the SHA1 hash.
 785
 786=cut
 787
 788# TODO: Support for passing FILEHANDLE instead of FILENAME
 789sub hash_object {
 790        my ($self, $type, $file) = _maybe_self(@_);
 791        command_oneline('hash-object', '-t', $type, $file);
 792}
 793
 794
 795=item hash_and_insert_object ( FILENAME )
 796
 797Compute the SHA1 object id of the given C<FILENAME> and add the object to the
 798object database.
 799
 800The function returns the SHA1 hash.
 801
 802=cut
 803
 804# TODO: Support for passing FILEHANDLE instead of FILENAME
 805sub hash_and_insert_object {
 806        my ($self, $filename) = @_;
 807
 808        carp "Bad filename \"$filename\"" if $filename =~ /[\r\n]/;
 809
 810        $self->_open_hash_and_insert_object_if_needed();
 811        my ($in, $out) = ($self->{hash_object_in}, $self->{hash_object_out});
 812
 813        unless (print $out $filename, "\n") {
 814                $self->_close_hash_and_insert_object();
 815                throw Error::Simple("out pipe went bad");
 816        }
 817
 818        chomp(my $hash = <$in>);
 819        unless (defined($hash)) {
 820                $self->_close_hash_and_insert_object();
 821                throw Error::Simple("in pipe went bad");
 822        }
 823
 824        return $hash;
 825}
 826
 827sub _open_hash_and_insert_object_if_needed {
 828        my ($self) = @_;
 829
 830        return if defined($self->{hash_object_pid});
 831
 832        ($self->{hash_object_pid}, $self->{hash_object_in},
 833         $self->{hash_object_out}, $self->{hash_object_ctx}) =
 834                command_bidi_pipe(qw(hash-object -w --stdin-paths));
 835}
 836
 837sub _close_hash_and_insert_object {
 838        my ($self) = @_;
 839
 840        return unless defined($self->{hash_object_pid});
 841
 842        my @vars = map { 'hash_object_' . $_ } qw(pid in out ctx);
 843
 844        command_close_bidi_pipe(@$self{@vars});
 845        delete @$self{@vars};
 846}
 847
 848=item cat_blob ( SHA1, FILEHANDLE )
 849
 850Prints the contents of the blob identified by C<SHA1> to C<FILEHANDLE> and
 851returns the number of bytes printed.
 852
 853=cut
 854
 855sub cat_blob {
 856        my ($self, $sha1, $fh) = @_;
 857
 858        $self->_open_cat_blob_if_needed();
 859        my ($in, $out) = ($self->{cat_blob_in}, $self->{cat_blob_out});
 860
 861        unless (print $out $sha1, "\n") {
 862                $self->_close_cat_blob();
 863                throw Error::Simple("out pipe went bad");
 864        }
 865
 866        my $description = <$in>;
 867        if ($description =~ / missing$/) {
 868                carp "$sha1 doesn't exist in the repository";
 869                return -1;
 870        }
 871
 872        if ($description !~ /^[0-9a-fA-F]{40} \S+ (\d+)$/) {
 873                carp "Unexpected result returned from git cat-file";
 874                return -1;
 875        }
 876
 877        my $size = $1;
 878
 879        my $blob;
 880        my $bytesRead = 0;
 881
 882        while (1) {
 883                my $bytesLeft = $size - $bytesRead;
 884                last unless $bytesLeft;
 885
 886                my $bytesToRead = $bytesLeft < 1024 ? $bytesLeft : 1024;
 887                my $read = read($in, $blob, $bytesToRead, $bytesRead);
 888                unless (defined($read)) {
 889                        $self->_close_cat_blob();
 890                        throw Error::Simple("in pipe went bad");
 891                }
 892
 893                $bytesRead += $read;
 894        }
 895
 896        # Skip past the trailing newline.
 897        my $newline;
 898        my $read = read($in, $newline, 1);
 899        unless (defined($read)) {
 900                $self->_close_cat_blob();
 901                throw Error::Simple("in pipe went bad");
 902        }
 903        unless ($read == 1 && $newline eq "\n") {
 904                $self->_close_cat_blob();
 905                throw Error::Simple("didn't find newline after blob");
 906        }
 907
 908        unless (print $fh $blob) {
 909                $self->_close_cat_blob();
 910                throw Error::Simple("couldn't write to passed in filehandle");
 911        }
 912
 913        return $size;
 914}
 915
 916sub _open_cat_blob_if_needed {
 917        my ($self) = @_;
 918
 919        return if defined($self->{cat_blob_pid});
 920
 921        ($self->{cat_blob_pid}, $self->{cat_blob_in},
 922         $self->{cat_blob_out}, $self->{cat_blob_ctx}) =
 923                command_bidi_pipe(qw(cat-file --batch));
 924}
 925
 926sub _close_cat_blob {
 927        my ($self) = @_;
 928
 929        return unless defined($self->{cat_blob_pid});
 930
 931        my @vars = map { 'cat_blob_' . $_ } qw(pid in out ctx);
 932
 933        command_close_bidi_pipe(@$self{@vars});
 934        delete @$self{@vars};
 935}
 936
 937
 938{ # %TEMP_* Lexical Context
 939
 940my (%TEMP_FILEMAP, %TEMP_FILES);
 941
 942=item temp_acquire ( NAME )
 943
Attempts to retrieve the temporary file mapped to the string C<NAME>. If an
 945associated temp file has not been created this session or was closed, it is
 946created, cached, and set for autoflush and binmode.
 947
 948Internally locks the file mapped to C<NAME>. This lock must be released with
 949C<temp_release()> when the temp file is no longer needed. Subsequent attempts
 950to retrieve temporary files mapped to the same C<NAME> while still locked will
 951cause an error. This locking mechanism provides a weak guarantee and is not
 952threadsafe. It does provide some error checking to help prevent temp file refs
 953writing over one another.
 954
 955In general, the L<File::Handle> returned should not be closed by consumers as
 956it defeats the purpose of this caching mechanism. If you need to close the temp
 957file handle, then you should use L<File::Temp> or another temp file faculty
 958directly. If a handle is closed and then requested again, then a warning will
 959issue.
 960
 961=cut
 962
 963sub temp_acquire {
 964        my ($self, $name) = _maybe_self(@_);
 965
 966        my $temp_fd = _temp_cache($name);
 967
 968        $TEMP_FILES{$temp_fd}{locked} = 1;
 969        $temp_fd;
 970}
 971
 972=item temp_release ( NAME )
 973
 974=item temp_release ( FILEHANDLE )
 975
 976Releases a lock acquired through C<temp_acquire()>. Can be called either with
 977the C<NAME> mapping used when acquiring the temp file or with the C<FILEHANDLE>
 978referencing a locked temp file.
 979
 980Warns if an attempt is made to release a file that is not locked.
 981
If a true value is passed as an additional argument after the C<NAME> or
C<FILEHANDLE>, the temp file is truncated before being released (provided its
handle is still open). This can help to reduce disk I/O where the system is
smart enough to detect the truncation while data is in the output buffers.
Beware that after the temp file is released and truncated, any operations on
that file may fail miserably until it is re-acquired. In that case all contents
are lost between each release and acquire mapped to the same string.
 988
 989=cut
 990
 991sub temp_release {
 992        my ($self, $temp_fd, $trunc) = _maybe_self(@_);
 993
 994        if (exists $TEMP_FILEMAP{$temp_fd}) {
 995                $temp_fd = $TEMP_FILES{$temp_fd};
 996        }
 997        unless ($TEMP_FILES{$temp_fd}{locked}) {
 998                carp "Attempt to release temp file '",
 999                        $temp_fd, "' that has not been locked";
1000        }
1001        temp_reset($temp_fd) if $trunc and $temp_fd->opened;
1002
1003        $TEMP_FILES{$temp_fd}{locked} = 0;
1004        undef;
1005}
1006
# Return the temp file handle cached under $name, creating (or re-creating)
# it when no open handle is cached. Throws Error::Simple if the cached
# handle is currently locked or a new temp file cannot be opened.
sub _temp_cache {
        my ($name) = @_;

        _verify_require();

        # Reference to the cache slot, so a new handle can be stored in place.
        my $temp_fd = \$TEMP_FILEMAP{$name};
        if (defined $$temp_fd and $$temp_fd->opened) {
                if ($TEMP_FILES{$$temp_fd}{locked}) {
                        # Build a single message string: extra arguments
                        # to the exception constructor are not appended
                        # to the text.
                        throw Error::Simple("Temp file with moniker '" .
                                $name . "' already in use");
                }
        } else {
                if (defined $$temp_fd) {
                        # then we're here because of a closed handle.
                        carp "Temp file '", $name,
                                "' was closed. Opening replacement.";
                }
                my $fname;
                # tempfile() is a plain function; calling it as a class
                # method shifts the class name into the argument list.
                ($$temp_fd, $fname) = File::Temp::tempfile(
                        'Git_XXXXXX', UNLINK => 1
                        ) or throw Error::Simple("couldn't open new temp file");
                $$temp_fd->autoflush;
                binmode $$temp_fd;
                # Remember the file name for temp_path().
                $TEMP_FILES{$$temp_fd}{fname} = $fname;
        }
        $$temp_fd;
}
1034
# Load File::Temp and File::Spec on demand; a load failure is rethrown as
# an Error::Simple carrying the original message.
sub _verify_require {
        eval {
                require File::Temp;
                require File::Spec;
        };

        # Copy $@ right away so nothing can clobber it before it is used.
        my $failure = $@;
        return $failure unless $failure;

        throw Error::Simple($failure);
}
1039
1040=item temp_reset ( FILEHANDLE )
1041
1042Truncates and resets the position of the C<FILEHANDLE>.
1043
1044=cut
1045
sub temp_reset {
        my ($self, $temp_fd) = _maybe_self(@_);

        unless (truncate($temp_fd, 0)) {
                throw Error::Simple("couldn't truncate file");
        }

        # Rewind both the system-level and the buffered file position.
        my $rewound = sysseek($temp_fd, 0, SEEK_SET)
                && seek($temp_fd, 0, SEEK_SET);
        unless ($rewound) {
                throw Error::Simple("couldn't seek to beginning of file");
        }

        # Double-check that both positions really report offset zero.
        my $at_start = sysseek($temp_fd, 0, SEEK_CUR) == 0
                && tell($temp_fd) == 0;
        unless ($at_start) {
                throw Error::Simple("expected file position to be reset");
        }

        return $at_start;
}
1056
1057=item temp_path ( NAME )
1058
1059=item temp_path ( FILEHANDLE )
1060
1061Returns the filename associated with the given tempfile.
1062
1063=cut
1064
sub temp_path {
        my ($self, $temp_fd) = _maybe_self(@_);

        # Accept either a NAME known to the cache or the handle itself.
        my $handle = exists $TEMP_FILEMAP{$temp_fd}
                ? $TEMP_FILEMAP{$temp_fd}
                : $temp_fd;

        return $TEMP_FILES{$handle}{fname};
}
1073
sub END {
        # NOTE(review): the values of %TEMP_FILEMAP are the handles stored
        # by _temp_cache(), not file names (those live in
        # $TEMP_FILES{...}{fname}), so this unlink probably removes
        # nothing and cleanup relies on UNLINK => 1 -- confirm before
        # changing.
        if (%TEMP_FILEMAP) {
                unlink values %TEMP_FILEMAP;
        }
}
1077
1078} # %TEMP_* Lexical Context
1079
1080=back
1081
1082=head1 ERROR HANDLING
1083
1084All functions are supposed to throw Perl exceptions in case of errors.
1085See the L<Error> module on how to catch those. Most exceptions are mere
1086L<Error::Simple> instances.
1087
1088However, the C<command()>, C<command_oneline()> and C<command_noisy()>
1089functions suite can throw C<Git::Error::Command> exceptions as well: those are
1090thrown when the external command returns an error code and contain the error
1091code as well as access to the captured command's output. The exception class
1092provides the usual C<stringify> and C<value> (command's exit code) methods and
1093in addition also a C<cmd_output> method that returns either an array or a
1094string with the captured command output (depending on the original function
call context; C<command_noisy()> returns C<undef>) and C<cmdline> which
1096returns the command and its arguments (but without proper quoting).
1097
1098Note that the C<command_*_pipe()> functions cannot throw this exception since
1099it has no idea whether the command failed or not. You will only find out
1100at the time you C<close> the pipe; if you want to have that automated,
1101use C<command_close_pipe()>, which can throw the exception.
1102
1103=cut
1104
{
        # Exception class describing a Git command that returned an error
        # code. Carries the command line, the exit code and (optionally)
        # a reference to the captured output.
        package Git::Error::Command;

        @Git::Error::Command::ISA = qw(Error);

        # new(CMDLINE, VALUE [, OUTPUTREF])
        #   CMDLINE   - command and arguments, stringified
        #   VALUE     - exit code, forced to a number
        #   OUTPUTREF - array or scalar reference to the captured output;
        #               may be omitted
        sub new {
                my $self = shift;
                my $cmdline = '' . shift;
                my $value = 0 + shift;
                my $outputref = shift;
                my(@args) = ();

                # NOTE(review): presumably makes Error.pm skip this
                # constructor's frame when recording where the exception
                # came from -- confirm against Error.pm.
                local $Error::Depth = $Error::Depth + 1;

                push(@args, '-cmdline', $cmdline);
                push(@args, '-value', $value);
                push(@args, '-outputref', $outputref);

                $self->SUPER::new(-text => 'command returned error', @args);
        }

        # Renders as "<cmdline>: <text>: <exit code>\n".
        sub stringify {
                my $self = shift;
                my $text = $self->SUPER::stringify;
                $self->cmdline() . ': ' . $text . ': ' . $self->value() . "\n";
        }

        # The command line stored at construction time.
        sub cmdline {
                my $self = shift;
                $self->{'-cmdline'};
        }

        # The captured output: the list of lines for an array reference,
        # the string for a scalar reference, and nothing (undef in scalar
        # context) when no output was captured.
        sub cmd_output {
                my $self = shift;
                my $ref = $self->{'-outputref'};
                # Without an explicit return here, the code below would
                # dereference undef and die under "use strict".
                defined $ref or return;
                if (ref $ref eq 'ARRAY') {
                        return @$ref;
                } else { # SCALAR
                        return $$ref;
                }
        }
}
1148
1149=over 4
1150
1151=item git_cmd_try { CODE } ERRMSG
1152
1153This magical statement will automatically catch any C<Git::Error::Command>
1154exceptions thrown by C<CODE> and make your program die with C<ERRMSG>
1155on its lips; the message will have %s substituted for the command line
1156and %d for the exit status. This statement is useful mostly for producing
1157more user-friendly error messages.
1158
1159In case of no exception caught the statement returns C<CODE>'s return value.
1160
1161Note that this is the only auto-exported function.
1162
1163=cut
1164
# Run CODE and turn any Git::Error::Command it throws into a croak with
# ERRMSG, where every %s is replaced by the failed command line and every
# %d by its exit value. Without such an exception, CODE's result is
# returned in the caller's context (list or scalar).
sub git_cmd_try(&$) {
        my ($code, $errmsg) = @_;
        my @result;
        my $err;
        # Remember the caller's context; it decides both how CODE is
        # invoked and what is returned at the end.
        my $array = wantarray;
        try {
                if ($array) {
                        @result = &$code;
                } else {
                        $result[0] = &$code;
                }
        } catch Git::Error::Command with {
                my $E = shift;
                # Only build the message here; it is raised after the
                # try/catch statement (see below).
                $err = $errmsg;
                $err =~ s/\%s/$E->cmdline()/ge;
                $err =~ s/\%d/$E->value()/ge;
                # We can't croak here since Error.pm would mangle
                # that to Error::Simple.
        };
        $err and croak $err;
        return $array ? @result : $result[0];
}
1187
1188
1189=back
1190
1191=head1 COPYRIGHT
1192
1193Copyright 2006 by Petr Baudis E<lt>pasky@suse.czE<gt>.
1194
1195This module is free software; it may be used, copied, modified
1196and distributed under the terms of the GNU General Public Licence,
1197either version 2, or (at your option) any later version.
1198
1199=cut
1200
1201
1202# Take raw method argument list and return ($obj, @args) in case
1203# the method was called upon an instance and (undef, @args) if
1204# it was called directly.
sub _maybe_self {
        # Treat the first argument as the invocant when it is a reference
        # that is a Git object or an instance of a subclass. Requiring
        # ref() first keeps plain strings (even the literal 'Git') as
        # ordinary arguments, exactly as before.
        ref($_[0]) && UNIVERSAL::isa($_[0], 'Git') ? @_ : (undef, @_);
}
1209
1210# Check if the command id is something reasonable.
sub _check_valid_cmd {
        my ($cmd) = @_;

        # \A and \z anchor to the real string boundaries; "$" would also
        # accept a name followed by a newline. An undefined command is
        # rejected without being run through the regex.
        my $valid = defined $cmd && $cmd =~ /\A[a-z0-9A-Z_-]+\z/;
        $valid or throw Error::Simple(
                "bad command: " . (defined $cmd ? $cmd : ''));
}
1215
1216# Common backend for the pipe creators.
sub _command_common_pipe {
        # $direction is used as the mode argument of open() below; the
        # MSWin32 branch only accepts '-|'.
        # The remaining arguments are an optional invocant followed by
        # either a flat list ($cmd, @args), or an array reference
        # [ $cmd, @args ] followed by options given as a hash reference
        # or as a key/value list.
        # Returns the filehandle; in list context also the command and
        # its arguments joined by spaces.
        my $direction = shift;
        my ($self, @p) = _maybe_self(@_);
        my (%opts, $cmd, @args);
        if (ref $p[0]) {
                ($cmd, @args) = @{shift @p};
                %opts = ref $p[0] ? %{$p[0]} : @p;
        } else {
                ($cmd, @args) = @p;
        }
        _check_valid_cmd($cmd);

        my $fh;
        if ($^O eq 'MSWin32') {
                # ActiveState Perl
                #defined $opts{STDERR} and
                #       warn 'ignoring STDERR option - running w/ ActiveState';
                $direction eq '-|' or
                        die 'input pipe for ActiveState not implemented';
                # the strange construction with *ACPIPE is just to
                # explain the tie below that we want to bind to
                # a handle class, not scalar. It is not known if
                # it is something specific to ActiveState Perl or
                # just a Perl quirk.
                tie (*ACPIPE, 'Git::activestate_pipe', $cmd, @args);
                $fh = *ACPIPE;

        } else {
                # Forking open: $pid is undef on failure, 0 in the child
                # and non-zero in the parent.
                my $pid = open($fh, $direction);
                if (not defined $pid) {
                        throw Error::Simple("open failed: $!");
                } elsif ($pid == 0) {
                        # Child: a defined STDERR option closes STDERR; a
                        # true one additionally reopens it as a duplicate
                        # of the given value.
                        if (defined $opts{STDERR}) {
                                close STDERR;
                        }
                        if ($opts{STDERR}) {
                                open (STDERR, '>&', $opts{STDERR})
                                        or die "dup failed: $!";
                        }
                        _cmd_exec($self, $cmd, @args);
                }
        }
        return wantarray ? ($fh, join(' ', $cmd, @args)) : $fh;
}
1261
1262# When already in the subprocess, set up the appropriate state
1263# for the given repository and execute the git command.
sub _cmd_exec {
        my ($repo, @args) = @_;

        # With a repository object, export its GIT_DIR and move into its
        # working copy (and subdirectory) before running the command.
        if ($repo) {
                if ($repo->repo_path()) {
                        $ENV{'GIT_DIR'} = $repo->repo_path();
                }
                if ($repo->wc_path()) {
                        chdir($repo->wc_path());
                }
                if ($repo->wc_subdir()) {
                        chdir($repo->wc_subdir());
                }
        }

        _execv_git_cmd(@args);

        # Only reached when the exec above did not take over the process.
        die qq[exec "@args" failed: $!];
}
1274
1275# Execute the given Git command ($_[0]) with arguments ($_[1..])
1276# by searching for it at proper places.
sub _execv_git_cmd {
        # List-form exec: 'git' followed by the caller's arguments.
        my @git_args = @_;
        exec('git', @git_args);
}
1278
1279# Close pipe to a subprocess.
sub _cmd_close {
        my ($pipe, $cmdline) = @_;

        if (!close($pipe)) {
                if ($!) {
                        # It's just close, no point in fatalities
                        carp "error closing pipe: $!";
                } elsif ($? >> 8) {
                        # Non-zero exit code: report it together with the
                        # command line. The caller should pepper this.
                        throw Git::Error::Command($cmdline, $? >> 8);
                }
                # Otherwise we might e.g. have closed a live stream; the
                # command dying of SIGPIPE would drive us here.
        }
}
1294
1295
# Destructor: close the helpers cached on the object, first the
# hash-and-insert-object one, then the cat-blob one.
sub DESTROY {
        my $repo = shift;

        $repo->_close_hash_and_insert_object();
        $repo->_close_cat_blob();
}
1301
1302
1303# Pipe implementation for ActiveState Perl.
1304
package Git::activestate_pipe;
use strict;

# Tied-handle class that runs the command up front and replays its output.
# Object fields:
#   data - array reference holding the captured output lines
#   i    - index of the next line to hand out
#   exit - the command's wait status ($?) as reported by qx//

sub TIEHANDLE {
        my ($class, @params) = @_;
        # FIXME: This is probably horrible idea and the thing will explode
        # at the moment you give it arguments that require some quoting,
        # but I have no ActiveState clue... --pasky
        # Let's just hope ActiveState Perl does at least the quoting
        # correctly.
        my @data = qx{git @params};
        # Remember the wait status now; any later command overwrites $?.
        bless { i => 0, data => \@data, exit => $? }, $class;
}

# Scalar context: the next line, or undef once the output is exhausted.
# List context: all remaining lines, or the empty list once exhausted.
sub READLINE {
        my $self = shift;
        my $data = $self->{data};

        # Exhausted or already closed. A bare return gives undef in
        # scalar context and an empty list in list context, so
        # "my @lines = <$fh>" never receives a stray undef element.
        return unless defined $data && $self->{i} < @$data;

        if (wantarray) {
                # Removing the tail leaves exactly {i} elements behind,
                # so the EOF condition holds afterwards.
                return splice(@$data, $self->{i});
        }
        return $data->[ $self->{i}++ ];
}

# Mimics close() on a real pipe: restores the command's wait status into
# $? and returns true only when that status is zero. On failure $! is
# cleared so that callers can tell it was the command, not the close
# itself, that failed.
sub CLOSE {
        my $self = shift;
        delete $self->{data};
        delete $self->{i};

        my $status = delete $self->{exit};
        $status = 0 unless defined $status;
        $? = $status;
        return 1 if $status == 0;

        $! = 0;
        return 0;
}

# True once every captured line has been handed out, or after CLOSE.
sub EOF {
        my $self = shift;
        my $data = $self->{data};
        return 1 unless defined $data;
        return ($self->{i} >= scalar @$data);
}
1343
1344
13451; # Famous last words