perl / Git.pmon commit filter-branch: add git_commit_non_empty_tree and --prune-empty. (d3240d9)
   1=head1 NAME
   2
   3Git - Perl interface to the Git version control system
   4
   5=cut
   6
   7
   8package Git;
   9
  10use strict;
  11
  12
  13BEGIN {
  14
  15our ($VERSION, @ISA, @EXPORT, @EXPORT_OK);
  16
  17# Totally unstable API.
  18$VERSION = '0.01';
  19
  20
  21=head1 SYNOPSIS
  22
  23  use Git;
  24
  25  my $version = Git::command_oneline('version');
  26
  27  git_cmd_try { Git::command_noisy('update-server-info') }
  28              '%s failed w/ code %d';
  29
  30  my $repo = Git->repository (Directory => '/srv/git/cogito.git');
  31
  32
  33  my @revs = $repo->command('rev-list', '--since=last monday', '--all');
  34
  35  my ($fh, $c) = $repo->command_output_pipe('rev-list', '--since=last monday', '--all');
  36  my $lastrev = <$fh>; chomp $lastrev;
  37  $repo->command_close_pipe($fh, $c);
  38
  39  my $lastrev = $repo->command_oneline( [ 'rev-list', '--all' ],
  40                                        STDERR => 0 );
  41
  42  my $sha1 = $repo->hash_and_insert_object('file.txt');
  43  my $tempfile = tempfile();
  44  my $size = $repo->cat_blob($sha1, $tempfile);
  45
  46=cut
  47
  48
  49require Exporter;
  50
  51@ISA = qw(Exporter);
  52
  53@EXPORT = qw(git_cmd_try);
  54
  55# Methods which can be called as standalone functions as well:
  56@EXPORT_OK = qw(command command_oneline command_noisy
  57                command_output_pipe command_input_pipe command_close_pipe
  58                command_bidi_pipe command_close_bidi_pipe
  59                version exec_path hash_object git_cmd_try
  60                remote_refs
  61                temp_acquire temp_release temp_reset temp_path);
  62
  63
  64=head1 DESCRIPTION
  65
  66This module provides Perl scripts easy way to interface the Git version control
  67system. The modules have an easy and well-tested way to call arbitrary Git
  68commands; in the future, the interface will also provide specialized methods
  69for doing easily operations which are not totally trivial to do over
  70the generic command interface.
  71
  72While some commands can be executed outside of any context (e.g. 'version'
  73or 'init'), most operations require a repository context, which in practice
  74means getting an instance of the Git object using the repository() constructor.
  75(In the future, we will also get a new_repository() constructor.) All commands
  76called as methods of the object are then executed in the context of the
  77repository.
  78
  79Part of the "repository state" is also information about path to the attached
  80working copy (unless you work with a bare repository). You can also navigate
  81inside of the working copy using the C<wc_chdir()> method. (Note that
  82the repository object is self-contained and will not change working directory
  83of your process.)
  84
  85TODO: In the future, we might also do
  86
  87        my $remoterepo = $repo->remote_repository (Name => 'cogito', Branch => 'master');
  88        $remoterepo ||= Git->remote_repository ('http://git.or.cz/cogito.git/');
  89        my @refs = $remoterepo->refs();
  90
  91Currently, the module merely wraps calls to external Git tools. In the future,
  92it will provide a much faster way to interact with Git by linking directly
  93to libgit. This should be completely opaque to the user, though (performance
  94increase notwithstanding).
  95
  96=cut
  97
  98
  99use Carp qw(carp croak); # but croak is bad - throw instead
 100use Error qw(:try);
 101use Cwd qw(abs_path);
 102use IPC::Open2 qw(open2);
 103use Fcntl qw(SEEK_SET SEEK_CUR);
 104}
 105
 106
 107=head1 CONSTRUCTORS
 108
 109=over 4
 110
 111=item repository ( OPTIONS )
 112
 113=item repository ( DIRECTORY )
 114
 115=item repository ()
 116
 117Construct a new repository object.
 118C<OPTIONS> are passed in a hash like fashion, using key and value pairs.
 119Possible options are:
 120
 121B<Repository> - Path to the Git repository.
 122
 123B<WorkingCopy> - Path to the associated working copy; not strictly required
 124as many commands will happily crunch on a bare repository.
 125
 126B<WorkingSubdir> - Subdirectory in the working copy to work inside.
 127Just left undefined if you do not want to limit the scope of operations.
 128
 129B<Directory> - Path to the Git working directory in its usual setup.
 130The C<.git> directory is searched in the directory and all the parent
 131directories; if found, C<WorkingCopy> is set to the directory containing
 132it and C<Repository> to the C<.git> directory itself. If no C<.git>
 133directory was found, the C<Directory> is assumed to be a bare repository,
 134C<Repository> is set to point at it and C<WorkingCopy> is left undefined.
 135If the C<$GIT_DIR> environment variable is set, things behave as expected
 136as well.
 137
 138You should not use both C<Directory> and either of C<Repository> and
 139C<WorkingCopy> - the results of that are undefined.
 140
 141Alternatively, a directory path may be passed as a single scalar argument
 142to the constructor; it is equivalent to setting only the C<Directory> option
 143field.
 144
 145Calling the constructor with no options whatsoever is equivalent to
 146calling it with C<< Directory => '.' >>. In general, if you are building
 147a standard porcelain command, simply doing C<< Git->repository() >> should
 148do the right thing and setup the object to reflect exactly where the user
 149is right now.
 150
 151=cut
 152
 153sub repository {
 154        my $class = shift;
 155        my @args = @_;
 156        my %opts = ();
 157        my $self;
 158
 159        if (defined $args[0]) {
 160                if ($#args % 2 != 1) {
 161                        # Not a hash.
 162                        $#args == 0 or throw Error::Simple("bad usage");
 163                        %opts = ( Directory => $args[0] );
 164                } else {
 165                        %opts = @args;
 166                }
 167        }
 168
 169        if (not defined $opts{Repository} and not defined $opts{WorkingCopy}) {
 170                $opts{Directory} ||= '.';
 171        }
 172
 173        if ($opts{Directory}) {
 174                -d $opts{Directory} or throw Error::Simple("Directory not found: $!");
 175
 176                my $search = Git->repository(WorkingCopy => $opts{Directory});
 177                my $dir;
 178                try {
 179                        $dir = $search->command_oneline(['rev-parse', '--git-dir'],
 180                                                        STDERR => 0);
 181                } catch Git::Error::Command with {
 182                        $dir = undef;
 183                };
 184
 185                if ($dir) {
 186                        $dir =~ m#^/# or $dir = $opts{Directory} . '/' . $dir;
 187                        $opts{Repository} = $dir;
 188
 189                        # If --git-dir went ok, this shouldn't die either.
 190                        my $prefix = $search->command_oneline('rev-parse', '--show-prefix');
 191                        $dir = abs_path($opts{Directory}) . '/';
 192                        if ($prefix) {
 193                                if (substr($dir, -length($prefix)) ne $prefix) {
 194                                        throw Error::Simple("rev-parse confused me - $dir does not have trailing $prefix");
 195                                }
 196                                substr($dir, -length($prefix)) = '';
 197                        }
 198                        $opts{WorkingCopy} = $dir;
 199                        $opts{WorkingSubdir} = $prefix;
 200
 201                } else {
 202                        # A bare repository? Let's see...
 203                        $dir = $opts{Directory};
 204
 205                        unless (-d "$dir/refs" and -d "$dir/objects" and -e "$dir/HEAD") {
 206                                # Mimick git-rev-parse --git-dir error message:
 207                                throw Error::Simple("fatal: Not a git repository: $dir");
 208                        }
 209                        my $search = Git->repository(Repository => $dir);
 210                        try {
 211                                $search->command('symbolic-ref', 'HEAD');
 212                        } catch Git::Error::Command with {
 213                                # Mimick git-rev-parse --git-dir error message:
 214                                throw Error::Simple("fatal: Not a git repository: $dir");
 215                        }
 216
 217                        $opts{Repository} = abs_path($dir);
 218                }
 219
 220                delete $opts{Directory};
 221        }
 222
 223        $self = { opts => \%opts };
 224        bless $self, $class;
 225}
 226
 227=back
 228
 229=head1 METHODS
 230
 231=over 4
 232
 233=item command ( COMMAND [, ARGUMENTS... ] )
 234
 235=item command ( [ COMMAND, ARGUMENTS... ], { Opt => Val ... } )
 236
 237Execute the given Git C<COMMAND> (specify it without the 'git-'
 238prefix), optionally with the specified extra C<ARGUMENTS>.
 239
 240The second more elaborate form can be used if you want to further adjust
 241the command execution. Currently, only one option is supported:
 242
 243B<STDERR> - How to deal with the command's error output. By default (C<undef>)
 244it is delivered to the caller's C<STDERR>. A false value (0 or '') will cause
 245it to be thrown away. If you want to process it, you can get it in a filehandle
 246you specify, but you must be extremely careful; if the error output is not
 247very short and you want to read it in the same process as where you called
 248C<command()>, you are set up for a nice deadlock!
 249
 250The method can be called without any instance or on a specified Git repository
 251(in that case the command will be run in the repository context).
 252
 253In scalar context, it returns all the command output in a single string
 254(verbatim).
 255
 256In array context, it returns an array containing lines printed to the
 257command's stdout (without trailing newlines).
 258
 259In both cases, the command's stdin and stderr are the same as the caller's.
 260
 261=cut
 262
 263sub command {
 264        my ($fh, $ctx) = command_output_pipe(@_);
 265
 266        if (not defined wantarray) {
 267                # Nothing to pepper the possible exception with.
 268                _cmd_close($fh, $ctx);
 269
 270        } elsif (not wantarray) {
 271                local $/;
 272                my $text = <$fh>;
 273                try {
 274                        _cmd_close($fh, $ctx);
 275                } catch Git::Error::Command with {
 276                        # Pepper with the output:
 277                        my $E = shift;
 278                        $E->{'-outputref'} = \$text;
 279                        throw $E;
 280                };
 281                return $text;
 282
 283        } else {
 284                my @lines = <$fh>;
 285                defined and chomp for @lines;
 286                try {
 287                        _cmd_close($fh, $ctx);
 288                } catch Git::Error::Command with {
 289                        my $E = shift;
 290                        $E->{'-outputref'} = \@lines;
 291                        throw $E;
 292                };
 293                return @lines;
 294        }
 295}
 296
 297
 298=item command_oneline ( COMMAND [, ARGUMENTS... ] )
 299
 300=item command_oneline ( [ COMMAND, ARGUMENTS... ], { Opt => Val ... } )
 301
 302Execute the given C<COMMAND> in the same way as command()
 303does but always return a scalar string containing the first line
 304of the command's standard output.
 305
 306=cut
 307
 308sub command_oneline {
 309        my ($fh, $ctx) = command_output_pipe(@_);
 310
 311        my $line = <$fh>;
 312        defined $line and chomp $line;
 313        try {
 314                _cmd_close($fh, $ctx);
 315        } catch Git::Error::Command with {
 316                # Pepper with the output:
 317                my $E = shift;
 318                $E->{'-outputref'} = \$line;
 319                throw $E;
 320        };
 321        return $line;
 322}
 323
 324
 325=item command_output_pipe ( COMMAND [, ARGUMENTS... ] )
 326
 327=item command_output_pipe ( [ COMMAND, ARGUMENTS... ], { Opt => Val ... } )
 328
 329Execute the given C<COMMAND> in the same way as command()
 330does but return a pipe filehandle from which the command output can be
 331read.
 332
 333The function can return C<($pipe, $ctx)> in array context.
 334See C<command_close_pipe()> for details.
 335
 336=cut
 337
 338sub command_output_pipe {
 339        _command_common_pipe('-|', @_);
 340}
 341
 342
 343=item command_input_pipe ( COMMAND [, ARGUMENTS... ] )
 344
 345=item command_input_pipe ( [ COMMAND, ARGUMENTS... ], { Opt => Val ... } )
 346
 347Execute the given C<COMMAND> in the same way as command_output_pipe()
 348does but return an input pipe filehandle instead; the command output
 349is not captured.
 350
 351The function can return C<($pipe, $ctx)> in array context.
 352See C<command_close_pipe()> for details.
 353
 354=cut
 355
 356sub command_input_pipe {
 357        _command_common_pipe('|-', @_);
 358}
 359
 360
 361=item command_close_pipe ( PIPE [, CTX ] )
 362
 363Close the C<PIPE> as returned from C<command_*_pipe()>, checking
 364whether the command finished successfully. The optional C<CTX> argument
 365is required if you want to see the command name in the error message,
 366and it is the second value returned by C<command_*_pipe()> when
 367called in array context. The call idiom is:
 368
 369        my ($fh, $ctx) = $r->command_output_pipe('status');
 370        while (<$fh>) { ... }
 371        $r->command_close_pipe($fh, $ctx);
 372
 373Note that you should not rely on whatever actually is in C<CTX>;
 374currently it is simply the command name but in future the context might
 375have more complicated structure.
 376
 377=cut
 378
 379sub command_close_pipe {
 380        my ($self, $fh, $ctx) = _maybe_self(@_);
 381        $ctx ||= '<unknown>';
 382        _cmd_close($fh, $ctx);
 383}
 384
 385=item command_bidi_pipe ( COMMAND [, ARGUMENTS... ] )
 386
 387Execute the given C<COMMAND> in the same way as command_output_pipe()
 388does but return both an input pipe filehandle and an output pipe filehandle.
 389
 390The function will return return C<($pid, $pipe_in, $pipe_out, $ctx)>.
 391See C<command_close_bidi_pipe()> for details.
 392
 393=cut
 394
 395sub command_bidi_pipe {
 396        my ($pid, $in, $out);
 397        $pid = open2($in, $out, 'git', @_);
 398        return ($pid, $in, $out, join(' ', @_));
 399}
 400
 401=item command_close_bidi_pipe ( PID, PIPE_IN, PIPE_OUT [, CTX] )
 402
 403Close the C<PIPE_IN> and C<PIPE_OUT> as returned from C<command_bidi_pipe()>,
 404checking whether the command finished successfully. The optional C<CTX>
 405argument is required if you want to see the command name in the error message,
 406and it is the fourth value returned by C<command_bidi_pipe()>.  The call idiom
 407is:
 408
 409        my ($pid, $in, $out, $ctx) = $r->command_bidi_pipe('cat-file --batch-check');
 410        print "000000000\n" $out;
 411        while (<$in>) { ... }
 412        $r->command_close_bidi_pipe($pid, $in, $out, $ctx);
 413
 414Note that you should not rely on whatever actually is in C<CTX>;
 415currently it is simply the command name but in future the context might
 416have more complicated structure.
 417
 418=cut
 419
 420sub command_close_bidi_pipe {
 421        local $?;
 422        my ($pid, $in, $out, $ctx) = @_;
 423        foreach my $fh ($in, $out) {
 424                unless (close $fh) {
 425                        if ($!) {
 426                                carp "error closing pipe: $!";
 427                        } elsif ($? >> 8) {
 428                                throw Git::Error::Command($ctx, $? >>8);
 429                        }
 430                }
 431        }
 432
 433        waitpid $pid, 0;
 434
 435        if ($? >> 8) {
 436                throw Git::Error::Command($ctx, $? >>8);
 437        }
 438}
 439
 440
 441=item command_noisy ( COMMAND [, ARGUMENTS... ] )
 442
 443Execute the given C<COMMAND> in the same way as command() does but do not
 444capture the command output - the standard output is not redirected and goes
 445to the standard output of the caller application.
 446
 447While the method is called command_noisy(), you might want to as well use
 448it for the most silent Git commands which you know will never pollute your
 449stdout but you want to avoid the overhead of the pipe setup when calling them.
 450
 451The function returns only after the command has finished running.
 452
 453=cut
 454
 455sub command_noisy {
 456        my ($self, $cmd, @args) = _maybe_self(@_);
 457        _check_valid_cmd($cmd);
 458
 459        my $pid = fork;
 460        if (not defined $pid) {
 461                throw Error::Simple("fork failed: $!");
 462        } elsif ($pid == 0) {
 463                _cmd_exec($self, $cmd, @args);
 464        }
 465        if (waitpid($pid, 0) > 0 and $?>>8 != 0) {
 466                throw Git::Error::Command(join(' ', $cmd, @args), $? >> 8);
 467        }
 468}
 469
 470
 471=item version ()
 472
 473Return the Git version in use.
 474
 475=cut
 476
 477sub version {
 478        my $verstr = command_oneline('--version');
 479        $verstr =~ s/^git version //;
 480        $verstr;
 481}
 482
 483
 484=item exec_path ()
 485
 486Return path to the Git sub-command executables (the same as
 487C<git --exec-path>). Useful mostly only internally.
 488
 489=cut
 490
 491sub exec_path { command_oneline('--exec-path') }
 492
 493
 494=item repo_path ()
 495
 496Return path to the git repository. Must be called on a repository instance.
 497
 498=cut
 499
 500sub repo_path { $_[0]->{opts}->{Repository} }
 501
 502
 503=item wc_path ()
 504
 505Return path to the working copy. Must be called on a repository instance.
 506
 507=cut
 508
 509sub wc_path { $_[0]->{opts}->{WorkingCopy} }
 510
 511
 512=item wc_subdir ()
 513
 514Return path to the subdirectory inside of a working copy. Must be called
 515on a repository instance.
 516
 517=cut
 518
 519sub wc_subdir { $_[0]->{opts}->{WorkingSubdir} ||= '' }
 520
 521
 522=item wc_chdir ( SUBDIR )
 523
 524Change the working copy subdirectory to work within. The C<SUBDIR> is
 525relative to the working copy root directory (not the current subdirectory).
 526Must be called on a repository instance attached to a working copy
 527and the directory must exist.
 528
 529=cut
 530
 531sub wc_chdir {
 532        my ($self, $subdir) = @_;
 533        $self->wc_path()
 534                or throw Error::Simple("bare repository");
 535
 536        -d $self->wc_path().'/'.$subdir
 537                or throw Error::Simple("subdir not found: $!");
 538        # Of course we will not "hold" the subdirectory so anyone
 539        # can delete it now and we will never know. But at least we tried.
 540
 541        $self->{opts}->{WorkingSubdir} = $subdir;
 542}
 543
 544
 545=item config ( VARIABLE )
 546
 547Retrieve the configuration C<VARIABLE> in the same manner as C<config>
 548does. In scalar context requires the variable to be set only one time
 549(exception is thrown otherwise), in array context returns allows the
 550variable to be set multiple times and returns all the values.
 551
 552This currently wraps command('config') so it is not so fast.
 553
 554=cut
 555
 556sub config {
 557        my ($self, $var) = _maybe_self(@_);
 558
 559        try {
 560                my @cmd = ('config');
 561                unshift @cmd, $self if $self;
 562                if (wantarray) {
 563                        return command(@cmd, '--get-all', $var);
 564                } else {
 565                        return command_oneline(@cmd, '--get', $var);
 566                }
 567        } catch Git::Error::Command with {
 568                my $E = shift;
 569                if ($E->value() == 1) {
 570                        # Key not found.
 571                        return;
 572                } else {
 573                        throw $E;
 574                }
 575        };
 576}
 577
 578
 579=item config_bool ( VARIABLE )
 580
 581Retrieve the bool configuration C<VARIABLE>. The return value
 582is usable as a boolean in perl (and C<undef> if it's not defined,
 583of course).
 584
 585This currently wraps command('config') so it is not so fast.
 586
 587=cut
 588
 589sub config_bool {
 590        my ($self, $var) = _maybe_self(@_);
 591
 592        try {
 593                my @cmd = ('config', '--bool', '--get', $var);
 594                unshift @cmd, $self if $self;
 595                my $val = command_oneline(@cmd);
 596                return undef unless defined $val;
 597                return $val eq 'true';
 598        } catch Git::Error::Command with {
 599                my $E = shift;
 600                if ($E->value() == 1) {
 601                        # Key not found.
 602                        return undef;
 603                } else {
 604                        throw $E;
 605                }
 606        };
 607}
 608
 609=item config_int ( VARIABLE )
 610
 611Retrieve the integer configuration C<VARIABLE>. The return value
 612is simple decimal number.  An optional value suffix of 'k', 'm',
 613or 'g' in the config file will cause the value to be multiplied
 614by 1024, 1048576 (1024^2), or 1073741824 (1024^3) prior to output.
 615It would return C<undef> if configuration variable is not defined,
 616
 617This currently wraps command('config') so it is not so fast.
 618
 619=cut
 620
 621sub config_int {
 622        my ($self, $var) = _maybe_self(@_);
 623
 624        try {
 625                my @cmd = ('config', '--int', '--get', $var);
 626                unshift @cmd, $self if $self;
 627                return command_oneline(@cmd);
 628        } catch Git::Error::Command with {
 629                my $E = shift;
 630                if ($E->value() == 1) {
 631                        # Key not found.
 632                        return undef;
 633                } else {
 634                        throw $E;
 635                }
 636        };
 637}
 638
 639=item get_colorbool ( NAME )
 640
 641Finds if color should be used for NAMEd operation from the configuration,
 642and returns boolean (true for "use color", false for "do not use color").
 643
 644=cut
 645
 646sub get_colorbool {
 647        my ($self, $var) = @_;
 648        my $stdout_to_tty = (-t STDOUT) ? "true" : "false";
 649        my $use_color = $self->command_oneline('config', '--get-colorbool',
 650                                               $var, $stdout_to_tty);
 651        return ($use_color eq 'true');
 652}
 653
 654=item get_color ( SLOT, COLOR )
 655
 656Finds color for SLOT from the configuration, while defaulting to COLOR,
 657and returns the ANSI color escape sequence:
 658
 659        print $repo->get_color("color.interactive.prompt", "underline blue white");
 660        print "some text";
 661        print $repo->get_color("", "normal");
 662
 663=cut
 664
 665sub get_color {
 666        my ($self, $slot, $default) = @_;
 667        my $color = $self->command_oneline('config', '--get-color', $slot, $default);
 668        if (!defined $color) {
 669                $color = "";
 670        }
 671        return $color;
 672}
 673
 674=item remote_refs ( REPOSITORY [, GROUPS [, REFGLOBS ] ] )
 675
 676This function returns a hashref of refs stored in a given remote repository.
 677The hash is in the format C<refname =\> hash>. For tags, the C<refname> entry
 678contains the tag object while a C<refname^{}> entry gives the tagged objects.
 679
 680C<REPOSITORY> has the same meaning as the appropriate C<git-ls-remote>
 681argument; either an URL or a remote name (if called on a repository instance).
 682C<GROUPS> is an optional arrayref that can contain 'tags' to return all the
 683tags and/or 'heads' to return all the heads. C<REFGLOB> is an optional array
 684of strings containing a shell-like glob to further limit the refs returned in
 685the hash; the meaning is again the same as the appropriate C<git-ls-remote>
 686argument.
 687
 688This function may or may not be called on a repository instance. In the former
 689case, remote names as defined in the repository are recognized as repository
 690specifiers.
 691
 692=cut
 693
 694sub remote_refs {
 695        my ($self, $repo, $groups, $refglobs) = _maybe_self(@_);
 696        my @args;
 697        if (ref $groups eq 'ARRAY') {
 698                foreach (@$groups) {
 699                        if ($_ eq 'heads') {
 700                                push (@args, '--heads');
 701                        } elsif ($_ eq 'tags') {
 702                                push (@args, '--tags');
 703                        } else {
 704                                # Ignore unknown groups for future
 705                                # compatibility
 706                        }
 707                }
 708        }
 709        push (@args, $repo);
 710        if (ref $refglobs eq 'ARRAY') {
 711                push (@args, @$refglobs);
 712        }
 713
 714        my @self = $self ? ($self) : (); # Ultra trickery
 715        my ($fh, $ctx) = Git::command_output_pipe(@self, 'ls-remote', @args);
 716        my %refs;
 717        while (<$fh>) {
 718                chomp;
 719                my ($hash, $ref) = split(/\t/, $_, 2);
 720                $refs{$ref} = $hash;
 721        }
 722        Git::command_close_pipe(@self, $fh, $ctx);
 723        return \%refs;
 724}
 725
 726
 727=item ident ( TYPE | IDENTSTR )
 728
 729=item ident_person ( TYPE | IDENTSTR | IDENTARRAY )
 730
 731This suite of functions retrieves and parses ident information, as stored
 732in the commit and tag objects or produced by C<var GIT_type_IDENT> (thus
 733C<TYPE> can be either I<author> or I<committer>; case is insignificant).
 734
 735The C<ident> method retrieves the ident information from C<git var>
 736and either returns it as a scalar string or as an array with the fields parsed.
 737Alternatively, it can take a prepared ident string (e.g. from the commit
 738object) and just parse it.
 739
 740C<ident_person> returns the person part of the ident - name and email;
 741it can take the same arguments as C<ident> or the array returned by C<ident>.
 742
 743The synopsis is like:
 744
 745        my ($name, $email, $time_tz) = ident('author');
 746        "$name <$email>" eq ident_person('author');
 747        "$name <$email>" eq ident_person($name);
 748        $time_tz =~ /^\d+ [+-]\d{4}$/;
 749
 750=cut
 751
 752sub ident {
 753        my ($self, $type) = _maybe_self(@_);
 754        my $identstr;
 755        if (lc $type eq lc 'committer' or lc $type eq lc 'author') {
 756                my @cmd = ('var', 'GIT_'.uc($type).'_IDENT');
 757                unshift @cmd, $self if $self;
 758                $identstr = command_oneline(@cmd);
 759        } else {
 760                $identstr = $type;
 761        }
 762        if (wantarray) {
 763                return $identstr =~ /^(.*) <(.*)> (\d+ [+-]\d{4})$/;
 764        } else {
 765                return $identstr;
 766        }
 767}
 768
 769sub ident_person {
 770        my ($self, @ident) = _maybe_self(@_);
 771        $#ident == 0 and @ident = $self ? $self->ident($ident[0]) : ident($ident[0]);
 772        return "$ident[0] <$ident[1]>";
 773}
 774
 775
 776=item hash_object ( TYPE, FILENAME )
 777
 778Compute the SHA1 object id of the given C<FILENAME> considering it is
 779of the C<TYPE> object type (C<blob>, C<commit>, C<tree>).
 780
 781The method can be called without any instance or on a specified Git repository,
 782it makes zero difference.
 783
 784The function returns the SHA1 hash.
 785
 786=cut
 787
 788# TODO: Support for passing FILEHANDLE instead of FILENAME
 789sub hash_object {
 790        my ($self, $type, $file) = _maybe_self(@_);
 791        command_oneline('hash-object', '-t', $type, $file);
 792}
 793
 794
 795=item hash_and_insert_object ( FILENAME )
 796
 797Compute the SHA1 object id of the given C<FILENAME> and add the object to the
 798object database.
 799
 800The function returns the SHA1 hash.
 801
 802=cut
 803
 804# TODO: Support for passing FILEHANDLE instead of FILENAME
 805sub hash_and_insert_object {
 806        my ($self, $filename) = @_;
 807
 808        carp "Bad filename \"$filename\"" if $filename =~ /[\r\n]/;
 809
 810        $self->_open_hash_and_insert_object_if_needed();
 811        my ($in, $out) = ($self->{hash_object_in}, $self->{hash_object_out});
 812
 813        unless (print $out $filename, "\n") {
 814                $self->_close_hash_and_insert_object();
 815                throw Error::Simple("out pipe went bad");
 816        }
 817
 818        chomp(my $hash = <$in>);
 819        unless (defined($hash)) {
 820                $self->_close_hash_and_insert_object();
 821                throw Error::Simple("in pipe went bad");
 822        }
 823
 824        return $hash;
 825}
 826
 827sub _open_hash_and_insert_object_if_needed {
 828        my ($self) = @_;
 829
 830        return if defined($self->{hash_object_pid});
 831
 832        ($self->{hash_object_pid}, $self->{hash_object_in},
 833         $self->{hash_object_out}, $self->{hash_object_ctx}) =
 834                command_bidi_pipe(qw(hash-object -w --stdin-paths));
 835}
 836
 837sub _close_hash_and_insert_object {
 838        my ($self) = @_;
 839
 840        return unless defined($self->{hash_object_pid});
 841
 842        my @vars = map { 'hash_object_' . $_ } qw(pid in out ctx);
 843
 844        command_close_bidi_pipe(@$self{@vars});
 845        delete @$self{@vars};
 846}
 847
 848=item cat_blob ( SHA1, FILEHANDLE )
 849
 850Prints the contents of the blob identified by C<SHA1> to C<FILEHANDLE> and
 851returns the number of bytes printed.
 852
 853=cut
 854
 855sub cat_blob {
 856        my ($self, $sha1, $fh) = @_;
 857
 858        $self->_open_cat_blob_if_needed();
 859        my ($in, $out) = ($self->{cat_blob_in}, $self->{cat_blob_out});
 860
 861        unless (print $out $sha1, "\n") {
 862                $self->_close_cat_blob();
 863                throw Error::Simple("out pipe went bad");
 864        }
 865
 866        my $description = <$in>;
 867        if ($description =~ / missing$/) {
 868                carp "$sha1 doesn't exist in the repository";
 869                return -1;
 870        }
 871
 872        if ($description !~ /^[0-9a-fA-F]{40} \S+ (\d+)$/) {
 873                carp "Unexpected result returned from git cat-file";
 874                return -1;
 875        }
 876
 877        my $size = $1;
 878
 879        my $blob;
 880        my $bytesRead = 0;
 881
 882        while (1) {
 883                my $bytesLeft = $size - $bytesRead;
 884                last unless $bytesLeft;
 885
 886                my $bytesToRead = $bytesLeft < 1024 ? $bytesLeft : 1024;
 887                my $read = read($in, $blob, $bytesToRead, $bytesRead);
 888                unless (defined($read)) {
 889                        $self->_close_cat_blob();
 890                        throw Error::Simple("in pipe went bad");
 891                }
 892
 893                $bytesRead += $read;
 894        }
 895
 896        # Skip past the trailing newline.
 897        my $newline;
 898        my $read = read($in, $newline, 1);
 899        unless (defined($read)) {
 900                $self->_close_cat_blob();
 901                throw Error::Simple("in pipe went bad");
 902        }
 903        unless ($read == 1 && $newline eq "\n") {
 904                $self->_close_cat_blob();
 905                throw Error::Simple("didn't find newline after blob");
 906        }
 907
 908        unless (print $fh $blob) {
 909                $self->_close_cat_blob();
 910                throw Error::Simple("couldn't write to passed in filehandle");
 911        }
 912
 913        return $size;
 914}
 915
 916sub _open_cat_blob_if_needed {
 917        my ($self) = @_;
 918
 919        return if defined($self->{cat_blob_pid});
 920
 921        ($self->{cat_blob_pid}, $self->{cat_blob_in},
 922         $self->{cat_blob_out}, $self->{cat_blob_ctx}) =
 923                command_bidi_pipe(qw(cat-file --batch));
 924}
 925
 926sub _close_cat_blob {
 927        my ($self) = @_;
 928
 929        return unless defined($self->{cat_blob_pid});
 930
 931        my @vars = map { 'cat_blob_' . $_ } qw(pid in out ctx);
 932
 933        command_close_bidi_pipe(@$self{@vars});
 934        delete @$self{@vars};
 935}
 936
 937
 938{ # %TEMP_* Lexical Context
 939
 940my (%TEMP_FILEMAP, %TEMP_FILES);
 941
 942=item temp_acquire ( NAME )
 943
 944Attempts to retreive the temporary file mapped to the string C<NAME>. If an
 945associated temp file has not been created this session or was closed, it is
 946created, cached, and set for autoflush and binmode.
 947
 948Internally locks the file mapped to C<NAME>. This lock must be released with
 949C<temp_release()> when the temp file is no longer needed. Subsequent attempts
 950to retrieve temporary files mapped to the same C<NAME> while still locked will
 951cause an error. This locking mechanism provides a weak guarantee and is not
 952threadsafe. It does provide some error checking to help prevent temp file refs
 953writing over one another.
 954
 955In general, the L<File::Handle> returned should not be closed by consumers as
 956it defeats the purpose of this caching mechanism. If you need to close the temp
 957file handle, then you should use L<File::Temp> or another temp file faculty
 958directly. If a handle is closed and then requested again, then a warning will
 959issue.
 960
 961=cut
 962
 963sub temp_acquire {
 964        my $temp_fd = _temp_cache(@_);
 965
 966        $TEMP_FILES{$temp_fd}{locked} = 1;
 967        $temp_fd;
 968}
 969
 970=item temp_release ( NAME )
 971
 972=item temp_release ( FILEHANDLE )
 973
 974Releases a lock acquired through C<temp_acquire()>. Can be called either with
 975the C<NAME> mapping used when acquiring the temp file or with the C<FILEHANDLE>
 976referencing a locked temp file.
 977
 978Warns if an attempt is made to release a file that is not locked.
 979
 980The temp file will be truncated before being released. This can help to reduce
 981disk I/O where the system is smart enough to detect the truncation while data
 982is in the output buffers. Beware that after the temp file is released and
 983truncated, any operations on that file may fail miserably until it is
 984re-acquired. All contents are lost between each release and acquire mapped to
 985the same string.
 986
 987=cut
 988
 989sub temp_release {
 990        my ($self, $temp_fd, $trunc) = _maybe_self(@_);
 991
 992        if (exists $TEMP_FILEMAP{$temp_fd}) {
 993                $temp_fd = $TEMP_FILES{$temp_fd};
 994        }
 995        unless ($TEMP_FILES{$temp_fd}{locked}) {
 996                carp "Attempt to release temp file '",
 997                        $temp_fd, "' that has not been locked";
 998        }
 999        temp_reset($temp_fd) if $trunc and $temp_fd->opened;
1000
1001        $TEMP_FILES{$temp_fd}{locked} = 0;
1002        undef;
1003}
1004
1005sub _temp_cache {
1006        my ($self, $name) = _maybe_self(@_);
1007
1008        _verify_require();
1009
1010        my $temp_fd = \$TEMP_FILEMAP{$name};
1011        if (defined $$temp_fd and $$temp_fd->opened) {
1012                if ($TEMP_FILES{$$temp_fd}{locked}) {
1013                        throw Error::Simple("Temp file with moniker '",
1014                                $name, "' already in use");
1015                }
1016        } else {
1017                if (defined $$temp_fd) {
1018                        # then we're here because of a closed handle.
1019                        carp "Temp file '", $name,
1020                                "' was closed. Opening replacement.";
1021                }
1022                my $fname;
1023
1024                my $tmpdir;
1025                if (defined $self) {
1026                        $tmpdir = $self->repo_path();
1027                }
1028
1029                ($$temp_fd, $fname) = File::Temp->tempfile(
1030                        'Git_XXXXXX', UNLINK => 1, DIR => $tmpdir,
1031                        ) or throw Error::Simple("couldn't open new temp file");
1032
1033                $$temp_fd->autoflush;
1034                binmode $$temp_fd;
1035                $TEMP_FILES{$$temp_fd}{fname} = $fname;
1036        }
1037        $$temp_fd;
1038}
1039
1040sub _verify_require {
1041        eval { require File::Temp; require File::Spec; };
1042        $@ and throw Error::Simple($@);
1043}
1044
1045=item temp_reset ( FILEHANDLE )
1046
1047Truncates and resets the position of the C<FILEHANDLE>.
1048
1049=cut
1050
1051sub temp_reset {
1052        my ($self, $temp_fd) = _maybe_self(@_);
1053
1054        truncate $temp_fd, 0
1055                or throw Error::Simple("couldn't truncate file");
1056        sysseek($temp_fd, 0, SEEK_SET) and seek($temp_fd, 0, SEEK_SET)
1057                or throw Error::Simple("couldn't seek to beginning of file");
1058        sysseek($temp_fd, 0, SEEK_CUR) == 0 and tell($temp_fd) == 0
1059                or throw Error::Simple("expected file position to be reset");
1060}
1061
1062=item temp_path ( NAME )
1063
1064=item temp_path ( FILEHANDLE )
1065
1066Returns the filename associated with the given tempfile.
1067
1068=cut
1069
1070sub temp_path {
1071        my ($self, $temp_fd) = _maybe_self(@_);
1072
1073        if (exists $TEMP_FILEMAP{$temp_fd}) {
1074                $temp_fd = $TEMP_FILEMAP{$temp_fd};
1075        }
1076        $TEMP_FILES{$temp_fd}{fname};
1077}
1078
1079sub END {
1080        unlink values %TEMP_FILEMAP if %TEMP_FILEMAP;
1081}
1082
1083} # %TEMP_* Lexical Context
1084
1085=back
1086
1087=head1 ERROR HANDLING
1088
1089All functions are supposed to throw Perl exceptions in case of errors.
1090See the L<Error> module on how to catch those. Most exceptions are mere
1091L<Error::Simple> instances.
1092
1093However, the C<command()>, C<command_oneline()> and C<command_noisy()>
1094functions suite can throw C<Git::Error::Command> exceptions as well: those are
1095thrown when the external command returns an error code and contain the error
1096code as well as access to the captured command's output. The exception class
1097provides the usual C<stringify> and C<value> (command's exit code) methods and
1098in addition also a C<cmd_output> method that returns either an array or a
1099string with the captured command output (depending on the original function
1100call context; C<command_noisy()> returns C<undef>) and $<cmdline> which
1101returns the command and its arguments (but without proper quoting).
1102
1103Note that the C<command_*_pipe()> functions cannot throw this exception since
1104it has no idea whether the command failed or not. You will only find out
1105at the time you C<close> the pipe; if you want to have that automated,
1106use C<command_close_pipe()>, which can throw the exception.
1107
1108=cut
1109
1110{
1111        package Git::Error::Command;
1112
1113        @Git::Error::Command::ISA = qw(Error);
1114
1115        sub new {
1116                my $self = shift;
1117                my $cmdline = '' . shift;
1118                my $value = 0 + shift;
1119                my $outputref = shift;
1120                my(@args) = ();
1121
1122                local $Error::Depth = $Error::Depth + 1;
1123
1124                push(@args, '-cmdline', $cmdline);
1125                push(@args, '-value', $value);
1126                push(@args, '-outputref', $outputref);
1127
1128                $self->SUPER::new(-text => 'command returned error', @args);
1129        }
1130
1131        sub stringify {
1132                my $self = shift;
1133                my $text = $self->SUPER::stringify;
1134                $self->cmdline() . ': ' . $text . ': ' . $self->value() . "\n";
1135        }
1136
1137        sub cmdline {
1138                my $self = shift;
1139                $self->{'-cmdline'};
1140        }
1141
1142        sub cmd_output {
1143                my $self = shift;
1144                my $ref = $self->{'-outputref'};
1145                defined $ref or undef;
1146                if (ref $ref eq 'ARRAY') {
1147                        return @$ref;
1148                } else { # SCALAR
1149                        return $$ref;
1150                }
1151        }
1152}
1153
1154=over 4
1155
1156=item git_cmd_try { CODE } ERRMSG
1157
1158This magical statement will automatically catch any C<Git::Error::Command>
1159exceptions thrown by C<CODE> and make your program die with C<ERRMSG>
1160on its lips; the message will have %s substituted for the command line
1161and %d for the exit status. This statement is useful mostly for producing
1162more user-friendly error messages.
1163
1164In case of no exception caught the statement returns C<CODE>'s return value.
1165
1166Note that this is the only auto-exported function.
1167
1168=cut
1169
1170sub git_cmd_try(&$) {
1171        my ($code, $errmsg) = @_;
1172        my @result;
1173        my $err;
1174        my $array = wantarray;
1175        try {
1176                if ($array) {
1177                        @result = &$code;
1178                } else {
1179                        $result[0] = &$code;
1180                }
1181        } catch Git::Error::Command with {
1182                my $E = shift;
1183                $err = $errmsg;
1184                $err =~ s/\%s/$E->cmdline()/ge;
1185                $err =~ s/\%d/$E->value()/ge;
1186                # We can't croak here since Error.pm would mangle
1187                # that to Error::Simple.
1188        };
1189        $err and croak $err;
1190        return $array ? @result : $result[0];
1191}
1192
1193
1194=back
1195
1196=head1 COPYRIGHT
1197
1198Copyright 2006 by Petr Baudis E<lt>pasky@suse.czE<gt>.
1199
1200This module is free software; it may be used, copied, modified
1201and distributed under the terms of the GNU General Public Licence,
1202either version 2, or (at your option) any later version.
1203
1204=cut
1205
1206
1207# Take raw method argument list and return ($obj, @args) in case
1208# the method was called upon an instance and (undef, @args) if
1209# it was called directly.
1210sub _maybe_self {
1211        UNIVERSAL::isa($_[0], 'Git') ? @_ : (undef, @_);
1212}
1213
1214# Check if the command id is something reasonable.
1215sub _check_valid_cmd {
1216        my ($cmd) = @_;
1217        $cmd =~ /^[a-z0-9A-Z_-]+$/ or throw Error::Simple("bad command: $cmd");
1218}
1219
1220# Common backend for the pipe creators.
1221sub _command_common_pipe {
1222        my $direction = shift;
1223        my ($self, @p) = _maybe_self(@_);
1224        my (%opts, $cmd, @args);
1225        if (ref $p[0]) {
1226                ($cmd, @args) = @{shift @p};
1227                %opts = ref $p[0] ? %{$p[0]} : @p;
1228        } else {
1229                ($cmd, @args) = @p;
1230        }
1231        _check_valid_cmd($cmd);
1232
1233        my $fh;
1234        if ($^O eq 'MSWin32') {
1235                # ActiveState Perl
1236                #defined $opts{STDERR} and
1237                #       warn 'ignoring STDERR option - running w/ ActiveState';
1238                $direction eq '-|' or
1239                        die 'input pipe for ActiveState not implemented';
1240                # the strange construction with *ACPIPE is just to
1241                # explain the tie below that we want to bind to
1242                # a handle class, not scalar. It is not known if
1243                # it is something specific to ActiveState Perl or
1244                # just a Perl quirk.
1245                tie (*ACPIPE, 'Git::activestate_pipe', $cmd, @args);
1246                $fh = *ACPIPE;
1247
1248        } else {
1249                my $pid = open($fh, $direction);
1250                if (not defined $pid) {
1251                        throw Error::Simple("open failed: $!");
1252                } elsif ($pid == 0) {
1253                        if (defined $opts{STDERR}) {
1254                                close STDERR;
1255                        }
1256                        if ($opts{STDERR}) {
1257                                open (STDERR, '>&', $opts{STDERR})
1258                                        or die "dup failed: $!";
1259                        }
1260                        _cmd_exec($self, $cmd, @args);
1261                }
1262        }
1263        return wantarray ? ($fh, join(' ', $cmd, @args)) : $fh;
1264}
1265
1266# When already in the subprocess, set up the appropriate state
1267# for the given repository and execute the git command.
1268sub _cmd_exec {
1269        my ($self, @args) = @_;
1270        if ($self) {
1271                $self->repo_path() and $ENV{'GIT_DIR'} = $self->repo_path();
1272                $self->wc_path() and chdir($self->wc_path());
1273                $self->wc_subdir() and chdir($self->wc_subdir());
1274        }
1275        _execv_git_cmd(@args);
1276        die qq[exec "@args" failed: $!];
1277}
1278
1279# Execute the given Git command ($_[0]) with arguments ($_[1..])
1280# by searching for it at proper places.
1281sub _execv_git_cmd { exec('git', @_); }
1282
1283# Close pipe to a subprocess.
1284sub _cmd_close {
1285        my ($fh, $ctx) = @_;
1286        if (not close $fh) {
1287                if ($!) {
1288                        # It's just close, no point in fatalities
1289                        carp "error closing pipe: $!";
1290                } elsif ($? >> 8) {
1291                        # The caller should pepper this.
1292                        throw Git::Error::Command($ctx, $? >> 8);
1293                }
1294                # else we might e.g. closed a live stream; the command
1295                # dying of SIGPIPE would drive us here.
1296        }
1297}
1298
1299
1300sub DESTROY {
1301        my ($self) = @_;
1302        $self->_close_hash_and_insert_object();
1303        $self->_close_cat_blob();
1304}
1305
1306
1307# Pipe implementation for ActiveState Perl.
1308
1309package Git::activestate_pipe;
1310use strict;
1311
1312sub TIEHANDLE {
1313        my ($class, @params) = @_;
1314        # FIXME: This is probably horrible idea and the thing will explode
1315        # at the moment you give it arguments that require some quoting,
1316        # but I have no ActiveState clue... --pasky
1317        # Let's just hope ActiveState Perl does at least the quoting
1318        # correctly.
1319        my @data = qx{git @params};
1320        bless { i => 0, data => \@data }, $class;
1321}
1322
1323sub READLINE {
1324        my $self = shift;
1325        if ($self->{i} >= scalar @{$self->{data}}) {
1326                return undef;
1327        }
1328        my $i = $self->{i};
1329        if (wantarray) {
1330                $self->{i} = $#{$self->{'data'}} + 1;
1331                return splice(@{$self->{'data'}}, $i);
1332        }
1333        $self->{i} = $i + 1;
1334        return $self->{'data'}->[ $i ];
1335}
1336
1337sub CLOSE {
1338        my $self = shift;
1339        delete $self->{data};
1340        delete $self->{i};
1341}
1342
1343sub EOF {
1344        my $self = shift;
1345        return ($self->{i} >= scalar @{$self->{data}});
1346}
1347
1348
13491; # Famous last words