git-annotate.perlon commit clone: the given repository dir should be relative to $PWD (ced78b3)
   1#!/usr/bin/perl
   2# Copyright 2006, Ryan Anderson <ryan@michonline.com>
   3#
   4# GPL v2 (See COPYING)
   5#
   6# This file is licensed under the GPL v2, or a later version
   7# at the discretion of Linus Torvalds.
   8
   9use warnings;
  10use strict;
  11use Getopt::Long;
  12use POSIX qw(strftime gmtime);
  13use File::Basename qw(basename dirname);
  14
  15sub usage() {
  16        print STDERR "Usage: ${\basename $0} [-s] [-S revs-file] file [ revision ]
  17        -l, --long
  18                        Show long rev (Defaults off)
  19        -t, --time
  20                        Show raw timestamp (Defaults off)
  21        -r, --rename
  22                        Follow renames (Defaults on).
  23        -S, --rev-file revs-file
  24                        Use revs from revs-file instead of calling git-rev-list
  25        -h, --help
  26                        This message.
  27";
  28
  29        exit(1);
  30}
  31
  32our ($help, $longrev, $rename, $rawtime, $starting_rev, $rev_file) = (0, 0, 1);
  33
  34my $rc = GetOptions(    "long|l" => \$longrev,
  35                        "time|t" => \$rawtime,
  36                        "help|h" => \$help,
  37                        "rename|r" => \$rename,
  38                        "rev-file|S=s" => \$rev_file);
  39if (!$rc or $help or !@ARGV) {
  40        usage();
  41}
  42
  43my $filename = shift @ARGV;
  44if (@ARGV) {
  45        $starting_rev = shift @ARGV;
  46}
  47
  48my @stack = (
  49        {
  50                'rev' => defined $starting_rev ? $starting_rev : "HEAD",
  51                'filename' => $filename,
  52        },
  53);
  54
  55our @filelines = ();
  56
  57if (defined $starting_rev) {
  58        @filelines = git_cat_file($starting_rev, $filename);
  59} else {
  60        open(F,"<",$filename)
  61                or die "Failed to open filename: $!";
  62
  63        while(<F>) {
  64                chomp;
  65                push @filelines, $_;
  66        }
  67        close(F);
  68
  69}
  70
  71our %revs;
  72our @revqueue;
  73our $head;
  74
  75my $revsprocessed = 0;
  76while (my $bound = pop @stack) {
  77        my @revisions = git_rev_list($bound->{'rev'}, $bound->{'filename'});
  78        foreach my $revinst (@revisions) {
  79                my ($rev, @parents) = @$revinst;
  80                $head ||= $rev;
  81
  82                if (!defined($rev)) {
  83                        $rev = "";
  84                }
  85                $revs{$rev}{'filename'} = $bound->{'filename'};
  86                if (scalar @parents > 0) {
  87                        $revs{$rev}{'parents'} = \@parents;
  88                        next;
  89                }
  90
  91                if (!$rename) {
  92                        next;
  93                }
  94
  95                my $newbound = find_parent_renames($rev, $bound->{'filename'});
  96                if ( exists $newbound->{'filename'} && $newbound->{'filename'} ne $bound->{'filename'}) {
  97                        push @stack, $newbound;
  98                        $revs{$rev}{'parents'} = [$newbound->{'rev'}];
  99                }
 100        }
 101}
 102push @revqueue, $head;
 103init_claim( defined $starting_rev ? $head : 'dirty');
 104unless (defined $starting_rev) {
 105        my $diff = open_pipe("git","diff","HEAD", "--",$filename)
 106                or die "Failed to call git diff to check for dirty state: $!";
 107
 108        _git_diff_parse($diff, [$head], "dirty", (
 109                                'author' => gitvar_name("GIT_AUTHOR_IDENT"),
 110                                'author_date' => sprintf("%s +0000",time()),
 111                                )
 112                        );
 113        close($diff);
 114}
 115handle_rev();
 116
 117
 118my $i = 0;
 119foreach my $l (@filelines) {
 120        my ($output, $rev, $committer, $date);
 121        if (ref $l eq 'ARRAY') {
 122                ($output, $rev, $committer, $date) = @$l;
 123                if (!$longrev && length($rev) > 8) {
 124                        $rev = substr($rev,0,8);
 125                }
 126        } else {
 127                $output = $l;
 128                ($rev, $committer, $date) = ('unknown', 'unknown', 'unknown');
 129        }
 130
 131        printf("%s\t(%10s\t%10s\t%d)%s\n", $rev, $committer,
 132                format_date($date), ++$i, $output);
 133}
 134
 135sub init_claim {
 136        my ($rev) = @_;
 137        for (my $i = 0; $i < @filelines; $i++) {
 138                $filelines[$i] = [ $filelines[$i], '', '', '', 1];
 139                        # line,
 140                        # rev,
 141                        # author,
 142                        # date,
 143                        # 1 <-- belongs to the original file.
 144        }
 145        $revs{$rev}{'lines'} = \@filelines;
 146}
 147
 148
 149sub handle_rev {
 150        my $revseen = 0;
 151        my %seen;
 152        while (my $rev = shift @revqueue) {
 153                next if $seen{$rev}++;
 154
 155                my %revinfo = git_commit_info($rev);
 156
 157                if (exists $revs{$rev}{parents} &&
 158                    scalar @{$revs{$rev}{parents}} != 0) {
 159
 160                        git_diff_parse($revs{$rev}{'parents'}, $rev, %revinfo);
 161                        push @revqueue, @{$revs{$rev}{'parents'}};
 162
 163                } else {
 164                        # We must be at the initial rev here, so claim everything that is left.
 165                        for (my $i = 0; $i < @{$revs{$rev}{lines}}; $i++) {
 166                                if (ref ${$revs{$rev}{lines}}[$i] eq '' || ${$revs{$rev}{lines}}[$i][1] eq '') {
 167                                        claim_line($i, $rev, $revs{$rev}{lines}, %revinfo);
 168                                }
 169                        }
 170                }
 171        }
 172}
 173
 174
 175sub git_rev_list {
 176        my ($rev, $file) = @_;
 177
 178        my $revlist;
 179        if ($rev_file) {
 180                open($revlist, '<' . $rev_file)
 181                    or die "Failed to open $rev_file : $!";
 182        } else {
 183                $revlist = open_pipe("git-rev-list","--parents","--remove-empty",$rev,"--",$file)
 184                        or die "Failed to exec git-rev-list: $!";
 185        }
 186
 187        my @revs;
 188        while(my $line = <$revlist>) {
 189                chomp $line;
 190                my ($rev, @parents) = split /\s+/, $line;
 191                push @revs, [ $rev, @parents ];
 192        }
 193        close($revlist);
 194
 195        printf("0 revs found for rev %s (%s)\n", $rev, $file) if (@revs == 0);
 196        return @revs;
 197}
 198
 199sub find_parent_renames {
 200        my ($rev, $file) = @_;
 201
 202        my $patch = open_pipe("git-diff-tree", "-M50", "-r","--name-status", "-z","$rev")
 203                or die "Failed to exec git-diff: $!";
 204
 205        local $/ = "\0";
 206        my %bound;
 207        my $junk = <$patch>;
 208        while (my $change = <$patch>) {
 209                chomp $change;
 210                my $filename = <$patch>;
 211                if (!defined $filename) {
 212                        next;
 213                }
 214                chomp $filename;
 215
 216                if ($change =~ m/^[AMD]$/ ) {
 217                        next;
 218                } elsif ($change =~ m/^R/ ) {
 219                        my $oldfilename = $filename;
 220                        $filename = <$patch>;
 221                        chomp $filename;
 222                        if ( $file eq $filename ) {
 223                                my $parent = git_find_parent($rev, $oldfilename);
 224                                @bound{'rev','filename'} = ($parent, $oldfilename);
 225                                last;
 226                        }
 227                }
 228        }
 229        close($patch);
 230
 231        return \%bound;
 232}
 233
 234
 235sub git_find_parent {
 236        my ($rev, $filename) = @_;
 237
 238        my $revparent = open_pipe("git-rev-list","--remove-empty", "--parents","--max-count=1","$rev","--",$filename)
 239                or die "Failed to open git-rev-list to find a single parent: $!";
 240
 241        my $parentline = <$revparent>;
 242        chomp $parentline;
 243        my ($revfound,$parent) = split m/\s+/, $parentline;
 244
 245        close($revparent);
 246
 247        return $parent;
 248}
 249
 250sub git_find_all_parents {
 251        my ($rev) = @_;
 252
 253        my $revparent = open_pipe("git-rev-list","--remove-empty", "--parents","--max-count=1","$rev")
 254                or die "Failed to open git-rev-list to find a single parent: $!";
 255
 256        my $parentline = <$revparent>;
 257        chomp $parentline;
 258        my ($origrev, @parents) = split m/\s+/, $parentline;
 259
 260        close($revparent);
 261
 262        return @parents;
 263}
 264
 265sub git_merge_base {
 266        my ($rev1, $rev2) = @_;
 267
 268        my $mb = open_pipe("git-merge-base", $rev1, $rev2)
 269                or die "Failed to open git-merge-base: $!";
 270
 271        my $base = <$mb>;
 272        chomp $base;
 273
 274        close($mb);
 275
 276        return $base;
 277}
 278
 279# Construct a set of pseudo parents that are in the same order,
 280# and the same quantity as the real parents,
 281# but whose SHA1s are as similar to the logical parents
 282# as possible.
 283sub get_pseudo_parents {
 284        my ($all, $fake) = @_;
 285
 286        my @all = @$all;
 287        my @fake = @$fake;
 288
 289        my @pseudo;
 290
 291        my %fake = map {$_ => 1} @fake;
 292        my %seenfake;
 293
 294        my $fakeidx = 0;
 295        foreach my $p (@all) {
 296                if (exists $fake{$p}) {
 297                        if ($fake[$fakeidx] ne $p) {
 298                                die sprintf("parent mismatch: %s != %s\nall:%s\nfake:%s\n",
 299                                            $fake[$fakeidx], $p,
 300                                            join(", ", @all),
 301                                            join(", ", @fake),
 302                                           );
 303                        }
 304
 305                        push @pseudo, $p;
 306                        $fakeidx++;
 307                        $seenfake{$p}++;
 308
 309                } else {
 310                        my $base = git_merge_base($fake[$fakeidx], $p);
 311                        if ($base ne $fake[$fakeidx]) {
 312                                die sprintf("Result of merge-base doesn't match fake: %s,%s != %s\n",
 313                                       $fake[$fakeidx], $p, $base);
 314                        }
 315
 316                        # The details of how we parse the diffs
 317                        # mean that we cannot have a duplicate
 318                        # revision in the list, so if we've already
 319                        # seen the revision we would normally add, just use
 320                        # the actual revision.
 321                        if ($seenfake{$base}) {
 322                                push @pseudo, $p;
 323                        } else {
 324                                push @pseudo, $base;
 325                                $seenfake{$base}++;
 326                        }
 327                }
 328        }
 329
 330        return @pseudo;
 331}
 332
 333
 334# Get a diff between the current revision and a parent.
 335# Record the commit information that results.
 336sub git_diff_parse {
 337        my ($parents, $rev, %revinfo) = @_;
 338
 339        my @pseudo_parents;
 340        my @command = ("git-diff-tree");
 341        my $revision_spec;
 342
 343        if (scalar @$parents == 1) {
 344
 345                $revision_spec = join("..", $parents->[0], $rev);
 346                @pseudo_parents = @$parents;
 347        } else {
 348                my @all_parents = git_find_all_parents($rev);
 349
 350                if (@all_parents !=  @$parents) {
 351                        @pseudo_parents = get_pseudo_parents(\@all_parents, $parents);
 352                } else {
 353                        @pseudo_parents = @$parents;
 354                }
 355
 356                $revision_spec = $rev;
 357                push @command, "-c";
 358        }
 359
 360        my @filenames = ( $revs{$rev}{'filename'} );
 361
 362        foreach my $parent (@$parents) {
 363                push @filenames, $revs{$parent}{'filename'};
 364        }
 365
 366        push @command, "-p", "-M", $revision_spec, "--", @filenames;
 367
 368
 369        my $diff = open_pipe( @command )
 370                or die "Failed to call git-diff for annotation: $!";
 371
 372        _git_diff_parse($diff, \@pseudo_parents, $rev, %revinfo);
 373
 374        close($diff);
 375}
 376
 377sub _git_diff_parse {
 378        my ($diff, $parents, $rev, %revinfo) = @_;
 379
 380        my $ri = 0;
 381
 382        my $slines = $revs{$rev}{'lines'};
 383        my (%plines, %pi);
 384
 385        my $gotheader = 0;
 386        my ($remstart);
 387        my $parent_count = @$parents;
 388
 389        my $diff_header_regexp = "^@";
 390        $diff_header_regexp .= "@" x @$parents;
 391        $diff_header_regexp .= ' -\d+,\d+' x @$parents;
 392        $diff_header_regexp .= ' \+(\d+),\d+';
 393        $diff_header_regexp .= " " . ("@" x @$parents);
 394
 395        my %claim_regexps;
 396        my $allparentplus = '^' . '\\+' x @$parents . '(.*)$';
 397
 398        {
 399                my $i = 0;
 400                foreach my $parent (@$parents) {
 401
 402                        $pi{$parent} = 0;
 403                        my $r = '^' . '.' x @$parents . '(.*)$';
 404                        my $p = $r;
 405                        substr($p,$i+1, 1) = '\\+';
 406
 407                        my $m = $r;
 408                        substr($m,$i+1, 1) = '-';
 409
 410                        $claim_regexps{$parent}{plus} = $p;
 411                        $claim_regexps{$parent}{minus} = $m;
 412
 413                        $plines{$parent} = [];
 414
 415                        $i++;
 416                }
 417        }
 418
 419        DIFF:
 420        while(<$diff>) {
 421                chomp;
 422                #printf("%d:%s:\n", $gotheader, $_);
 423                if (m/$diff_header_regexp/) {
 424                        $remstart = $1 - 1;
 425                        # (0-based arrays)
 426
 427                        $gotheader = 1;
 428
 429                        foreach my $parent (@$parents) {
 430                                for (my $i = $ri; $i < $remstart; $i++) {
 431                                        $plines{$parent}[$pi{$parent}++] = $slines->[$i];
 432                                }
 433                        }
 434                        $ri = $remstart;
 435
 436                        next DIFF;
 437
 438                } elsif (!$gotheader) {
 439                        # Skip over the leadin.
 440                        next DIFF;
 441                }
 442
 443                if (m/^\\/) {
 444                        ;
 445                        # Skip \No newline at end of file.
 446                        # But this can be internationalized, so only look
 447                        # for an initial \
 448
 449                } else {
 450                        my %claims = ();
 451                        my $negclaim = 0;
 452                        my $allclaimed = 0;
 453                        my $line;
 454
 455                        if (m/$allparentplus/) {
 456                                claim_line($ri, $rev, $slines, %revinfo);
 457                                $allclaimed = 1;
 458
 459                        }
 460
 461                        PARENT:
 462                        foreach my $parent (keys %claim_regexps) {
 463                                my $m = $claim_regexps{$parent}{minus};
 464                                my $p = $claim_regexps{$parent}{plus};
 465
 466                                if (m/$m/) {
 467                                        $line = $1;
 468                                        $plines{$parent}[$pi{$parent}++] = [ $line, '', '', '', 0 ];
 469                                        $negclaim++;
 470
 471                                } elsif (m/$p/) {
 472                                        $line = $1;
 473                                        if (get_line($slines, $ri) eq $line) {
 474                                                # Found a match, claim
 475                                                $claims{$parent}++;
 476
 477                                        } else {
 478                                                die sprintf("Sync error: %d\n|%s\n|%s\n%s => %s\n",
 479                                                                $ri, $line,
 480                                                                get_line($slines, $ri),
 481                                                                $rev, $parent);
 482                                        }
 483                                }
 484                        }
 485
 486                        if (%claims) {
 487                                foreach my $parent (@$parents) {
 488                                        next if $claims{$parent} || $allclaimed;
 489                                        $plines{$parent}[$pi{$parent}++] = $slines->[$ri];
 490                                            #[ $line, '', '', '', 0 ];
 491                                }
 492                                $ri++;
 493
 494                        } elsif ($negclaim) {
 495                                next DIFF;
 496
 497                        } else {
 498                                if (substr($_,scalar @$parents) ne get_line($slines,$ri) ) {
 499                                        foreach my $parent (@$parents) {
 500                                                printf("parent %s is on line %d\n", $parent, $pi{$parent});
 501                                        }
 502
 503                                        my @context;
 504                                        for (my $i = -2; $i < 2; $i++) {
 505                                                push @context, get_line($slines, $ri + $i);
 506                                        }
 507                                        my $context = join("\n", @context);
 508
 509                                        my $justline = substr($_, scalar @$parents);
 510                                        die sprintf("Line %d, does not match:\n|%s|\n|%s|\n%s\n",
 511                                                    $ri,
 512                                                    $justline,
 513                                                    $context);
 514                                }
 515                                foreach my $parent (@$parents) {
 516                                        $plines{$parent}[$pi{$parent}++] = $slines->[$ri];
 517                                }
 518                                $ri++;
 519                        }
 520                }
 521        }
 522
 523        for (my $i = $ri; $i < @{$slines} ; $i++) {
 524                foreach my $parent (@$parents) {
 525                        push @{$plines{$parent}}, $slines->[$ri];
 526                }
 527                $ri++;
 528        }
 529
 530        foreach my $parent (@$parents) {
 531                $revs{$parent}{lines} = $plines{$parent};
 532        }
 533
 534        return;
 535}
 536
 537sub get_line {
 538        my ($lines, $index) = @_;
 539
 540        return ref $lines->[$index] ne '' ? $lines->[$index][0] : $lines->[$index];
 541}
 542
 543sub git_cat_file {
 544        my ($rev, $filename) = @_;
 545        return () unless defined $rev && defined $filename;
 546
 547        my $blob = git_ls_tree($rev, $filename);
 548        die "Failed to find a blob for $filename in rev $rev\n" if !defined $blob;
 549
 550        my $catfile = open_pipe("git","cat-file", "blob", $blob)
 551                or die "Failed to git-cat-file blob $blob (rev $rev, file $filename): " . $!;
 552
 553        my @lines;
 554        while(<$catfile>) {
 555                chomp;
 556                push @lines, $_;
 557        }
 558        close($catfile);
 559
 560        return @lines;
 561}
 562
 563sub git_ls_tree {
 564        my ($rev, $filename) = @_;
 565
 566        my $lstree = open_pipe("git","ls-tree",$rev,$filename)
 567                or die "Failed to call git ls-tree: $!";
 568
 569        my ($mode, $type, $blob, $tfilename);
 570        while(<$lstree>) {
 571                chomp;
 572                ($mode, $type, $blob, $tfilename) = split(/\s+/, $_, 4);
 573                last if ($tfilename eq $filename);
 574        }
 575        close($lstree);
 576
 577        return $blob if ($tfilename eq $filename);
 578        die "git-ls-tree failed to find blob for $filename";
 579
 580}
 581
 582
 583
 584sub claim_line {
 585        my ($floffset, $rev, $lines, %revinfo) = @_;
 586        my $oline = get_line($lines, $floffset);
 587        @{$lines->[$floffset]} = ( $oline, $rev,
 588                $revinfo{'author'}, $revinfo{'author_date'} );
 589        #printf("Claiming line %d with rev %s: '%s'\n",
 590        #               $floffset, $rev, $oline) if 1;
 591}
 592
 593sub git_commit_info {
 594        my ($rev) = @_;
 595        my $commit = open_pipe("git-cat-file", "commit", $rev)
 596                or die "Failed to call git-cat-file: $!";
 597
 598        my %info;
 599        while(<$commit>) {
 600                chomp;
 601                last if (length $_ == 0);
 602
 603                if (m/^author (.*) <(.*)> (.*)$/) {
 604                        $info{'author'} = $1;
 605                        $info{'author_email'} = $2;
 606                        $info{'author_date'} = $3;
 607                } elsif (m/^committer (.*) <(.*)> (.*)$/) {
 608                        $info{'committer'} = $1;
 609                        $info{'committer_email'} = $2;
 610                        $info{'committer_date'} = $3;
 611                }
 612        }
 613        close($commit);
 614
 615        return %info;
 616}
 617
 618sub format_date {
 619        if ($rawtime) {
 620                return $_[0];
 621        }
 622        my ($timestamp, $timezone) = split(' ', $_[0]);
 623        my $minutes = abs($timezone);
 624        $minutes = int($minutes / 100) * 60 + ($minutes % 100);
 625        if ($timezone < 0) {
 626            $minutes = -$minutes;
 627        }
 628        my $t = $timestamp + $minutes * 60;
 629        return strftime("%Y-%m-%d %H:%M:%S " . $timezone, gmtime($t));
 630}
 631
 632# Copied from git-send-email.perl - We need a Git.pm module..
 633sub gitvar {
 634    my ($var) = @_;
 635    my $fh;
 636    my $pid = open($fh, '-|');
 637    die "$!" unless defined $pid;
 638    if (!$pid) {
 639        exec('git-var', $var) or die "$!";
 640    }
 641    my ($val) = <$fh>;
 642    close $fh or die "$!";
 643    chomp($val);
 644    return $val;
 645}
 646
 647sub gitvar_name {
 648    my ($name) = @_;
 649    my $val = gitvar($name);
 650    my @field = split(/\s+/, $val);
 651    return join(' ', @field[0...(@field-4)]);
 652}
 653
 654sub open_pipe {
 655        if ($^O eq '##INSERT_ACTIVESTATE_STRING_HERE##') {
 656                return open_pipe_activestate(@_);
 657        } else {
 658                return open_pipe_normal(@_);
 659        }
 660}
 661
 662sub open_pipe_activestate {
 663        tie *fh, "Git::ActiveStatePipe", @_;
 664        return *fh;
 665}
 666
 667sub open_pipe_normal {
 668        my (@execlist) = @_;
 669
 670        my $pid = open my $kid, "-|";
 671        defined $pid or die "Cannot fork: $!";
 672
 673        unless ($pid) {
 674                exec @execlist;
 675                die "Cannot exec @execlist: $!";
 676        }
 677
 678        return $kid;
 679}
 680
 681package Git::ActiveStatePipe;
 682use strict;
 683
 684sub TIEHANDLE {
 685        my ($class, @params) = @_;
 686        my $cmdline = join " ", @params;
 687        my  @data = qx{$cmdline};
 688        bless { i => 0, data => \@data }, $class;
 689}
 690
 691sub READLINE {
 692        my $self = shift;
 693        if ($self->{i} >= scalar @{$self->{data}}) {
 694                return undef;
 695        }
 696        return $self->{'data'}->[ $self->{i}++ ];
 697}
 698
 699sub CLOSE {
 700        my $self = shift;
 701        delete $self->{data};
 702        delete $self->{i};
 703}
 704
 705sub EOF {
 706        my $self = shift;
 707        return ($self->{i} >= scalar @{$self->{data}});
 708}