git-annotate.perlon commit Merge branch 'master' into pb/gitpm (d7b6c3c)
   1#!/usr/bin/perl
   2# Copyright 2006, Ryan Anderson <ryan@michonline.com>
   3#
   4# GPL v2 (See COPYING)
   5#
   6# This file is licensed under the GPL v2, or a later version
   7# at the discretion of Linus Torvalds.
   8
   9use warnings;
  10use strict;
  11use Getopt::Long;
  12use POSIX qw(strftime gmtime);
  13use File::Basename qw(basename dirname);
  14use Git;
  15
  16sub usage() {
  17        print STDERR "Usage: ${\basename $0} [-s] [-S revs-file] file [ revision ]
  18        -l, --long
  19                        Show long rev (Defaults off)
  20        -t, --time
  21                        Show raw timestamp (Defaults off)
  22        -r, --rename
  23                        Follow renames (Defaults on).
  24        -S, --rev-file revs-file
  25                        Use revs from revs-file instead of calling git-rev-list
  26        -h, --help
  27                        This message.
  28";
  29
  30        exit(1);
  31}
  32
  33our ($help, $longrev, $rename, $rawtime, $starting_rev, $rev_file, $repo) = (0, 0, 1);
  34
  35my $rc = GetOptions(    "long|l" => \$longrev,
  36                        "time|t" => \$rawtime,
  37                        "help|h" => \$help,
  38                        "rename|r" => \$rename,
  39                        "rev-file|S=s" => \$rev_file);
  40if (!$rc or $help or !@ARGV) {
  41        usage();
  42}
  43
  44my $filename = shift @ARGV;
  45if (@ARGV) {
  46        $starting_rev = shift @ARGV;
  47}
  48
  49my @stack = (
  50        {
  51                'rev' => defined $starting_rev ? $starting_rev : "HEAD",
  52                'filename' => $filename,
  53        },
  54);
  55
  56$repo = Git->repository();
  57
  58our @filelines = ();
  59
  60if (defined $starting_rev) {
  61        @filelines = git_cat_file($starting_rev, $filename);
  62} else {
  63        open(F,"<",$filename)
  64                or die "Failed to open filename: $!";
  65
  66        while(<F>) {
  67                chomp;
  68                push @filelines, $_;
  69        }
  70        close(F);
  71
  72}
  73
  74our %revs;
  75our @revqueue;
  76our $head;
  77
  78my $revsprocessed = 0;
  79while (my $bound = pop @stack) {
  80        my @revisions = git_rev_list($bound->{'rev'}, $bound->{'filename'});
  81        foreach my $revinst (@revisions) {
  82                my ($rev, @parents) = @$revinst;
  83                $head ||= $rev;
  84
  85                if (!defined($rev)) {
  86                        $rev = "";
  87                }
  88                $revs{$rev}{'filename'} = $bound->{'filename'};
  89                if (scalar @parents > 0) {
  90                        $revs{$rev}{'parents'} = \@parents;
  91                        next;
  92                }
  93
  94                if (!$rename) {
  95                        next;
  96                }
  97
  98                my $newbound = find_parent_renames($rev, $bound->{'filename'});
  99                if ( exists $newbound->{'filename'} && $newbound->{'filename'} ne $bound->{'filename'}) {
 100                        push @stack, $newbound;
 101                        $revs{$rev}{'parents'} = [$newbound->{'rev'}];
 102                }
 103        }
 104}
 105push @revqueue, $head;
 106init_claim( defined $starting_rev ? $head : 'dirty');
 107unless (defined $starting_rev) {
 108        my %ident;
 109        @ident{'author', 'author_email', 'author_date'} = $repo->ident('author');
 110        my $diff = $repo->command_output_pipe('diff', '-R', 'HEAD', '--', $filename);
 111        _git_diff_parse($diff, [$head], "dirty", %ident);
 112        $repo->command_close_pipe($diff);
 113}
 114handle_rev();
 115
 116
 117my $i = 0;
 118foreach my $l (@filelines) {
 119        my ($output, $rev, $committer, $date);
 120        if (ref $l eq 'ARRAY') {
 121                ($output, $rev, $committer, $date) = @$l;
 122                if (!$longrev && length($rev) > 8) {
 123                        $rev = substr($rev,0,8);
 124                }
 125        } else {
 126                $output = $l;
 127                ($rev, $committer, $date) = ('unknown', 'unknown', 'unknown');
 128        }
 129
 130        printf("%s\t(%10s\t%10s\t%d)%s\n", $rev, $committer,
 131                format_date($date), ++$i, $output);
 132}
 133
 134sub init_claim {
 135        my ($rev) = @_;
 136        for (my $i = 0; $i < @filelines; $i++) {
 137                $filelines[$i] = [ $filelines[$i], '', '', '', 1];
 138                        # line,
 139                        # rev,
 140                        # author,
 141                        # date,
 142                        # 1 <-- belongs to the original file.
 143        }
 144        $revs{$rev}{'lines'} = \@filelines;
 145}
 146
 147
 148sub handle_rev {
 149        my $revseen = 0;
 150        my %seen;
 151        while (my $rev = shift @revqueue) {
 152                next if $seen{$rev}++;
 153
 154                my %revinfo = git_commit_info($rev);
 155
 156                if (exists $revs{$rev}{parents} &&
 157                    scalar @{$revs{$rev}{parents}} != 0) {
 158
 159                        git_diff_parse($revs{$rev}{'parents'}, $rev, %revinfo);
 160                        push @revqueue, @{$revs{$rev}{'parents'}};
 161
 162                } else {
 163                        # We must be at the initial rev here, so claim everything that is left.
 164                        for (my $i = 0; $i < @{$revs{$rev}{lines}}; $i++) {
 165                                if (ref ${$revs{$rev}{lines}}[$i] eq '' || ${$revs{$rev}{lines}}[$i][1] eq '') {
 166                                        claim_line($i, $rev, $revs{$rev}{lines}, %revinfo);
 167                                }
 168                        }
 169                }
 170        }
 171}
 172
 173
 174sub git_rev_list {
 175        my ($rev, $file) = @_;
 176
 177        my $revlist;
 178        if ($rev_file) {
 179                open($revlist, '<' . $rev_file)
 180                    or die "Failed to open $rev_file : $!";
 181        } else {
 182                $revlist = $repo->command_output_pipe('rev-list', '--parents', '--remove-empty', $rev, '--', $file);
 183        }
 184
 185        my @revs;
 186        while(my $line = <$revlist>) {
 187                chomp $line;
 188                my ($rev, @parents) = split /\s+/, $line;
 189                push @revs, [ $rev, @parents ];
 190        }
 191        $repo->command_close_pipe($revlist);
 192
 193        printf("0 revs found for rev %s (%s)\n", $rev, $file) if (@revs == 0);
 194        return @revs;
 195}
 196
 197sub find_parent_renames {
 198        my ($rev, $file) = @_;
 199
 200        my $patch = $repo->command_output_pipe('diff-tree', '-M50', '-r', '--name-status', '-z', $rev);
 201
 202        local $/ = "\0";
 203        my %bound;
 204        my $junk = <$patch>;
 205        while (my $change = <$patch>) {
 206                chomp $change;
 207                my $filename = <$patch>;
 208                if (!defined $filename) {
 209                        next;
 210                }
 211                chomp $filename;
 212
 213                if ($change =~ m/^[AMD]$/ ) {
 214                        next;
 215                } elsif ($change =~ m/^R/ ) {
 216                        my $oldfilename = $filename;
 217                        $filename = <$patch>;
 218                        chomp $filename;
 219                        if ( $file eq $filename ) {
 220                                my $parent = git_find_parent($rev, $oldfilename);
 221                                @bound{'rev','filename'} = ($parent, $oldfilename);
 222                                last;
 223                        }
 224                }
 225        }
 226        $repo->command_close_pipe($patch);
 227
 228        return \%bound;
 229}
 230
 231
 232sub git_find_parent {
 233        my ($rev, $filename) = @_;
 234
 235        my $parentline = $repo->command_oneline('rev-list', '--remove-empty',
 236                        '--parents', '--max-count=1', $rev, '--', $filename);
 237        my ($revfound, $parent) = split m/\s+/, $parentline;
 238
 239        return $parent;
 240}
 241
 242sub git_find_all_parents {
 243        my ($rev) = @_;
 244
 245        my $parentline = $repo->command_oneline("rev-list","--remove-empty", "--parents","--max-count=1","$rev");
 246        my ($origrev, @parents) = split m/\s+/, $parentline;
 247
 248        return @parents;
 249}
 250
 251sub git_merge_base {
 252        my ($rev1, $rev2) = @_;
 253
 254        my $base = $repo->command_oneline("merge-base", $rev1, $rev2);
 255        return $base;
 256}
 257
 258# Construct a set of pseudo parents that are in the same order,
 259# and the same quantity as the real parents,
 260# but whose SHA1s are as similar to the logical parents
 261# as possible.
 262sub get_pseudo_parents {
 263        my ($all, $fake) = @_;
 264
 265        my @all = @$all;
 266        my @fake = @$fake;
 267
 268        my @pseudo;
 269
 270        my %fake = map {$_ => 1} @fake;
 271        my %seenfake;
 272
 273        my $fakeidx = 0;
 274        foreach my $p (@all) {
 275                if (exists $fake{$p}) {
 276                        if ($fake[$fakeidx] ne $p) {
 277                                die sprintf("parent mismatch: %s != %s\nall:%s\nfake:%s\n",
 278                                            $fake[$fakeidx], $p,
 279                                            join(", ", @all),
 280                                            join(", ", @fake),
 281                                           );
 282                        }
 283
 284                        push @pseudo, $p;
 285                        $fakeidx++;
 286                        $seenfake{$p}++;
 287
 288                } else {
 289                        my $base = git_merge_base($fake[$fakeidx], $p);
 290                        if ($base ne $fake[$fakeidx]) {
 291                                die sprintf("Result of merge-base doesn't match fake: %s,%s != %s\n",
 292                                       $fake[$fakeidx], $p, $base);
 293                        }
 294
 295                        # The details of how we parse the diffs
 296                        # mean that we cannot have a duplicate
 297                        # revision in the list, so if we've already
 298                        # seen the revision we would normally add, just use
 299                        # the actual revision.
 300                        if ($seenfake{$base}) {
 301                                push @pseudo, $p;
 302                        } else {
 303                                push @pseudo, $base;
 304                                $seenfake{$base}++;
 305                        }
 306                }
 307        }
 308
 309        return @pseudo;
 310}
 311
 312
 313# Get a diff between the current revision and a parent.
 314# Record the commit information that results.
 315sub git_diff_parse {
 316        my ($parents, $rev, %revinfo) = @_;
 317
 318        my @pseudo_parents;
 319        my @command = ("diff-tree");
 320        my $revision_spec;
 321
 322        if (scalar @$parents == 1) {
 323
 324                $revision_spec = join("..", $parents->[0], $rev);
 325                @pseudo_parents = @$parents;
 326        } else {
 327                my @all_parents = git_find_all_parents($rev);
 328
 329                if (@all_parents !=  @$parents) {
 330                        @pseudo_parents = get_pseudo_parents(\@all_parents, $parents);
 331                } else {
 332                        @pseudo_parents = @$parents;
 333                }
 334
 335                $revision_spec = $rev;
 336                push @command, "-c";
 337        }
 338
 339        my @filenames = ( $revs{$rev}{'filename'} );
 340
 341        foreach my $parent (@$parents) {
 342                push @filenames, $revs{$parent}{'filename'};
 343        }
 344
 345        push @command, "-p", "-M", $revision_spec, "--", @filenames;
 346
 347
 348        my $diff = $repo->command_output_pipe(@command);
 349
 350        _git_diff_parse($diff, \@pseudo_parents, $rev, %revinfo);
 351
 352        $repo->command_close_pipe($diff);
 353}
 354
 355sub _git_diff_parse {
 356        my ($diff, $parents, $rev, %revinfo) = @_;
 357
 358        my $ri = 0;
 359
 360        my $slines = $revs{$rev}{'lines'};
 361        my (%plines, %pi);
 362
 363        my $gotheader = 0;
 364        my ($remstart);
 365        my $parent_count = @$parents;
 366
 367        my $diff_header_regexp = "^@";
 368        $diff_header_regexp .= "@" x @$parents;
 369        $diff_header_regexp .= ' -\d+,\d+' x @$parents;
 370        $diff_header_regexp .= ' \+(\d+),\d+';
 371        $diff_header_regexp .= " " . ("@" x @$parents);
 372
 373        my %claim_regexps;
 374        my $allparentplus = '^' . '\\+' x @$parents . '(.*)$';
 375
 376        {
 377                my $i = 0;
 378                foreach my $parent (@$parents) {
 379
 380                        $pi{$parent} = 0;
 381                        my $r = '^' . '.' x @$parents . '(.*)$';
 382                        my $p = $r;
 383                        substr($p,$i+1, 1) = '\\+';
 384
 385                        my $m = $r;
 386                        substr($m,$i+1, 1) = '-';
 387
 388                        $claim_regexps{$parent}{plus} = $p;
 389                        $claim_regexps{$parent}{minus} = $m;
 390
 391                        $plines{$parent} = [];
 392
 393                        $i++;
 394                }
 395        }
 396
 397        DIFF:
 398        while(<$diff>) {
 399                chomp;
 400                #printf("%d:%s:\n", $gotheader, $_);
 401                if (m/$diff_header_regexp/) {
 402                        $remstart = $1 - 1;
 403                        # (0-based arrays)
 404
 405                        $gotheader = 1;
 406
 407                        foreach my $parent (@$parents) {
 408                                for (my $i = $ri; $i < $remstart; $i++) {
 409                                        $plines{$parent}[$pi{$parent}++] = $slines->[$i];
 410                                }
 411                        }
 412                        $ri = $remstart;
 413
 414                        next DIFF;
 415
 416                } elsif (!$gotheader) {
 417                        # Skip over the leadin.
 418                        next DIFF;
 419                }
 420
 421                if (m/^\\/) {
 422                        ;
 423                        # Skip \No newline at end of file.
 424                        # But this can be internationalized, so only look
 425                        # for an initial \
 426
 427                } else {
 428                        my %claims = ();
 429                        my $negclaim = 0;
 430                        my $allclaimed = 0;
 431                        my $line;
 432
 433                        if (m/$allparentplus/) {
 434                                claim_line($ri, $rev, $slines, %revinfo);
 435                                $allclaimed = 1;
 436
 437                        }
 438
 439                        PARENT:
 440                        foreach my $parent (keys %claim_regexps) {
 441                                my $m = $claim_regexps{$parent}{minus};
 442                                my $p = $claim_regexps{$parent}{plus};
 443
 444                                if (m/$m/) {
 445                                        $line = $1;
 446                                        $plines{$parent}[$pi{$parent}++] = [ $line, '', '', '', 0 ];
 447                                        $negclaim++;
 448
 449                                } elsif (m/$p/) {
 450                                        $line = $1;
 451                                        if (get_line($slines, $ri) eq $line) {
 452                                                # Found a match, claim
 453                                                $claims{$parent}++;
 454
 455                                        } else {
 456                                                die sprintf("Sync error: %d\n|%s\n|%s\n%s => %s\n",
 457                                                                $ri, $line,
 458                                                                get_line($slines, $ri),
 459                                                                $rev, $parent);
 460                                        }
 461                                }
 462                        }
 463
 464                        if (%claims) {
 465                                foreach my $parent (@$parents) {
 466                                        next if $claims{$parent} || $allclaimed;
 467                                        $plines{$parent}[$pi{$parent}++] = $slines->[$ri];
 468                                            #[ $line, '', '', '', 0 ];
 469                                }
 470                                $ri++;
 471
 472                        } elsif ($negclaim) {
 473                                next DIFF;
 474
 475                        } else {
 476                                if (substr($_,scalar @$parents) ne get_line($slines,$ri) ) {
 477                                        foreach my $parent (@$parents) {
 478                                                printf("parent %s is on line %d\n", $parent, $pi{$parent});
 479                                        }
 480
 481                                        my @context;
 482                                        for (my $i = -2; $i < 2; $i++) {
 483                                                push @context, get_line($slines, $ri + $i);
 484                                        }
 485                                        my $context = join("\n", @context);
 486
 487                                        my $justline = substr($_, scalar @$parents);
 488                                        die sprintf("Line %d, does not match:\n|%s|\n|%s|\n%s\n",
 489                                                    $ri,
 490                                                    $justline,
 491                                                    $context);
 492                                }
 493                                foreach my $parent (@$parents) {
 494                                        $plines{$parent}[$pi{$parent}++] = $slines->[$ri];
 495                                }
 496                                $ri++;
 497                        }
 498                }
 499        }
 500
 501        for (my $i = $ri; $i < @{$slines} ; $i++) {
 502                foreach my $parent (@$parents) {
 503                        push @{$plines{$parent}}, $slines->[$ri];
 504                }
 505                $ri++;
 506        }
 507
 508        foreach my $parent (@$parents) {
 509                $revs{$parent}{lines} = $plines{$parent};
 510        }
 511
 512        return;
 513}
 514
 515sub get_line {
 516        my ($lines, $index) = @_;
 517
 518        return ref $lines->[$index] ne '' ? $lines->[$index][0] : $lines->[$index];
 519}
 520
 521sub git_cat_file {
 522        my ($rev, $filename) = @_;
 523        return () unless defined $rev && defined $filename;
 524
 525        my $blob = git_ls_tree($rev, $filename);
 526        die "Failed to find a blob for $filename in rev $rev\n" if !defined $blob;
 527
 528        my @lines = split(/\n/, $repo->get_object('blob', $blob));
 529        pop @lines unless $lines[$#lines]; # Trailing newline
 530        return @lines;
 531}
 532
 533sub git_ls_tree {
 534        my ($rev, $filename) = @_;
 535
 536        my $lstree = $repo->command_output_pipe('ls-tree', $rev, $filename);
 537        my ($mode, $type, $blob, $tfilename);
 538        while(<$lstree>) {
 539                chomp;
 540                ($mode, $type, $blob, $tfilename) = split(/\s+/, $_, 4);
 541                last if ($tfilename eq $filename);
 542        }
 543        $repo->command_close_pipe($lstree);
 544
 545        return $blob if ($tfilename eq $filename);
 546        die "git-ls-tree failed to find blob for $filename";
 547}
 548
 549
 550
 551sub claim_line {
 552        my ($floffset, $rev, $lines, %revinfo) = @_;
 553        my $oline = get_line($lines, $floffset);
 554        @{$lines->[$floffset]} = ( $oline, $rev,
 555                $revinfo{'author'}, $revinfo{'author_date'} );
 556        #printf("Claiming line %d with rev %s: '%s'\n",
 557        #               $floffset, $rev, $oline) if 1;
 558}
 559
 560sub git_commit_info {
 561        my ($rev) = @_;
 562        my $commit = $repo->get_object('commit', $rev);
 563
 564        my %info;
 565        while ($commit =~ /(.*?)\n/g) {
 566                my $line = $1;
 567                if ($line =~ s/^author //) {
 568                        @info{'author', 'author_email', 'author_date'} = $repo->ident($line);
 569                } elsif ($line =~ s/^committer//) {
 570                        @info{'committer', 'committer_email', 'committer_date'} = $repo->ident($line);
 571                }
 572        }
 573
 574        return %info;
 575}
 576
 577sub format_date {
 578        if ($rawtime) {
 579                return $_[0];
 580        }
 581        my ($timestamp, $timezone) = split(' ', $_[0]);
 582        my $minutes = abs($timezone);
 583        $minutes = int($minutes / 100) * 60 + ($minutes % 100);
 584        if ($timezone < 0) {
 585            $minutes = -$minutes;
 586        }
 587        my $t = $timestamp + $minutes * 60;
 588        return strftime("%Y-%m-%d %H:%M:%S " . $timezone, gmtime($t));
 589}