grep --all-match
[gitweb.git] / git-annotate.perl
index 8f984318af406fd725097052b4a4f01b9141c1e0..215ed26f3aff4b12139359ca841a9a80c567a6e6 100755 (executable)
@@ -8,28 +8,66 @@
 
 use warnings;
 use strict;
+use Getopt::Long;
+use POSIX qw(strftime gmtime);
+use File::Basename qw(basename dirname);
+
+sub usage() {
+       print STDERR "Usage: ${\basename $0} [-s] [-S revs-file] file [ revision ]
+       -l, --long
+                       Show long rev (Defaults off)
+       -t, --time
+                       Show raw timestamp (Defaults off)
+       -r, --rename
+                       Follow renames (Defaults on).
+       -S, --rev-file revs-file
+                       Use revs from revs-file instead of calling git-rev-list
+       -h, --help
+                       This message.
+";
+
+       exit(1);
+}
 
-my $filename = shift @ARGV;
+our ($help, $longrev, $rename, $rawtime, $starting_rev, $rev_file) = (0, 0, 1);
 
+my $rc = GetOptions(   "long|l" => \$longrev,
+                       "time|t" => \$rawtime,
+                       "help|h" => \$help,
+                       "rename|r" => \$rename,
+                       "rev-file|S=s" => \$rev_file);
+if (!$rc or $help or !@ARGV) {
+       usage();
+}
+
+my $filename = shift @ARGV;
+if (@ARGV) {
+       $starting_rev = shift @ARGV;
+}
 
 my @stack = (
        {
-               'rev' => "HEAD",
+               'rev' => defined $starting_rev ? $starting_rev : "HEAD",
                'filename' => $filename,
        },
 );
 
-our (@lineoffsets, @pendinglineoffsets);
 our @filelines = ();
-open(F,"<",$filename)
-       or die "Failed to open filename: $!";
 
-while(<F>) {
-       chomp;
-       push @filelines, $_;
+if (defined $starting_rev) {
+       @filelines = git_cat_file($starting_rev, $filename);
+} else {
+       open(F,"<",$filename)
+               or die "Failed to open filename: $!";
+
+       while(<F>) {
+               chomp;
+               push @filelines, $_;
+       }
+       close(F);
+
 }
-close(F);
-our $leftover_lines = @filelines;
+
 our %revs;
 our @revqueue;
 our $head;
                my ($rev, @parents) = @$revinst;
                $head ||= $rev;
 
+               if (!defined($rev)) {
+                       $rev = "";
+               }
                $revs{$rev}{'filename'} = $bound->{'filename'};
                if (scalar @parents > 0) {
                        $revs{$rev}{'parents'} = \@parents;
                        next;
                }
 
+               if (!$rename) {
+                       next;
+               }
+
                my $newbound = find_parent_renames($rev, $bound->{'filename'});
                if ( exists $newbound->{'filename'} && $newbound->{'filename'} ne $bound->{'filename'}) {
                        push @stack, $newbound;
        }
 }
 push @revqueue, $head;
-init_claim($head);
-$revs{$head}{'lineoffsets'} = {};
+init_claim( defined $starting_rev ? $head : 'dirty');
+unless (defined $starting_rev) {
+       my $diff = open_pipe("git","diff","HEAD", "--",$filename)
+               or die "Failed to call git diff to check for dirty state: $!";
+
+       _git_diff_parse($diff, [$head], "dirty", (
+                               'author' => gitvar_name("GIT_AUTHOR_IDENT"),
+                               'author_date' => sprintf("%s +0000",time()),
+                               )
+                       );
+       close($diff);
+}
 handle_rev();
 
 
        my ($output, $rev, $committer, $date);
        if (ref $l eq 'ARRAY') {
                ($output, $rev, $committer, $date) = @$l;
-               if (length($rev) > 8) {
+               if (!$longrev && length($rev) > 8) {
                        $rev = substr($rev,0,8);
                }
        } else {
                ($rev, $committer, $date) = ('unknown', 'unknown', 'unknown');
        }
 
-       printf("(%8s %10s %10s %d)%s\n", $rev, $committer, $date, $i++, $output);
+       printf("%s\t(%10s\t%10s\t%d)%s\n", $rev, $committer,
+               format_date($date), ++$i, $output);
 }
 
 sub init_claim {
        my ($rev) = @_;
-       my %revinfo = git_commit_info($rev);
        for (my $i = 0; $i < @filelines; $i++) {
                $filelines[$i] = [ $filelines[$i], '', '', '', 1];
                        # line,
@@ -92,19 +147,20 @@ sub init_claim {
 
 
 sub handle_rev {
-       my $i = 0;
+       my $revseen = 0;
+       my %seen;
        while (my $rev = shift @revqueue) {
+               next if $seen{$rev}++;
 
                my %revinfo = git_commit_info($rev);
 
-               foreach my $p (@{$revs{$rev}{'parents'}}) {
-
-                       git_diff_parse($p, $rev, %revinfo);
-                       push @revqueue, $p;
-               }
+               if (exists $revs{$rev}{parents} &&
+                   scalar @{$revs{$rev}{parents}} != 0) {
 
+                       git_diff_parse($revs{$rev}{'parents'}, $rev, %revinfo);
+                       push @revqueue, @{$revs{$rev}{'parents'}};
 
-               if (scalar @{$revs{$rev}{parents}} == 0) {
+               } else {
                        # We must be at the initial rev here, so claim everything that is left.
                        for (my $i = 0; $i < @{$revs{$rev}{lines}}; $i++) {
                                if (ref ${$revs{$rev}{lines}}[$i] eq '' || ${$revs{$rev}{lines}}[$i][1] eq '') {
@@ -119,16 +175,22 @@ sub handle_rev {
 sub git_rev_list {
        my ($rev, $file) = @_;
 
-       open(P,"-|","git-rev-list","--parents","--remove-empty",$rev,"--",$file)
-               or die "Failed to exec git-rev-list: $!";
+       my $revlist;
+       if ($rev_file) {
+               open($revlist, '<' . $rev_file)
+                   or die "Failed to open $rev_file : $!";
+       } else {
+               $revlist = open_pipe("git-rev-list","--parents","--remove-empty",$rev,"--",$file)
+                       or die "Failed to exec git-rev-list: $!";
+       }
 
        my @revs;
-       while(my $line = <P>) {
+       while(my $line = <$revlist>) {
                chomp $line;
                my ($rev, @parents) = split /\s+/, $line;
                push @revs, [ $rev, @parents ];
        }
-       close(P);
+       close($revlist);
 
        printf("0 revs found for rev %s (%s)\n", $rev, $file) if (@revs == 0);
        return @revs;
@@ -137,22 +199,25 @@ sub git_rev_list {
 sub find_parent_renames {
        my ($rev, $file) = @_;
 
-       open(P,"-|","git-diff-tree", "-M50", "-r","--name-status", "-z","$rev")
+       my $patch = open_pipe("git-diff-tree", "-M50", "-r","--name-status", "-z","$rev")
                or die "Failed to exec git-diff: $!";
 
        local $/ = "\0";
        my %bound;
-       my $junk = <P>;
-       while (my $change = <P>) {
+       my $junk = <$patch>;
+       while (my $change = <$patch>) {
                chomp $change;
-               my $filename = <P>;
+               my $filename = <$patch>;
+               if (!defined $filename) {
+                       next;
+               }
                chomp $filename;
 
                if ($change =~ m/^[AMD]$/ ) {
                        next;
                } elsif ($change =~ m/^R/ ) {
                        my $oldfilename = $filename;
-                       $filename = <P>;
+                       $filename = <$patch>;
                        chomp $filename;
                        if ( $file eq $filename ) {
                                my $parent = git_find_parent($rev, $oldfilename);
@@ -161,7 +226,7 @@ sub find_parent_renames {
                        }
                }
        }
-       close(P);
+       close($patch);
 
        return \%bound;
 }
@@ -170,93 +235,302 @@ sub find_parent_renames {
 sub git_find_parent {
        my ($rev, $filename) = @_;
 
-       open(REVPARENT,"-|","git-rev-list","--remove-empty", "--parents","--max-count=1","$rev","--",$filename)
+       my $revparent = open_pipe("git-rev-list","--remove-empty", "--parents","--max-count=1","$rev","--",$filename)
                or die "Failed to open git-rev-list to find a single parent: $!";
 
-       my $parentline = <REVPARENT>;
+       my $parentline = <$revparent>;
        chomp $parentline;
        my ($revfound,$parent) = split m/\s+/, $parentline;
 
-       close(REVPARENT);
+       close($revparent);
 
        return $parent;
 }
 
+sub git_find_all_parents {
+       my ($rev) = @_;
+
+       my $revparent = open_pipe("git-rev-list","--remove-empty", "--parents","--max-count=1","$rev")
+               or die "Failed to open git-rev-list to find a single parent: $!";
+
+       my $parentline = <$revparent>;
+       chomp $parentline;
+       my ($origrev, @parents) = split m/\s+/, $parentline;
+
+       close($revparent);
+
+       return @parents;
+}
+
+sub git_merge_base {
+       my ($rev1, $rev2) = @_;
+
+       my $mb = open_pipe("git-merge-base", $rev1, $rev2)
+               or die "Failed to open git-merge-base: $!";
+
+       my $base = <$mb>;
+       chomp $base;
+
+       close($mb);
+
+       return $base;
+}
+
+# Construct a set of pseudo parents that are in the same order,
+# and the same quantity as the real parents,
+# but whose SHA1s are as similar to the logical parents
+# as possible.
+sub get_pseudo_parents {
+       my ($all, $fake) = @_;
+
+       my @all = @$all;
+       my @fake = @$fake;
+
+       my @pseudo;
+
+       my %fake = map {$_ => 1} @fake;
+       my %seenfake;
+
+       my $fakeidx = 0;
+       foreach my $p (@all) {
+               if (exists $fake{$p}) {
+                       if ($fake[$fakeidx] ne $p) {
+                               die sprintf("parent mismatch: %s != %s\nall:%s\nfake:%s\n",
+                                           $fake[$fakeidx], $p,
+                                           join(", ", @all),
+                                           join(", ", @fake),
+                                          );
+                       }
+
+                       push @pseudo, $p;
+                       $fakeidx++;
+                       $seenfake{$p}++;
+
+               } else {
+                       my $base = git_merge_base($fake[$fakeidx], $p);
+                       if ($base ne $fake[$fakeidx]) {
+                               die sprintf("Result of merge-base doesn't match fake: %s,%s != %s\n",
+                                      $fake[$fakeidx], $p, $base);
+                       }
+
+                       # The details of how we parse the diffs
+                       # mean that we cannot have a duplicate
+                       # revision in the list, so if we've already
+                       # seen the revision we would normally add, just use
+                       # the actual revision.
+                       if ($seenfake{$base}) {
+                               push @pseudo, $p;
+                       } else {
+                               push @pseudo, $base;
+                               $seenfake{$base}++;
+                       }
+               }
+       }
+
+       return @pseudo;
+}
+
 
 # Get a diff between the current revision and a parent.
 # Record the commit information that results.
 sub git_diff_parse {
-       my ($parent, $rev, %revinfo) = @_;
+       my ($parents, $rev, %revinfo) = @_;
+
+       my @pseudo_parents;
+       my @command = ("git-diff-tree");
+       my $revision_spec;
+
+       if (scalar @$parents == 1) {
+
+               $revision_spec = join("..", $parents->[0], $rev);
+               @pseudo_parents = @$parents;
+       } else {
+               my @all_parents = git_find_all_parents($rev);
+
+               if (@all_parents !=  @$parents) {
+                       @pseudo_parents = get_pseudo_parents(\@all_parents, $parents);
+               } else {
+                       @pseudo_parents = @$parents;
+               }
 
-       my ($ri, $pi) = (0,0);
-       open(DIFF,"-|","git-diff-tree","-M","-p",$rev,$parent,"--",
-                       $revs{$rev}{'filename'}, $revs{$parent}{'filename'})
+               $revision_spec = $rev;
+               push @command, "-c";
+       }
+
+       my @filenames = ( $revs{$rev}{'filename'} );
+
+       foreach my $parent (@$parents) {
+               push @filenames, $revs{$parent}{'filename'};
+       }
+
+       push @command, "-p", "-M", $revision_spec, "--", @filenames;
+
+
+       my $diff = open_pipe( @command )
                or die "Failed to call git-diff for annotation: $!";
 
+       _git_diff_parse($diff, \@pseudo_parents, $rev, %revinfo);
+
+       close($diff);
+}
+
+sub _git_diff_parse {
+       my ($diff, $parents, $rev, %revinfo) = @_;
+
+       my $ri = 0;
+
        my $slines = $revs{$rev}{'lines'};
-       my @plines;
+       my (%plines, %pi);
 
        my $gotheader = 0;
-       my ($remstart, $remlength, $addstart, $addlength);
-       my ($hunk_start, $hunk_index, $hunk_adds);
-       while(<DIFF>) {
+       my ($remstart);
+       my $parent_count = @$parents;
+
+       my $diff_header_regexp = "^@";
+       $diff_header_regexp .= "@" x @$parents;
+       $diff_header_regexp .= ' -\d+,\d+' x @$parents;
+       $diff_header_regexp .= ' \+(\d+),\d+';
+       $diff_header_regexp .= " " . ("@" x @$parents);
+
+       my %claim_regexps;
+       my $allparentplus = '^' . '\\+' x @$parents . '(.*)$';
+
+       {
+               my $i = 0;
+               foreach my $parent (@$parents) {
+
+                       $pi{$parent} = 0;
+                       my $r = '^' . '.' x @$parents . '(.*)$';
+                       my $p = $r;
+                       substr($p,$i+1, 1) = '\\+';
+
+                       my $m = $r;
+                       substr($m,$i+1, 1) = '-';
+
+                       $claim_regexps{$parent}{plus} = $p;
+                       $claim_regexps{$parent}{minus} = $m;
+
+                       $plines{$parent} = [];
+
+                       $i++;
+               }
+       }
+
+       DIFF:
+       while(<$diff>) {
                chomp;
-               if (m/^@@ -(\d+),(\d+) \+(\d+),(\d+)/) {
-                       ($remstart, $remlength, $addstart, $addlength) = ($1, $2, $3, $4);
-                       # Adjust for 0-based arrays
-                       $remstart--;
-                       $addstart--;
-                       # Reinit hunk tracking.
-                       $hunk_start = $remstart;
-                       $hunk_index = 0;
+               #printf("%d:%s:\n", $gotheader, $_);
+               if (m/$diff_header_regexp/) {
+                       $remstart = $1 - 1;
+                       # (0-based arrays)
+
                        $gotheader = 1;
 
-                       for (my $i = $ri; $i < $remstart; $i++) {
-                               $plines[$pi++] = $slines->[$i];
-                               $ri++;
+                       foreach my $parent (@$parents) {
+                               for (my $i = $ri; $i < $remstart; $i++) {
+                                       $plines{$parent}[$pi{$parent}++] = $slines->[$i];
+                               }
                        }
-                       next;
+                       $ri = $remstart;
+
+                       next DIFF;
+
                } elsif (!$gotheader) {
-                       next;
+                       # Skip over the leadin.
+                       next DIFF;
                }
 
-               if (m/^\+(.*)$/) {
-                       my $line = $1;
-                       $plines[$pi++] = [ $line, '', '', '', 0 ];
-                       next;
+               if (m/^\\/) {
+                       ;
+                       # Skip \No newline at end of file.
+                       # But this can be internationalized, so only look
+                       # for an initial \
+
+               } else {
+                       my %claims = ();
+                       my $negclaim = 0;
+                       my $allclaimed = 0;
+                       my $line;
 
-               } elsif (m/^-(.*)$/) {
-                       my $line = $1;
-                       if (get_line($slines, $ri) eq $line) {
-                               # Found a match, claim
+                       if (m/$allparentplus/) {
                                claim_line($ri, $rev, $slines, %revinfo);
-                       } else {
-                               die sprintf("Sync error: %d/%d\n|%s\n|%s\n%s => %s\n",
-                                               $ri, $hunk_start + $hunk_index,
-                                               $line,
-                                               get_line($slines, $ri),
-                                               $rev, $parent);
+                               $allclaimed = 1;
+
                        }
-                       $ri++;
 
-               } else {
-                       if (substr($_,1) ne get_line($slines,$ri) ) {
-                               die sprintf("Line %d (%d) does not match:\n|%s\n|%s\n%s => %s\n",
-                                               $hunk_start + $hunk_index, $ri,
-                                               substr($_,1),
-                                               get_line($slines,$ri),
-                                               $rev, $parent);
+                       PARENT:
+                       foreach my $parent (keys %claim_regexps) {
+                               my $m = $claim_regexps{$parent}{minus};
+                               my $p = $claim_regexps{$parent}{plus};
+
+                               if (m/$m/) {
+                                       $line = $1;
+                                       $plines{$parent}[$pi{$parent}++] = [ $line, '', '', '', 0 ];
+                                       $negclaim++;
+
+                               } elsif (m/$p/) {
+                                       $line = $1;
+                                       if (get_line($slines, $ri) eq $line) {
+                                               # Found a match, claim
+                                               $claims{$parent}++;
+
+                                       } else {
+                                               die sprintf("Sync error: %d\n|%s\n|%s\n%s => %s\n",
+                                                               $ri, $line,
+                                                               get_line($slines, $ri),
+                                                               $rev, $parent);
+                                       }
+                               }
+                       }
+
+                       if (%claims) {
+                               foreach my $parent (@$parents) {
+                                       next if $claims{$parent} || $allclaimed;
+                                       $plines{$parent}[$pi{$parent}++] = $slines->[$ri];
+                                           #[ $line, '', '', '', 0 ];
+                               }
+                               $ri++;
+
+                       } elsif ($negclaim) {
+                               next DIFF;
+
+                       } else {
+                               if (substr($_,scalar @$parents) ne get_line($slines,$ri) ) {
+                                       foreach my $parent (@$parents) {
+                                               printf("parent %s is on line %d\n", $parent, $pi{$parent});
+                                       }
+
+                                       my @context;
+                                       for (my $i = -2; $i < 2; $i++) {
+                                               push @context, get_line($slines, $ri + $i);
+                                       }
+                                       my $context = join("\n", @context);
+
+                                       my $justline = substr($_, scalar @$parents);
+                                       die sprintf("Line %d, does not match:\n|%s|\n|%s|\n%s\n",
+                                                   $ri,
+                                                   $justline,
+                                                   $context);
+                               }
+                               foreach my $parent (@$parents) {
+                                       $plines{$parent}[$pi{$parent}++] = $slines->[$ri];
+                               }
+                               $ri++;
                        }
-                       $plines[$pi++] = $slines->[$ri++];
                }
-               $hunk_index++;
        }
-       close(DIFF);
+
        for (my $i = $ri; $i < @{$slines} ; $i++) {
-               push @plines, $slines->[$ri++];
+               foreach my $parent (@$parents) {
+                       push @{$plines{$parent}}, $slines->[$ri];
+               }
+               $ri++;
+       }
+
+       foreach my $parent (@$parents) {
+               $revs{$parent}{lines} = $plines{$parent};
        }
 
-       $revs{$parent}{lines} = \@plines;
        return;
 }
 
@@ -267,24 +541,45 @@ sub get_line {
 }
 
 sub git_cat_file {
-       my ($parent, $filename) = @_;
-       return () unless defined $parent && defined $filename;
-       my $blobline = `git-ls-tree $parent $filename`;
-       my ($mode, $type, $blob, $tfilename) = split(/\s+/, $blobline, 4);
+       my ($rev, $filename) = @_;
+       return () unless defined $rev && defined $filename;
 
-       open(C,"-|","git-cat-file", "blob", $blob)
-               or die "Failed to git-cat-file blob $blob (rev $parent, file $filename): " . $!;
+       my $blob = git_ls_tree($rev, $filename);
+       die "Failed to find a blob for $filename in rev $rev\n" if !defined $blob;
+
+       my $catfile = open_pipe("git","cat-file", "blob", $blob)
+               or die "Failed to git-cat-file blob $blob (rev $rev, file $filename): " . $!;
 
        my @lines;
-       while(<C>) {
+       while(<$catfile>) {
                chomp;
                push @lines, $_;
        }
-       close(C);
+       close($catfile);
 
        return @lines;
 }
 
+sub git_ls_tree {
+       my ($rev, $filename) = @_;
+
+       my $lstree = open_pipe("git","ls-tree",$rev,$filename)
+               or die "Failed to call git ls-tree: $!";
+
+       my ($mode, $type, $blob, $tfilename);
+       while(<$lstree>) {
+               chomp;
+               ($mode, $type, $blob, $tfilename) = split(/\s+/, $_, 4);
+               last if ($tfilename eq $filename);
+       }
+       close($lstree);
+
+       return $blob if ($tfilename eq $filename);
+       die "git-ls-tree failed to find blob for $filename";
+
+}
+
+
 
 sub claim_line {
        my ($floffset, $rev, $lines, %revinfo) = @_;
@@ -297,11 +592,11 @@ sub claim_line {
 
 sub git_commit_info {
        my ($rev) = @_;
-       open(COMMIT, "-|","git-cat-file", "commit", $rev)
+       my $commit = open_pipe("git-cat-file", "commit", $rev)
                or die "Failed to call git-cat-file: $!";
 
        my %info;
-       while(<COMMIT>) {
+       while(<$commit>) {
                chomp;
                last if (length $_ == 0);
 
@@ -315,7 +610,99 @@ sub git_commit_info {
                        $info{'committer_date'} = $3;
                }
        }
-       close(COMMIT);
+       close($commit);
 
        return %info;
 }
+
+sub format_date {
+       if ($rawtime) {
+               return $_[0];
+       }
+       my ($timestamp, $timezone) = split(' ', $_[0]);
+       my $minutes = abs($timezone);
+       $minutes = int($minutes / 100) * 60 + ($minutes % 100);
+       if ($timezone < 0) {
+           $minutes = -$minutes;
+       }
+       my $t = $timestamp + $minutes * 60;
+       return strftime("%Y-%m-%d %H:%M:%S " . $timezone, gmtime($t));
+}
+
+# Copied from git-send-email.perl - We need a Git.pm module..
+sub gitvar {
+    my ($var) = @_;
+    my $fh;
+    my $pid = open($fh, '-|');
+    die "$!" unless defined $pid;
+    if (!$pid) {
+       exec('git-var', $var) or die "$!";
+    }
+    my ($val) = <$fh>;
+    close $fh or die "$!";
+    chomp($val);
+    return $val;
+}
+
+sub gitvar_name {
+    my ($name) = @_;
+    my $val = gitvar($name);
+    my @field = split(/\s+/, $val);
+    return join(' ', @field[0...(@field-4)]);
+}
+
+sub open_pipe {
+       if ($^O eq '##INSERT_ACTIVESTATE_STRING_HERE##') {
+               return open_pipe_activestate(@_);
+       } else {
+               return open_pipe_normal(@_);
+       }
+}
+
+sub open_pipe_activestate {
+       tie *fh, "Git::ActiveStatePipe", @_;
+       return *fh;
+}
+
+sub open_pipe_normal {
+       my (@execlist) = @_;
+
+       my $pid = open my $kid, "-|";
+       defined $pid or die "Cannot fork: $!";
+
+       unless ($pid) {
+               exec @execlist;
+               die "Cannot exec @execlist: $!";
+       }
+
+       return $kid;
+}
+
+package Git::ActiveStatePipe;
+use strict;
+
+sub TIEHANDLE {
+       my ($class, @params) = @_;
+       my $cmdline = join " ", @params;
+       my  @data = qx{$cmdline};
+       bless { i => 0, data => \@data }, $class;
+}
+
+sub READLINE {
+       my $self = shift;
+       if ($self->{i} >= scalar @{$self->{data}}) {
+               return undef;
+       }
+       return $self->{'data'}->[ $self->{i}++ ];
+}
+
+sub CLOSE {
+       my $self = shift;
+       delete $self->{data};
+       delete $self->{i};
+}
+
+sub EOF {
+       my $self = shift;
+       return ($self->{i} >= scalar @{$self->{data}});
+}