1#!/usr/bin/perl
2# Copyright 2006, Ryan Anderson <ryan@michonline.com>
3#
4# GPL v2 (See COPYING)
5#
6# This file is licensed under the GPL v2, or a later version
7# at the discretion of Linus Torvalds.
8
9use warnings;
10use strict;
11use Getopt::Long;
12use POSIX qw(strftime gmtime);
13use File::Basename qw(basename dirname);
14use Git;
15
16sub usage() {
17 print STDERR "Usage: ${\basename $0} [-s] [-S revs-file] file [ revision ]
18 -l, --long
19 Show long rev (Defaults off)
20 -t, --time
21 Show raw timestamp (Defaults off)
22 -r, --rename
23 Follow renames (Defaults on).
24 -S, --rev-file revs-file
25 Use revs from revs-file instead of calling git-rev-list
26 -h, --help
27 This message.
28";
29
30 exit(1);
31}
32
33our ($help, $longrev, $rename, $rawtime, $starting_rev, $rev_file, $repo) = (0, 0, 1);
34
35my $rc = GetOptions( "long|l" => \$longrev,
36 "time|t" => \$rawtime,
37 "help|h" => \$help,
38 "rename|r" => \$rename,
39 "rev-file|S=s" => \$rev_file);
40if (!$rc or $help or !@ARGV) {
41 usage();
42}
43
44my $filename = shift @ARGV;
45if (@ARGV) {
46 $starting_rev = shift @ARGV;
47}
48
49my @stack = (
50 {
51 'rev' => defined $starting_rev ? $starting_rev : "HEAD",
52 'filename' => $filename,
53 },
54);
55
56$repo = Git->repository();
57
58our @filelines = ();
59
60if (defined $starting_rev) {
61 @filelines = git_cat_file($starting_rev, $filename);
62} else {
63 open(F,"<",$filename)
64 or die "Failed to open filename: $!";
65
66 while(<F>) {
67 chomp;
68 push @filelines, $_;
69 }
70 close(F);
71
72}
73
74our %revs;
75our @revqueue;
76our $head;
77
78my $revsprocessed = 0;
79while (my $bound = pop @stack) {
80 my @revisions = git_rev_list($bound->{'rev'}, $bound->{'filename'});
81 foreach my $revinst (@revisions) {
82 my ($rev, @parents) = @$revinst;
83 $head ||= $rev;
84
85 if (!defined($rev)) {
86 $rev = "";
87 }
88 $revs{$rev}{'filename'} = $bound->{'filename'};
89 if (scalar @parents > 0) {
90 $revs{$rev}{'parents'} = \@parents;
91 next;
92 }
93
94 if (!$rename) {
95 next;
96 }
97
98 my $newbound = find_parent_renames($rev, $bound->{'filename'});
99 if ( exists $newbound->{'filename'} && $newbound->{'filename'} ne $bound->{'filename'}) {
100 push @stack, $newbound;
101 $revs{$rev}{'parents'} = [$newbound->{'rev'}];
102 }
103 }
104}
105push @revqueue, $head;
106init_claim( defined $starting_rev ? $head : 'dirty');
107unless (defined $starting_rev) {
108 my %ident;
109 @ident{'author', 'author_email', 'author_date'} = $repo->ident('author');
110 my $diff = $repo->command_output_pipe('diff', '-R', 'HEAD', '--', $filename);
111 _git_diff_parse($diff, $head, "dirty", %ident);
112 $repo->command_close_pipe($diff);
113}
114handle_rev();
115
116
117my $i = 0;
118foreach my $l (@filelines) {
119 my ($output, $rev, $committer, $date);
120 if (ref $l eq 'ARRAY') {
121 ($output, $rev, $committer, $date) = @$l;
122 if (!$longrev && length($rev) > 8) {
123 $rev = substr($rev,0,8);
124 }
125 } else {
126 $output = $l;
127 ($rev, $committer, $date) = ('unknown', 'unknown', 'unknown');
128 }
129
130 printf("%s\t(%10s\t%10s\t%d)%s\n", $rev, $committer,
131 format_date($date), ++$i, $output);
132}
133
134sub init_claim {
135 my ($rev) = @_;
136 for (my $i = 0; $i < @filelines; $i++) {
137 $filelines[$i] = [ $filelines[$i], '', '', '', 1];
138 # line,
139 # rev,
140 # author,
141 # date,
142 # 1 <-- belongs to the original file.
143 }
144 $revs{$rev}{'lines'} = \@filelines;
145}
146
147
148sub handle_rev {
149 my $i = 0;
150 my %seen;
151 while (my $rev = shift @revqueue) {
152 next if $seen{$rev}++;
153
154 my %revinfo = git_commit_info($rev);
155
156 foreach my $p (@{$revs{$rev}{'parents'}}) {
157
158 git_diff_parse($p, $rev, %revinfo);
159 push @revqueue, $p;
160 }
161
162
163 if (scalar @{$revs{$rev}{parents}} == 0) {
164 # We must be at the initial rev here, so claim everything that is left.
165 for (my $i = 0; $i < @{$revs{$rev}{lines}}; $i++) {
166 if (ref ${$revs{$rev}{lines}}[$i] eq '' || ${$revs{$rev}{lines}}[$i][1] eq '') {
167 claim_line($i, $rev, $revs{$rev}{lines}, %revinfo);
168 }
169 }
170 }
171 }
172}
173
174
175sub git_rev_list {
176 my ($rev, $file) = @_;
177
178 my $revlist;
179 if ($rev_file) {
180 open($revlist, '<' . $rev_file)
181 or die "Failed to open $rev_file : $!";
182 } else {
183 $revlist = $repo->command_output_pipe('rev-list', '--parents', '--remove-empty', $rev, '--', $file);
184 }
185
186 my @revs;
187 while(my $line = <$revlist>) {
188 chomp $line;
189 my ($rev, @parents) = split /\s+/, $line;
190 push @revs, [ $rev, @parents ];
191 }
192 $repo->command_close_pipe($revlist);
193
194 printf("0 revs found for rev %s (%s)\n", $rev, $file) if (@revs == 0);
195 return @revs;
196}
197
198sub find_parent_renames {
199 my ($rev, $file) = @_;
200
201 my $patch = $repo->command_output_pipe('diff-tree', '-M50', '-r', '--name-status', '-z', $rev);
202
203 local $/ = "\0";
204 my %bound;
205 my $junk = <$patch>;
206 while (my $change = <$patch>) {
207 chomp $change;
208 my $filename = <$patch>;
209 if (!defined $filename) {
210 next;
211 }
212 chomp $filename;
213
214 if ($change =~ m/^[AMD]$/ ) {
215 next;
216 } elsif ($change =~ m/^R/ ) {
217 my $oldfilename = $filename;
218 $filename = <$patch>;
219 chomp $filename;
220 if ( $file eq $filename ) {
221 my $parent = git_find_parent($rev, $oldfilename);
222 @bound{'rev','filename'} = ($parent, $oldfilename);
223 last;
224 }
225 }
226 }
227 $repo->command_close_pipe($patch);
228
229 return \%bound;
230}
231
232
233sub git_find_parent {
234 my ($rev, $filename) = @_;
235
236 my $parentline = $repo->command_oneline('rev-list', '--remove-empty',
237 '--parents', '--max-count=1', $rev, '--', $filename);
238 my ($revfound, $parent) = split m/\s+/, $parentline;
239
240 return $parent;
241}
242
243
244# Get a diff between the current revision and a parent.
245# Record the commit information that results.
246sub git_diff_parse {
247 my ($parent, $rev, %revinfo) = @_;
248
249 my $diff = $repo->command_output_pipe('diff-tree', '-M', '-p',
250 $rev, $parent, '--',
251 $revs{$rev}{'filename'}, $revs{$parent}{'filename'});
252
253 _git_diff_parse($diff, $parent, $rev, %revinfo);
254
255 $repo->command_close_pipe($diff);
256}
257
258sub _git_diff_parse {
259 my ($diff, $parent, $rev, %revinfo) = @_;
260
261 my ($ri, $pi) = (0,0);
262 my $slines = $revs{$rev}{'lines'};
263 my @plines;
264
265 my $gotheader = 0;
266 my ($remstart);
267 my ($hunk_start, $hunk_index);
268 while(<$diff>) {
269 chomp;
270 if (m/^@@ -(\d+),(\d+) \+(\d+),(\d+)/) {
271 $remstart = $1;
272 # Adjust for 0-based arrays
273 $remstart--;
274 # Reinit hunk tracking.
275 $hunk_start = $remstart;
276 $hunk_index = 0;
277 $gotheader = 1;
278
279 for (my $i = $ri; $i < $remstart; $i++) {
280 $plines[$pi++] = $slines->[$i];
281 $ri++;
282 }
283 next;
284 } elsif (!$gotheader) {
285 next;
286 }
287
288 if (m/^\+(.*)$/) {
289 my $line = $1;
290 $plines[$pi++] = [ $line, '', '', '', 0 ];
291 next;
292
293 } elsif (m/^-(.*)$/) {
294 my $line = $1;
295 if (get_line($slines, $ri) eq $line) {
296 # Found a match, claim
297 claim_line($ri, $rev, $slines, %revinfo);
298 } else {
299 die sprintf("Sync error: %d/%d\n|%s\n|%s\n%s => %s\n",
300 $ri, $hunk_start + $hunk_index,
301 $line,
302 get_line($slines, $ri),
303 $rev, $parent);
304 }
305 $ri++;
306
307 } elsif (m/^\\/) {
308 ;
309 # Skip \No newline at end of file.
310 # But this can be internationalized, so only look
311 # for an initial \
312
313 } else {
314 if (substr($_,1) ne get_line($slines,$ri) ) {
315 die sprintf("Line %d (%d) does not match:\n|%s\n|%s\n%s => %s\n",
316 $hunk_start + $hunk_index, $ri,
317 substr($_,1),
318 get_line($slines,$ri),
319 $rev, $parent);
320 }
321 $plines[$pi++] = $slines->[$ri++];
322 }
323 $hunk_index++;
324 }
325 for (my $i = $ri; $i < @{$slines} ; $i++) {
326 push @plines, $slines->[$ri++];
327 }
328
329 $revs{$parent}{lines} = \@plines;
330 return;
331}
332
333sub get_line {
334 my ($lines, $index) = @_;
335
336 return ref $lines->[$index] ne '' ? $lines->[$index][0] : $lines->[$index];
337}
338
339sub git_cat_file {
340 my ($rev, $filename) = @_;
341 return () unless defined $rev && defined $filename;
342
343 my $blob = git_ls_tree($rev, $filename);
344 die "Failed to find a blob for $filename in rev $rev\n" if !defined $blob;
345
346 my @lines = split(/\n/, $repo->get_object('blob', $blob));
347 pop @lines unless $lines[$#lines]; # Trailing newline
348 return @lines;
349}
350
351sub git_ls_tree {
352 my ($rev, $filename) = @_;
353
354 my $lstree = $repo->command_output_pipe('ls-tree', $rev, $filename);
355 my ($mode, $type, $blob, $tfilename);
356 while(<$lstree>) {
357 chomp;
358 ($mode, $type, $blob, $tfilename) = split(/\s+/, $_, 4);
359 last if ($tfilename eq $filename);
360 }
361 $repo->command_close_pipe($lstree);
362
363 return $blob if ($tfilename eq $filename);
364 die "git-ls-tree failed to find blob for $filename";
365}
366
367
368
369sub claim_line {
370 my ($floffset, $rev, $lines, %revinfo) = @_;
371 my $oline = get_line($lines, $floffset);
372 @{$lines->[$floffset]} = ( $oline, $rev,
373 $revinfo{'author'}, $revinfo{'author_date'} );
374 #printf("Claiming line %d with rev %s: '%s'\n",
375 # $floffset, $rev, $oline) if 1;
376}
377
378sub git_commit_info {
379 my ($rev) = @_;
380 my $commit = $repo->get_object('commit', $rev);
381
382 my %info;
383 while ($commit =~ /(.*?)\n/g) {
384 my $line = $1;
385 if ($line =~ s/^author //) {
386 @info{'author', 'author_email', 'author_date'} = $repo->ident($line);
387 } elsif ($line =~ s/^committer//) {
388 @info{'committer', 'committer_email', 'committer_date'} = $repo->ident($line);
389 }
390 }
391
392 return %info;
393}
394
395sub format_date {
396 if ($rawtime) {
397 return $_[0];
398 }
399 my ($timestamp, $timezone) = split(' ', $_[0]);
400 my $minutes = abs($timezone);
401 $minutes = int($minutes / 100) * 60 + ($minutes % 100);
402 if ($timezone < 0) {
403 $minutes = -$minutes;
404 }
405 my $t = $timestamp + $minutes * 60;
406 return strftime("%Y-%m-%d %H:%M:%S " . $timezone, gmtime($t));
407}