1package Git::SVN::Fetcher;
2use vars qw/@ISA $_ignore_regex $_preserve_empty_dirs $_placeholder_filename
3 @deleted_gpath %added_placeholder $repo_id/;
4use strict;
5use warnings;
6use SVN::Delta;
7use Carp qw/croak/;
8use File::Basename qw/dirname/;
9use IO::File qw//;
10use Git qw/command command_oneline command_noisy command_output_pipe
11 command_input_pipe command_close_pipe
12 command_bidi_pipe command_close_bidi_pipe/;
13BEGIN {
14 @ISA = qw(SVN::Delta::Editor);
15}
16
# file baton members: path, mode_a, mode_b, pool, fh, blob, base, action
# Build a fetcher: an SVN::Delta::Editor re-blessed into $class that records
# the results of a tree delta for one Git::SVN ref.
#
# Arguments:
#   $class       - class to bless the editor into
#   $git_svn     - object supplying {repo_id}, optionally {last_commit},
#                  and the tmp_index_do method
#   $switch_path - optional prefix handed on to _mark_empty_symlinks
#
# Side effects: sets the package global $repo_id and, depending on the
# [svn-remote "..."] configuration, $_preserve_empty_dirs,
# $_placeholder_filename and %added_placeholder.
#
# Returns the new object.
sub new {
	my ($class, $git_svn, $switch_path) = @_;
	my $self = SVN::Delta::Editor->new;
	bless $self, $class;
	if (exists $git_svn->{last_commit}) {
		$self->{c} = $git_svn->{last_commit};
		$self->{empty_symlinks} =
		                  _mark_empty_symlinks($git_svn, $switch_path);
	}

	# some options are read globally, but can be overridden locally
	# per [svn-remote "..."] section.  Command-line options will *NOT*
	# override options set in an [svn-remote "..."] section
	$repo_id = $git_svn->{repo_id};
	my $k = "svn-remote.$repo_id.ignore-paths";
	my $v = eval { command_oneline('config', '--get', $k) };
	# undef (key not set) is fine here: is_path_ignored checks defined()
	$self->{ignore_regex} = $v;

	$k = "svn-remote.$repo_id.preserve-empty-dirs";
	$v = eval { command_oneline('config', '--get', '--bool', $k) };
	if ($v && $v eq 'true') {
		$_preserve_empty_dirs = 1;
		$k = "svn-remote.$repo_id.placeholder-filename";
		$v = eval { command_oneline('config', '--get', $k) };
		# Only override the global value when the per-remote key
		# really is set; the eval yields undef when the lookup fails,
		# and an undefined/empty name would make add_placeholder_file
		# build the path "$dir/".
		$_placeholder_filename = $v if defined $v && length $v;
	}

	# Load the list of placeholder files added during previous invocations.
	$k = "svn-remote.$repo_id.added-placeholder";
	$v = eval { command_oneline('config', '--get-all', $k) };
	if ($_preserve_empty_dirs && $v) {
		# command() prints errors to stderr, so we only call it if
		# command_oneline() succeeded.
		my @v = command('config', '--get-all', $k);
		$added_placeholder{ dirname($_) } = $_ foreach @v;
	}

	$self->{empty} = {};
	$self->{dir_prop} = {};
	$self->{file_prop} = {};
	$self->{absent_dir} = {};
	$self->{absent_file} = {};
	$self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
	$self->{pathnameencoding} = Git::config('svn.pathnameencoding');
	return $self;
}
64
# Find empty regular files in the last fetched commit that SVN marks as
# svn:special, i.e. symlinks that were stored as empty blobs.
#
# This talks to the repository through $git_svn->ra, so it has to run
# before do_{switch,update} rather than inside them (when the
# Git::SVN::Fetcher object has already been passed to them).
#
# Returns a hash reference keyed by path (value 1); the hash is empty
# when svn.brokenSymlinkWorkaround is unset or false, or when there is
# no previous revision/commit.
sub _mark_empty_symlinks {
	my ($git_svn, $switch_path) = @_;

	my $enabled = Git::config_bool('svn.brokenSymlinkWorkaround');
	return {} unless $enabled;

	my ($rev, $cmt) = $git_svn->last_rev_commit;
	return {} unless ($rev && $cmt);

	my %special_paths;

	# The warning is (re)printed for every revision fetched so the user
	# is sure to see it.  The workaround can be switched off in the
	# repository while git svn is running; the next revision fetched
	# will then skip this expensive function.
	my $warned;
	chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
	my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
	local $/ = "\0";
	my $pfx = defined($switch_path) ? $switch_path : $git_svn->{path};
	$pfx .= '/' if length($pfx);
	while (<$ls>) {
		chomp;
		# only plain files whose content is the empty blob matter
		s/\A100644 blob $empty_blob\t//o or next;
		if (!$warned) {
			print STDERR "Scanning for empty symlinks, ",
			             "this may take a while if you have ",
				     "many empty files\n",
				     "You may disable this with `",
				     "git config svn.brokenSymlinkWorkaround ",
				     "false'.\n",
				     "This may be done in a different ",
				     "terminal without restarting ",
				     "git svn\n";
			$warned = 1;
		}
		my $path = $_;
		my (undef, $props) =
		               $git_svn->ra->get_file($pfx.$path, $rev, undef);
		$special_paths{$path} = 1 if $props->{'svn:special'};
	}
	command_close_pipe($ls, $ctx);
	return \%special_paths;
}
112
# True when the given path has a component that is exactly ".git"
# (at the start, in the middle, or at the end of the path).
sub in_dot_git {
	my ($path) = @_;
	return $path =~ m{(?:^|/)\.git(?:/|$)};
}
117
# Decide whether a path must be skipped.
# Returns 1 (ignore) when the path lies inside a ".git" directory, or
# matches the per-object {ignore_regex}, or matches the package-wide
# $_ignore_regex; returns 0 (don't ignore) otherwise.
sub is_path_ignored {
	my ($self, $path) = @_;

	return 1 if in_dot_git($path);

	my $local_re = $self->{ignore_regex};
	if (defined($local_re) && $path =~ m!$local_re!) {
		return 1;
	}

	return 0 unless defined($_ignore_regex);
	# /o: the global pattern is compiled only once
	return ($path =~ m!$_ignore_regex!o) ? 1 : 0;
}
128
# Remember a leading path prefix for git_path to strip.
# Nothing is stored when $path has zero length.
sub set_path_strip {
	my ($self, $path) = @_;
	if (length $path) {
		# the prefix must be followed by "/" or the end of the path
		$self->{path_strip} = qr/^\Q$path\E(\/|$)/;
	}
}
133
# Editor callback: the directory baton for the root has an empty path.
sub open_root {
	return { path => '' };
}
137
# Editor callback: the directory baton only records the path;
# the parent baton and revision arguments are not used.
sub open_directory {
	my ($self, $path, $pb, $rev) = @_;
	return { path => $path };
}
142
# Convert a path received from the editor into the path used on the git
# side: re-encode it from UTF-8 when {pathnameencoding} is set, then
# remove the {path_strip} prefix when one is configured.
# Dies if a configured prefix does not match the path.
sub git_path {
	my ($self, $path) = @_;

	my $enc = $self->{pathnameencoding};
	if ($enc) {
		require Encode;
		Encode::from_to($path, 'UTF-8', $enc);
	}

	my $strip = $self->{path_strip};
	if ($strip) {
		$path =~ s!$strip!! or
		  die "Failed to strip path '$path' ($strip)\n";
	}

	return $path;
}
155
# Editor callback: drop a file, or every file below a directory, from
# the index, and report each removal unless $::_q is set.
# Ignored paths and the (stripped) root are left alone.
# Always returns undef.
sub delete_entry {
	my ($self, $path, $rev, $pb) = @_;
	return undef if $self->is_path_ignored($path);

	my $gpath = $self->git_path($path);
	return undef if ($gpath eq '');

	# Ask ls-tree what $gpath is in the base commit; a tree entry means
	# the whole directory has to go.
	my $entry = command('ls-tree', '-z', $self->{c}, "./$gpath");
	my ($tree) = ($entry =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/);

	if (!$tree) {
		$self->{gii}->remove($gpath);
		print "\tD\t$gpath\n" unless $::_q;
	} else {
		my ($ls, $ctx) = command_output_pipe(qw/ls-tree
		                                     -r --name-only -z/,
				                     $tree);
		local $/ = "\0";
		while (defined(my $name = <$ls>)) {
			chomp $name;
			my $rmpath = "$gpath/$name";
			$self->{gii}->remove($rmpath);
			print "\tD\t$rmpath\n" unless $::_q;
		}
		print "\tD\t$gpath/\n" unless $::_q;
		command_close_pipe($ls, $ctx);
	}

	# Deleting one of our own placeholder files is not recorded in
	# @deleted_gpath.
	unless ($added_placeholder{dirname($path)}) {
		push @deleted_gpath, $gpath;
	}
	$self->{empty}->{$path} = 0;
	return undef;
}
188
# Editor callback: build the file baton for an existing file.
# Unless the path is ignored, its mode and blob are looked up in the
# base commit {c}; the callback dies when the file is not found there.
# A 100644 file listed in {empty_symlinks} is treated as a symlink.
# For ignored paths the baton carries undefined mode and blob.
sub open_file {
	my ($self, $path, $pb, $rev) = @_;
	my ($mode, $blob);

	unless ($self->is_path_ignored($path)) {
		my $gpath = $self->git_path($path);
		my $entry = command('ls-tree', '-z', $self->{c}, "./$gpath");
		($mode, $blob) =
		    ($entry =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/);
		if (!defined $mode || !defined $blob) {
			die "$path was not found in commit $self->{c} (r$rev)\n";
		}
		if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) {
			$mode = '120000';
		}
	}

	return {
		path   => $path,
		mode_a => $mode,
		mode_b => $mode,
		blob   => $blob,
		pool   => SVN::Pool->new,
		action => 'M',
	};
}
208
# Editor callback: build the file baton for a newly added file.
# Unless the path is ignored, the parent directory stops being recorded
# in {empty}, and a placeholder file previously created in that
# directory is deleted (unless the new file is that placeholder).
# For ignored paths the baton carries an undefined mode.
sub add_file {
	my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
	my $mode;

	unless ($self->is_path_ignored($path)) {
		# split into parent directory and last path component
		my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
		delete $self->{empty}->{$dir};
		$mode = '100644';

		my $placeholder = $added_placeholder{$dir};
		if ($placeholder) {
			# Remove our placeholder file, if we created one.
			if ($path ne $placeholder) {
				delete_entry($self, $placeholder);
			}
			delete $added_placeholder{$dir};
		}
	}

	return {
		path   => $path,
		mode_a => $mode,
		mode_b => $mode,
		pool   => SVN::Pool->new,
		action => 'A',
	};
}
229
# Editor callback: build the directory baton for a newly added directory.
# When the added directory maps to the git root (empty git path), every
# file of the base commit {c} is removed from the index first.
# The new directory is recorded in {empty} with value 1, its parent is
# dropped from {empty}, and a placeholder file previously created in the
# parent is deleted.  Ignored paths only get a baton.
sub add_directory {
	my ($self, $path, $cp_path, $cp_rev) = @_;
	return { path => $path } if $self->is_path_ignored($path);

	my $gpath = $self->git_path($path);
	if ($gpath eq '') {
		my ($ls, $ctx) = command_output_pipe(qw/ls-tree
		                                     -r --name-only -z/,
				                     $self->{c});
		local $/ = "\0";
		while (defined(my $name = <$ls>)) {
			chomp $name;
			$self->{gii}->remove($name);
			print "\tD\t$name\n" unless $::_q;
			# note: this records $gpath (the empty string in
			# this branch), not the individual file name
			push @deleted_gpath, $gpath;
		}
		command_close_pipe($ls, $ctx);
		$self->{empty}->{$path} = 0;
	}

	# split into parent directory and last path component
	my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
	delete $self->{empty}->{$dir};
	$self->{empty}->{$path} = 1;

	my $placeholder = $added_placeholder{$dir};
	if ($placeholder) {
		# Remove our placeholder file, if we created one.
		delete_entry($self, $placeholder);
		delete $added_placeholder{$dir};
	}

	return { path => $path };
}
261
# Editor callback: record a directory property change in
# {dir_prop}{<directory path>}{<property>}.  Ignored paths are skipped.
# Always returns undef.
sub change_dir_prop {
	my ($self, $db, $prop, $value) = @_;
	my $dir = $db->{path};
	return undef if $self->is_path_ignored($dir);

	# create the per-directory hash on first use
	($self->{dir_prop}->{$dir} ||= {})->{$prop} = $value;
	return undef;
}
269
# Editor callback: note a directory reported as absent by appending its
# path to the list {absent_dir}{<parent path>}.  Ignored paths are
# skipped.  Always returns undef.
sub absent_directory {
	my ($self, $path, $pb) = @_;
	return undef if $self->is_path_ignored($path);

	my $parent = $pb->{path};
	push @{ $self->{absent_dir}->{$parent} ||= [] }, $path;
	return undef;
}
277
# Editor callback: note a file reported as absent by appending its path
# to the list {absent_file}{<parent path>}.  Ignored paths are skipped.
# Always returns undef.
sub absent_file {
	my ($self, $path, $pb) = @_;
	return undef if $self->is_path_ignored($path);

	my $parent = $pb->{path};
	push @{ $self->{absent_file}->{$parent} ||= [] }, $path;
	return undef;
}
285
# Editor callback: apply a file property change.
#   svn:special    - sets the new mode {mode_b} to 120000, or to 100644
#                    when the value is undefined
#   svn:executable - sets {mode_b} to 100755, or to 100644 when the value
#                    is undefined; a baton whose mode is 120000 is left
#                    untouched
#   anything else  - stored in {file_prop}{<path>}{<property>}
# Ignored paths are skipped.  Always returns undef.
sub change_file_prop {
	my ($self, $fb, $prop, $value) = @_;
	return undef if $self->is_path_ignored($fb->{path});

	if ($prop eq 'svn:special') {
		$fb->{mode_b} = defined $value ? 120000 : 100644;
	} elsif ($prop eq 'svn:executable') {
		$fb->{mode_b} = (defined $value ? 100755 : 100644)
			unless $fb->{mode_b} == 120000;
	} else {
		($self->{file_prop}->{$fb->{path}} ||= {})->{$prop} = $value;
	}
	return undef;
}
301
# Editor callback: prepare the base and target temp files for a text
# delta and return the SVN::TxDelta::apply() result wrapped in an array
# reference.  Returns undef for ignored paths.
#
# When the baton has a blob, it is written to the base file (preceded by
# "link " for symlinks that are not in {empty_symlinks}).  If $exp is
# defined, the base file's md5 must equal it; on a mismatch with the
# "link " prefix the base is rebuilt once without the prefix, otherwise
# the callback dies.
sub apply_textdelta {
	my ($self, $fb, $exp) = @_;
	return undef if $self->is_path_ignored($fb->{path});
	my $fh = $::_repository->temp_acquire('svn_delta');
	# $fh gets auto-closed() by SVN::TxDelta::apply(),
	# (but $base does not,) so dup() it for reading in close_file
	open my $dup, '<&', $fh or croak $!;
	my $base = $::_repository->temp_acquire('git_blob');

	if ($fb->{blob}) {
		my $base_is_link;

		if ($fb->{mode_a} eq '120000' &&
		    ! $self->{empty_symlinks}->{$fb->{path}}) {
			print $base 'link ' or die "print $!\n";
			$base_is_link = 1;
		}

		# at most two passes: the second one only happens after a
		# checksum mismatch on a base that carried the "link " prefix
		while (1) {
			my $size = $::_repository->cat_blob($fb->{blob}, $base);
			die "Failed to read object $fb->{blob}" if ($size < 0);

			last unless defined $exp;

			seek $base, 0, 0 or croak $!;
			my $got = ::md5sum($base);
			last if $got eq $exp;

			my $err = "Checksum mismatch: ".
			       "$fb->{path} $fb->{blob}\n" .
			       "expected: $exp\n" .
			       "     got: $got\n";
			die $err unless $base_is_link;

			warn $err,
			     "Retrying... (possibly ",
			     "a bad symlink from SVN)\n";
			$::_repository->temp_reset($base);
			$base_is_link = 0;
		}
	}
	seek $base, 0, 0 or croak $!;
	$fb->{fh} = $fh;
	$fb->{base} = $base;
	return [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
}
348
# Editor callback: finish a file and put the result into the index.
#
# When a delta was applied (the baton has {fh}), the new content is
# checked against $exp if that is defined, the "link " prefix is removed
# for symlinks, and the content is hashed into the object database.
# Without a delta the baton's existing {blob} is reused.
# The index entry is then updated with mode {mode_b} and the action is
# printed unless $::_q is set.  Ignored paths are skipped.
# Always returns undef.
sub close_file {
	my ($self, $fb, $exp) = @_;
	return undef if $self->is_path_ignored($fb->{path});

	my $hash;
	my $path = $self->git_path($fb->{path});
	my $fh = $fb->{fh};
	if ($fh) {
		if (defined $exp) {
			seek($fh, 0, 0) or croak $!;
			my $got = ::md5sum($fh);
			if ($got ne $exp) {
				die "Checksum mismatch: $path\n",
				    "expected: $exp\n    got: $got\n";
			}
		}
		if ($fb->{mode_b} == 120000) {
			sysseek($fh, 0, 0) or croak $!;
			# a symlink body is expected to start with "link "
			my $rd = sysread($fh, my $buf, 5);

			if (!defined $rd) {
				croak "sysread: $!\n";
			} elsif ($rd == 0) {
				warn "$path has mode 120000",
				     " but it points to nothing\n",
				     "converting to an empty file with mode",
				     " 100644\n";
				$fb->{mode_b} = '100644';
			} elsif ($buf ne 'link ') {
				warn "$path has mode 120000",
				     " but is not a link\n";
			} else {
				# copy everything after the prefix into a
				# second temp file and hash that one instead
				my $tmp_fh = $::_repository->temp_acquire(
					'svn_hash');
				my $res;
				while ($res = sysread($fh, my $str, 1024)) {
					my $out = syswrite($tmp_fh, $str, $res);
					defined($out) && $out == $res
						or croak("write ",
							Git::temp_path($tmp_fh),
							": $!\n");
				}
				defined $res or croak $!;

				# done with the prefixed file; continue with
				# the copy
				Git::temp_release($fh, 1);
				$fh = $tmp_fh;
			}
		}

		$hash = $::_repository->hash_and_insert_object(
				Git::temp_path($fh));
		$hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";

		Git::temp_release($fb->{base}, 1);
		Git::temp_release($fh, 1);
	} else {
		$hash = $fb->{blob} or die "no blob information\n";
	}
	$fb->{pool}->clear;
	$self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
	print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
	return undef;
}
411
# Editor callback: the edit was aborted.  Keep the index helper's {nr}
# value in {nr}, drop the helper, and pass the call on to the parent
# class.
sub abort_edit {
	my ($self, @args) = @_;
	$self->{nr} = $self->{gii}->{nr};
	delete $self->{gii};
	$self->SUPER::abort_edit(@args);
}
418
# Editor callback: the edit completed.  With $_preserve_empty_dirs set,
# placeholder files are added to empty directories and the list of
# placeholders is saved.  Then {git_commit_ok} is set, the index
# helper's {nr} value is kept in {nr}, the helper is dropped, and the
# call is passed on to the parent class.
sub close_edit {
	my ($self, @args) = @_;

	if ($_preserve_empty_dirs) {
		# Any entry flagged as empty that also has an associated
		# dir_prop represents a newly created empty directory.
		my @empty_dirs = grep { exists $self->{dir_prop}->{$_} }
		                 keys %{$self->{empty}};

		# Search for directories that have become empty due to
		# subsequent file deletes.
		push @empty_dirs, $self->find_empty_directories();

		# Finally, add a placeholder file to each empty directory.
		foreach my $dir (@empty_dirs) {
			$self->add_placeholder_file($dir);
		}

		$self->stash_placeholder_list();
	}

	$self->{git_commit_ok} = 1;
	$self->{nr} = $self->{gii}->{nr};
	delete $self->{gii};
	$self->SUPER::close_edit(@args);
}
446
# Return the directories (other than ".") that contained a path from
# @deleted_gpath and whose entries in the base commit {c} have all been
# deleted.
sub find_empty_directories {
	my ($self) = @_;
	my @empty_dirs;
	my %dirs = map { dirname($_) => 1 } @deleted_gpath;

	foreach my $dir (sort keys %dirs) {
		next if $dir eq ".";

		# If there have been any additions to this directory, there is
		# no reason to check if it is empty.
		# NOTE(review): the test below never looks at $dir; it only
		# asks whether some dir_prop/file_prop key has its parent as
		# a key of the same hash -- confirm this is intended.
		my $skip_added = 0;
		PROP_TYPE:
		foreach my $t (qw/dir_prop file_prop/) {
			foreach my $path (keys %{ $self->{$t} }) {
				next unless exists $self->{$t}->{dirname($path)};
				$skip_added = 1;
				last PROP_TYPE;
			}
		}
		next if $skip_added;

		# Use `git ls-tree` to get the filenames of this directory
		# that existed prior to this particular commit.
		my $ls = command('ls-tree', '-z', '--name-only',
				 $self->{c}, "$dir/");
		my %files = map { $_ => 1 } split(/\0/, $ls);

		# Remove the filenames that were deleted during this commit.
		delete @files{@deleted_gpath};

		# Report the directory if there are no filenames left.
		push @empty_dirs, $dir unless %files;
	}
	return @empty_dirs;
}
483
# Add "$dir/$_placeholder_filename" to the index, using the content of a
# freshly acquired temp file, and remember it in %added_placeholder.
sub add_placeholder_file {
	my ($self, $dir) = @_;
	my $path = "$dir/$_placeholder_filename";
	my $gpath = $self->git_path($path);

	# hash the (untouched) temp file to obtain the blob for the index
	my $fh = $::_repository->temp_acquire($gpath);
	my $tmp_name = Git::temp_path($fh);
	my $hash = $::_repository->hash_and_insert_object($tmp_name);
	Git::temp_release($fh, 1);
	$self->{gii}->update('100644', $hash, $gpath) or croak $!;

	# The directory should no longer be considered empty.
	delete $self->{empty}->{$dir};

	# Keep track of any placeholder files we create.
	$added_placeholder{$dir} = $path;
}
500
# Save the placeholder paths held in %added_placeholder as the values of
# the multi-valued config key svn-remote.<repo_id>.added-placeholder,
# replacing whatever values were stored there before.
sub stash_placeholder_list {
	my ($self) = @_;
	my $k = "svn-remote.$repo_id.added-placeholder";

	# only unset the key when it currently has a value
	my $v = eval { command_oneline('config', '--get-all', $k) };
	if ($v) {
		command_noisy('config', '--unset-all', $k);
	}

	foreach my $placeholder (values %added_placeholder) {
		command_noisy('config', '--add', $k, $placeholder);
	}
}
510
5111;
512__END__
513
=head1 NAME

Git::SVN::Fetcher - tree delta consumer for "git svn fetch"
515
516=head1 SYNOPSIS
517
518 use SVN::Core;
519 use SVN::Ra;
520 use Git::SVN;
521 use Git::SVN::Fetcher;
522 use Git;
523
524 my $gs = Git::SVN->find_by_url($url);
525 my $ra = SVN::Ra->new(url => $url);
526 my $editor = Git::SVN::Fetcher->new($gs);
527 my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
528 1, $editor);
529 $reporter->set_path('', $old_rev, 0);
530 $reporter->finish_report;
531 my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
532
	foreach my $path (keys %{$editor->{dir_prop}}) {
534 my $props = $editor->{dir_prop}{$path};
535 foreach my $prop (keys %$props) {
536 print "property $prop at $path changed to $props->{$prop}\n";
537 }
538 }
	foreach my $path (keys %{$editor->{empty}}) {
540 my $action = $editor->{empty}{$path} ? 'added' : 'removed';
541 print "empty directory $path $action\n";
542 }
	foreach my $path (keys %{$editor->{file_prop}}) { ... }
544 foreach my $parent (keys %{$editor->{absent_dir}}) {
		my @children = @{$editor->{absent_dir}{$parent}};
546 print "cannot fetch directory $parent/$_: not authorized?\n"
547 foreach @children;
548 }
	foreach my $parent (keys %{$editor->{absent_file}}) { ... }
550
551=head1 DESCRIPTION
552
553This is a subclass of C<SVN::Delta::Editor>, which means it implements
554callbacks to act as a consumer of Subversion tree deltas. This
555particular implementation of those callbacks is meant to store
556information about the resulting content which B<git svn fetch> could
557use to populate new commits and new entries for F<unhandled.log>.
558More specifically:
559
560=over
561
562=item * Additions, removals, and modifications of files are propagated
563to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
564B<git update-index>.
565
566=item * Changes in Subversion path properties are recorded in the
567C<dir_prop> and C<file_prop> fields (which are hashes).
568
569=item * Addition and removal of empty directories are indicated by
570entries with value 1 and 0 respectively in the C<empty> hash.
571
=item * Paths that are present but cannot be conveyed (presumably due
to permissions) are recorded in the C<absent_file> and
C<absent_dir> hashes.  For each key, the corresponding value is
a list of paths under that directory that were present but
could not be conveyed.
577
578=back
579
580The interface is unstable. Do not use this module unless you are
581developing git-svn.
582
583=head1 DEPENDENCIES
584
585L<SVN::Delta> from the Subversion perl bindings,
586the core L<Carp>, L<File::Basename>, and L<IO::File> modules,
587and git's L<Git> helper module.
588
589C<Git::SVN::Fetcher> has not been tested using callers other than
590B<git-svn> itself.
591
592=head1 SEE ALSO
593
594L<SVN::Delta>,
595L<Git::SVN::Editor>.
596
597=head1 INCOMPATIBILITIES
598
599None reported.
600
601=head1 BUGS
602
603None.