1package Git::SVN::Fetcher;
2use vars qw/@ISA $_ignore_regex $_include_regex $_preserve_empty_dirs
3 $_placeholder_filename @deleted_gpath %added_placeholder
4 $repo_id/;
5use strict;
6use warnings;
7use SVN::Delta;
8use Carp qw/croak/;
9use File::Basename qw/dirname/;
10use IO::File qw//;
11use Git qw/command command_oneline command_noisy command_output_pipe
12 command_input_pipe command_close_pipe
13 command_bidi_pipe command_close_bidi_pipe/;
14BEGIN {
15 @ISA = qw(SVN::Delta::Editor);
16}
17
# file baton members: path, mode_a, mode_b, pool, fh, blob, base, action
# Build a fetcher: an SVN::Delta::Editor blessed into $class that records
# the tree delta of one fetch.
#
# Arguments:
#   $class       - class to bless the new editor into
#   $git_svn     - object providing {repo_id}, optionally {last_commit},
#                  and a tmp_index_do() method
#   $switch_path - optional; only forwarded to _mark_empty_symlinks()
#
# Besides filling in the instance, this updates the package globals
# $repo_id, $_preserve_empty_dirs, $_placeholder_filename and
# %added_placeholder from the [svn-remote "..."] configuration.
# Returns the new object.
sub new {
	my ($class, $git_svn, $switch_path) = @_;
	my $self = SVN::Delta::Editor->new;
	bless $self, $class;
	if (exists $git_svn->{last_commit}) {
		$self->{c} = $git_svn->{last_commit};
		$self->{empty_symlinks} =
			_mark_empty_symlinks($git_svn, $switch_path);
	}

	# some options are read globally, but can be overridden locally
	# per [svn-remote "..."] section.  Command-line options will *NOT*
	# override options set in an [svn-remote "..."] section
	$repo_id = $git_svn->{repo_id};
	my $k = "svn-remote.$repo_id.ignore-paths";
	my $v = eval { command_oneline('config', '--get', $k) };
	$self->{ignore_regex} = $v;

	$k = "svn-remote.$repo_id.include-paths";
	$v = eval { command_oneline('config', '--get', $k) };
	$self->{include_regex} = $v;

	$k = "svn-remote.$repo_id.preserve-empty-dirs";
	$v = eval { command_oneline('config', '--get', '--bool', $k) };
	if ($v && $v eq 'true') {
		$_preserve_empty_dirs = 1;
		$k = "svn-remote.$repo_id.placeholder-filename";
		$v = eval { command_oneline('config', '--get', $k) };
		# Only override the global when the section really names a
		# placeholder file; otherwise keep whatever value is already
		# stored there instead of clobbering it with undef (which
		# would make add_placeholder_file() build the path "$dir/").
		$_placeholder_filename = $v if defined $v && length $v;
	}

	# Load the list of placeholder files added during previous invocations.
	# The list is only used when empty directories are preserved, so
	# git config is not consulted at all otherwise.
	if ($_preserve_empty_dirs) {
		$k = "svn-remote.$repo_id.added-placeholder";
		$v = eval { command_oneline('config', '--get-all', $k) };
		if ($v) {
			# command() prints errors to stderr, so we only call
			# it if command_oneline() succeeded.
			my @v = command('config', '--get-all', $k);
			$added_placeholder{ dirname($_) } = $_ foreach @v;
		}
	}

	$self->{empty} = {};
	$self->{dir_prop} = {};
	$self->{file_prop} = {};
	$self->{absent_dir} = {};
	$self->{absent_file} = {};
	require Git::IndexInfo;
	$self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
	$self->{pathnameencoding} = Git::config('svn.pathnameencoding');
	$self;
}
70
71# this uses the Ra object, so it must be called before do_{switch,update},
72# not inside them (when the Git::SVN::Fetcher object is passed) to
73# do_{switch,update}
# Returns a hash ref whose keys are the paths (relative to the tracked
# tree) of empty regular files in the last fetched commit that carry the
# svn:special property in SVN.  Returns an empty hash ref when
# svn.brokenSymlinkWorkaround is unset or false, or when there is no
# previously fetched revision/commit.
sub _mark_empty_symlinks {
	my ($git_svn, $switch_path) = @_;

	my $enabled = Git::config_bool('svn.brokenSymlinkWorkaround');
	return {} unless defined($enabled) && $enabled;

	my ($rev, $cmt) = $git_svn->last_rev_commit;
	return {} unless ($rev && $cmt);

	# allow the warning to be printed for each revision we fetch to
	# ensure the user sees it.  The user can also disable the workaround
	# on the repository even while git svn is running and the next
	# revision fetched will skip this expensive function.
	my $warned;
	chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
	my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
	local $/ = "\0";
	my $pfx = defined($switch_path) ? $switch_path : $git_svn->path;
	$pfx .= '/' if length($pfx);

	my %special;
	while (defined(my $entry = <$ls>)) {
		chomp $entry;
		# only empty, non-executable regular files are candidates;
		# stripping the prefix leaves just the path
		$entry =~ s/\A100644 blob $empty_blob\t//o or next;
		if (!$warned) {
			print STDERR "Scanning for empty symlinks, ",
			             "this may take a while if you have ",
			             "many empty files\n",
			             "You may disable this with `",
			             "git config svn.brokenSymlinkWorkaround ",
			             "false'.\n",
			             "This may be done in a different ",
			             "terminal without restarting ",
			             "git svn\n";
			$warned = 1;
		}
		my (undef, $props) =
			$git_svn->ra->get_file($pfx.$entry, $rev, undef);
		$special{$entry} = 1 if $props->{'svn:special'};
	}
	command_close_pipe($ls, $ctx);
	return \%special;
}
118
119# returns true if a given path is inside a ".git" directory
# True when any component of the given path is exactly ".git".
sub in_dot_git {
	my ($path) = @_;
	return $path =~ m{(?:^|/)\.git(?:/|$)};
}
123
124# return value: 0 -- don't ignore, 1 -- ignore
125# This will also check whether the path is explicitly included
# Decide whether $path must be skipped.
# Returns 1 (ignore) or 0 (keep).  Precedence, highest first:
#   1. anything inside a ".git" directory is ignored;
#   2. a match of the per-remote ignore regex ignores the path;
#   3. if any include regex (per-remote or global) is defined, the path
#      is kept only when it matches one of them;
#   4. otherwise the global ignore regex decides.
sub is_path_ignored {
	my ($self, $path) = @_;
	my $remote_ignore  = $self->{ignore_regex};
	my $remote_include = $self->{include_regex};

	return 1 if in_dot_git($path);
	if (defined($remote_ignore) && $path =~ m!$remote_ignore!) {
		return 1;
	}

	if (defined($remote_include) || defined($_include_regex)) {
		# include lists are exclusive: no match means ignored
		if (defined($remote_include) && $path =~ m!$remote_include!) {
			return 0;
		}
		if (defined($_include_regex) && $path =~ m!$_include_regex!) {
			return 0;
		}
		return 1;
	}

	if (defined($_ignore_regex)) {
		return 1 if $path =~ m!$_ignore_regex!o;
	}
	return 0;
}
141
# Remember a regex that removes the leading "$path" component (and the
# slash following it, if any) from paths; git_path() applies it.
# Nothing is stored when $path is empty.
sub set_path_strip {
	my $self = shift;
	my $path = shift;
	$self->{path_strip} = qr/^\Q$path\E(\/|$)/ if length $path;
}
146
# SVN editor callback: returns the directory baton for the root, a hash
# ref whose only member is an empty path.
sub open_root {
	my %baton = (path => '');
	return \%baton;
}
150
# SVN editor callback: returns the directory baton for $path, a hash ref
# holding just that path.  The parent baton and revision are not used.
sub open_directory {
	my ($self, $path, $pb, $rev) = @_;
	my %baton = (path => $path);
	return \%baton;
}
155
# Translate an SVN path into the path used on the git side:
# re-encode it from UTF-8 to $self->{pathnameencoding} when that is set,
# then remove the prefix described by $self->{path_strip} when that is
# set.  Dies if a strip regex is set but does not match.
sub git_path {
	my ($self, $path) = @_;

	my $enc = $self->{pathnameencoding};
	if ($enc) {
		require Encode;
		Encode::from_to($path, 'UTF-8', $enc);
	}

	my $strip = $self->{path_strip};
	if ($strip) {
		unless ($path =~ s!$strip!!) {
			die "Failed to strip path '$path' ($self->{path_strip})\n";
		}
	}
	return $path;
}
168
# SVN editor callback: the entry at $path (file or directory) was deleted.
#
# $rev and $pb are accepted but not used here.  Nothing happens for
# ignored paths or when the path maps to the root of the git tree.
# Otherwise the matching path(s) are removed from the temporary index via
# $self->{gii}, one "D" line per removed path is printed unless $::_q is
# set, the git path is appended to @deleted_gpath (unless a placeholder
# is tracked for the parent directory) and $self->{empty}{$path} is set
# to 0.  Always returns undef.
sub delete_entry {
	my ($self, $path, $rev, $pb) = @_;
	return undef if $self->is_path_ignored($path);

	my $gpath = $self->git_path($path);
	return undef if ($gpath eq '');

	# remove entire directories.
	# The object ID may be longer than 40 hex digits (e.g. 64 for
	# SHA-256 repositories), so only a minimum length is required.
	my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
	                 =~ /\A040000 tree ([a-f\d]{40,})\t\Q$gpath\E\0/);
	if ($tree) {
		my ($ls, $ctx) = command_output_pipe(qw/ls-tree
		                                     -r --name-only -z/,
		                                     $tree);
		local $/ = "\0";
		while (<$ls>) {
			chomp;
			my $rmpath = "$gpath/$_";
			$self->{gii}->remove($rmpath);
			print "\tD\t$rmpath\n" unless $::_q;
		}
		print "\tD\t$gpath/\n" unless $::_q;
		command_close_pipe($ls, $ctx);
	} else {
		$self->{gii}->remove($gpath);
		print "\tD\t$gpath\n" unless $::_q;
	}
	# Don't add to @deleted_gpath if we're deleting a placeholder file.
	push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)};
	$self->{empty}->{$path} = 0;
	undef;
}
201
# SVN editor callback: an existing file at $path is about to be modified.
#
# Returns the file baton (hash ref) with members path, mode_a, mode_b,
# blob, pool and action ('M').  For ignored paths mode_a, mode_b and blob
# are undef.  Otherwise mode and blob are read from the entry for the
# path in commit $self->{c}; dies if no blob entry is found there.
# An empty 100644 file listed in $self->{empty_symlinks} is treated as a
# symlink (mode 120000).  $pb is not used; $rev only appears in the
# error message.
sub open_file {
	my ($self, $path, $pb, $rev) = @_;
	my ($mode, $blob);

	unless ($self->is_path_ignored($path)) {
		my $gpath = $self->git_path($path);
		# The object ID may be longer than 40 hex digits (e.g. 64
		# for SHA-256 repositories), so only a minimum length is
		# required.
		($mode, $blob) =
			(command('ls-tree', '-z', $self->{c}, "./$gpath")
			 =~ /\A(\d{6}) blob ([a-f\d]{40,})\t\Q$gpath\E\0/);
		unless (defined $mode && defined $blob) {
			die "$path was not found in commit $self->{c} (r$rev)\n";
		}
		if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) {
			$mode = '120000';
		}
	}

	return { path => $path, mode_a => $mode, mode_b => $mode,
	         blob => $blob, pool => SVN::Pool->new, action => 'M' };
}
221
# SVN editor callback: a new file appears at $path.
#
# Returns the file baton (hash ref) with members path, mode_a, mode_b,
# pool and action ('A').  The modes are '100644', or undef when the path
# is ignored.  For a non-ignored path the parent directory is dropped
# from $self->{empty}, and a placeholder file previously created in that
# directory is deleted (unless the new file is the placeholder itself)
# and forgotten.  $pb, $cp_path and $cp_rev are not used.
sub add_file {
	my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
	my $mode;

	unless ($self->is_path_ignored($path)) {
		# first capture is the parent directory ('' at top level)
		my ($dir) = ($path =~ m#^(.*?)/?([^/]+)$#);
		delete $self->{empty}->{$dir};
		$mode = '100644';

		my $placeholder = $added_placeholder{$dir};
		if ($placeholder) {
			# Remove our placeholder file, if we created one.
			if ($path ne $placeholder) {
				delete_entry($self, $placeholder);
			}
			delete $added_placeholder{$dir};
		}
	}

	return { path => $path, mode_a => $mode, mode_b => $mode,
	         pool => SVN::Pool->new, action => 'A' };
}
242
# SVN editor callback: a new directory appears at $path.
#
# Always returns the directory baton { path => $path }.  Ignored paths
# have no further effect.  When the path maps to the root of the git
# tree, every file of commit $self->{c} is removed from the index first.
# The parent directory is dropped from $self->{empty}, $path is flagged
# there with 1, and a placeholder file previously created in the parent
# is deleted and forgotten.  $cp_path and $cp_rev are not used.
sub add_directory {
	my ($self, $path, $cp_path, $cp_rev) = @_;
	my $baton = { path => $path };
	return $baton if $self->is_path_ignored($path);

	my $gpath = $self->git_path($path);
	if ($gpath eq '') {
		my ($ls, $ctx) = command_output_pipe(qw/ls-tree
		                                     -r --name-only -z/,
		                                     $self->{c});
		local $/ = "\0";
		while (defined(my $entry = <$ls>)) {
			chomp $entry;
			$self->{gii}->remove($entry);
			print "\tD\t$entry\n" unless $::_q;
			push @deleted_gpath, $gpath;
		}
		command_close_pipe($ls, $ctx);
		$self->{empty}->{$path} = 0;
	}

	# first capture is the parent directory ('' at top level)
	my ($dir) = ($path =~ m#^(.*?)/?([^/]+)$#);
	delete $self->{empty}->{$dir};
	$self->{empty}->{$path} = 1;

	my $placeholder = $added_placeholder{$dir};
	if ($placeholder) {
		# Remove our placeholder file, if we created one.
		delete_entry($self, $placeholder);
		delete $added_placeholder{$dir};
	}

	return $baton;
}
274
# SVN editor callback: property $prop of the directory described by baton
# $db changed to $value.  The change is stored in
# $self->{dir_prop}{<path>}{$prop}, unless the path is ignored.
# Always returns undef.
sub change_dir_prop {
	my ($self, $db, $prop, $value) = @_;
	my $dir = $db->{path};
	return undef if $self->is_path_ignored($dir);

	my $props = ($self->{dir_prop}->{$dir} ||= {});
	$props->{$prop} = $value;
	return undef;
}
282
# SVN editor callback: the directory $path is reported as absent.
# Unless the path is ignored, it is appended to the list kept in
# $self->{absent_dir} under the parent baton's path.
# Always returns undef.
sub absent_directory {
	my ($self, $path, $pb) = @_;
	return undef if $self->is_path_ignored($path);

	my $parent = $pb->{path};
	push @{ $self->{absent_dir}->{$parent} ||= [] }, $path;
	return undef;
}
290
# SVN editor callback: the file $path is reported as absent.
# Unless the path is ignored, it is appended to the list kept in
# $self->{absent_file} under the parent baton's path.
# Always returns undef.
sub absent_file {
	my ($self, $path, $pb) = @_;
	return undef if $self->is_path_ignored($path);

	my $parent = $pb->{path};
	push @{ $self->{absent_file}->{$parent} ||= [] }, $path;
	return undef;
}
298
# SVN editor callback: property $prop of the file described by baton $fb
# changed to $value (undef when the property is removed).
#   svn:special    - switches mode_b between 120000 and 100644
#   svn:executable - switches mode_b between 100755 and 100644, unless
#                    mode_b is currently 120000
#   anything else  - stored in $self->{file_prop}{<path>}{$prop}
# Ignored paths are left alone.  Always returns undef.
sub change_file_prop {
	my ($self, $fb, $prop, $value) = @_;
	my $path = $fb->{path};
	return undef if $self->is_path_ignored($path);

	if ($prop eq 'svn:special') {
		$fb->{mode_b} = defined $value ? 120000 : 100644;
	} elsif ($prop eq 'svn:executable') {
		unless ($fb->{mode_b} == 120000) {
			$fb->{mode_b} = defined $value ? 100755 : 100644;
		}
	} else {
		my $props = ($self->{file_prop}->{$path} ||= {});
		$props->{$prop} = $value;
	}
	return undef;
}
314
# SVN editor callback: a text delta is about to be applied to the file
# described by baton $fb; $exp is the expected MD5 of the base text
# (may be undef).
#
# Acquires two temp files: 'svn_delta' for the result and 'git_blob'
# for the base text.  If the baton has a blob, it is written to the base
# file, preceded by 'link ' when mode_a is 120000 and the path is not a
# known empty symlink.  A checksum mismatch on such a link-prefixed base
# is retried once without the prefix; any other mismatch dies.
# Stores the handles in $fb->{fh} and $fb->{base} and returns an array
# ref holding what SVN::TxDelta::apply() returned.
# Returns undef for ignored paths.
sub apply_textdelta {
	my ($self, $fb, $exp) = @_;
	return undef if $self->is_path_ignored($fb->{path});

	my $repo = $::_repository;
	my $fh = $repo->temp_acquire('svn_delta');
	# $fh gets auto-closed() by SVN::TxDelta::apply(),
	# (but $base does not,) so dup() it for reading in close_file
	open my $dup, '<&', $fh or croak $!;
	my $base = $repo->temp_acquire('git_blob');

	if ($fb->{blob}) {
		my $link_prefixed = 0;
		if ($fb->{mode_a} eq '120000' &&
		    ! $self->{empty_symlinks}->{$fb->{path}}) {
			print $base 'link ' or die "print $!\n";
			$link_prefixed = 1;
		}

		ATTEMPT: while (1) {
			my $size = $repo->cat_blob($fb->{blob}, $base);
			die "Failed to read object $fb->{blob}" if ($size < 0);

			last ATTEMPT unless defined $exp;
			seek $base, 0, 0 or croak $!;
			my $got = ::md5sum($base);
			last ATTEMPT if $got eq $exp;

			my $err = "Checksum mismatch: ".
			          "$fb->{path} $fb->{blob}\n" .
			          "expected: $exp\n" .
			          " got: $got\n";
			die $err unless $link_prefixed;

			# second try: same blob without the 'link ' prefix
			warn $err,
			     "Retrying... (possibly ",
			     "a bad symlink from SVN)\n";
			$repo->temp_reset($base);
			$link_prefixed = 0;
		}
	}
	seek $base, 0, 0 or croak $!;
	$fb->{fh} = $fh;
	$fb->{base} = $base;
	return [ SVN::TxDelta::apply($base, $dup, undef,
	                             $fb->{path}, $fb->{pool}) ];
}
361
# SVN editor callback: all changes to the file described by baton $fb
# have been delivered; $exp is the expected MD5 of the result (may be
# undef).
#
# If a text delta was applied ($fb->{fh} is set) the result is verified
# against $exp, a leading 'link ' is removed from symlink content (mode_b
# 120000), the content is stored as a git object and the temp files are
# released.  Otherwise the existing $fb->{blob} is reused.  Finally the
# index is updated through $self->{gii} and the action line is printed
# unless $::_q is set.
# Dies on checksum mismatch, on a malformed object ID, or when neither
# new content nor a blob is available.  Returns undef.
sub close_file {
	my ($self, $fb, $exp) = @_;
	return undef if $self->is_path_ignored($fb->{path});

	my $hash;
	my $path = $self->git_path($fb->{path});
	if (my $fh = $fb->{fh}) {
		if (defined $exp) {
			seek($fh, 0, 0) or croak $!;
			my $got = ::md5sum($fh);
			if ($got ne $exp) {
				die "Checksum mismatch: $path\n",
				    "expected: $exp\n got: $got\n";
			}
		}
		if ($fb->{mode_b} == 120000) {
			sysseek($fh, 0, 0) or croak $!;
			my $rd = sysread($fh, my $buf, 5);

			if (!defined $rd) {
				croak "sysread: $!\n";
			} elsif ($rd == 0) {
				warn "$path has mode 120000",
				     " but it points to nothing\n",
				     "converting to an empty file with mode",
				     " 100644\n";
				$fb->{mode_b} = '100644';
			} elsif ($buf ne 'link ') {
				warn "$path has mode 120000",
				     " but is not a link\n";
			} else {
				# copy everything after the 'link ' prefix
				# into a fresh temp file and hash that instead
				my $tmp_fh = $::_repository->temp_acquire(
					'svn_hash');
				my $res;
				while ($res = sysread($fh, my $str, 1024)) {
					my $out = syswrite($tmp_fh, $str, $res);
					defined($out) && $out == $res
						or croak("write ",
							Git::temp_path($tmp_fh),
							": $!\n");
				}
				defined $res or croak $!;

				($fh, $tmp_fh) = ($tmp_fh, $fh);
				Git::temp_release($tmp_fh, 1);
			}
		}

		$hash = $::_repository->hash_and_insert_object(
				Git::temp_path($fh));
		# Object IDs are 40 hex digits for SHA-1 but longer for
		# other hash algorithms (64 for SHA-256), so only require
		# a minimum length.
		$hash =~ /^[a-f\d]{40,}$/ or die "not an object ID: $hash\n";

		Git::temp_release($fb->{base}, 1);
		Git::temp_release($fh, 1);
	} else {
		$hash = $fb->{blob} or die "no blob information\n";
	}
	$fb->{pool}->clear;
	$self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
	print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
	undef;
}
424
# SVN editor callback: the edit was aborted.  Copies the counter {nr} of
# the index writer into $self->{nr}, drops the index writer, and then
# defers to the parent class with the remaining arguments.
sub abort_edit {
	my ($self, @args) = @_;
	my $count = $self->{gii}->{nr};
	# drop the index writer before handing over to the parent class
	delete $self->{gii};
	$self->{nr} = $count;
	$self->SUPER::abort_edit(@args);
}
431
# SVN editor callback: the edit completed.  When empty directories are
# preserved, placeholder files are added to every empty directory and
# the placeholder list is saved.  Then marks the edit as committable
# ({git_commit_ok}), copies the index writer's counter into {nr}, drops
# the index writer and defers to the parent class.
sub close_edit {
	my ($self, @args) = @_;

	if ($_preserve_empty_dirs) {
		# Any entry flagged as empty that also has an associated
		# dir_prop represents a newly created empty directory.
		my @empty_dirs = grep { exists $self->{dir_prop}->{$_} }
		                 keys %{$self->{empty}};

		# Search for directories that have become empty due to
		# subsequent file deletes.
		push @empty_dirs, $self->find_empty_directories();

		# Finally, add a placeholder file to each empty directory.
		foreach my $dir (@empty_dirs) {
			$self->add_placeholder_file($dir);
		}

		$self->stash_placeholder_list();
	}

	$self->{git_commit_ok} = 1;
	my $count = $self->{gii}->{nr};
	# drop the index writer before handing over to the parent class
	delete $self->{gii};
	$self->{nr} = $count;
	$self->SUPER::close_edit(@args);
}
459
# Returns the list of directories (git paths) that contained deleted
# entries and have no entries left after the deletions recorded in
# @deleted_gpath.  The top-level directory (".") is never reported.
sub find_empty_directories {
	my ($self) = @_;
	my @empty_dirs;
	my %dirs = map { dirname($_) => 1 } @deleted_gpath;

	# If there have been any additions, there is no reason to check
	# for empty directories.  The outcome of this scan is the same for
	# every directory, so it is done once up front instead of once per
	# directory.
	# NOTE(review): the scan only asks whether some path with recorded
	# properties has a parent that also has recorded properties; it
	# never looks at the directory being examined -- confirm that this
	# is the intended test.
	my $skip_added = 0;
	PROP_TABLE: foreach my $t (qw/dir_prop file_prop/) {
		foreach my $path (keys %{ $self->{$t} }) {
			if (exists $self->{$t}->{dirname($path)}) {
				$skip_added = 1;
				last PROP_TABLE;
			}
		}
	}
	return @empty_dirs if $skip_added;

	foreach my $dir (sort keys %dirs) {
		next if $dir eq ".";

		# Use `git ls-tree` to get the filenames of this directory
		# that existed prior to this particular commit.
		my $ls = command('ls-tree', '-z', '--name-only',
		                 $self->{c}, "$dir/");
		my %files = map { $_ => 1 } split(/\0/, $ls);

		# Remove the filenames that were deleted during this commit.
		delete $files{$_} foreach (@deleted_gpath);

		# Report the directory if there are no filenames left.
		push @empty_dirs, $dir unless (scalar %files);
	}
	@empty_dirs;
}
496
# Add an empty placeholder file named $_placeholder_filename to the
# directory $dir: an empty temp file is stored as a git object, the index
# is updated with mode 100644, $dir is dropped from $self->{empty} and the
# placeholder's path is remembered in %added_placeholder.
sub add_placeholder_file {
	my ($self, $dir) = @_;
	my $path = "$dir/$_placeholder_filename";
	my $gpath = $self->git_path($path);

	my $repo = $::_repository;
	my $fh = $repo->temp_acquire($gpath);
	my $hash = $repo->hash_and_insert_object(Git::temp_path($fh));
	Git::temp_release($fh, 1);
	$self->{gii}->update('100644', $hash, $gpath) or croak $!;

	# The directory should no longer be considered empty.
	delete $self->{empty}->{$dir};

	# Keep track of any placeholder files we create.
	$added_placeholder{$dir} = $path;
}
513
# Save the paths in %added_placeholder to the git configuration key
# svn-remote.<repo_id>.added-placeholder, replacing any values that are
# already stored under that key.
sub stash_placeholder_list {
	my ($self) = @_;
	my $key = "svn-remote.$repo_id.added-placeholder";

	# --unset-all is only run when the key currently has a value
	my $existing = eval { command_oneline('config', '--get-all', $key) };
	if ($existing) {
		command_noisy('config', '--unset-all', $key);
	}

	foreach my $placeholder (values %added_placeholder) {
		command_noisy('config', '--add', $key, $placeholder);
	}
}
523
5241;
525__END__
526
527=head1 NAME
528
529Git::SVN::Fetcher - tree delta consumer for "git svn fetch"
530
531=head1 SYNOPSIS
532
533 use SVN::Core;
534 use SVN::Ra;
535 use Git::SVN;
536 use Git::SVN::Fetcher;
537 use Git;
538
539 my $gs = Git::SVN->find_by_url($url);
540 my $ra = SVN::Ra->new(url => $url);
541 my $editor = Git::SVN::Fetcher->new($gs);
542 my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
543 1, $editor);
544 $reporter->set_path('', $old_rev, 0);
545 $reporter->finish_report;
546 my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
547
	foreach my $path (keys %{$editor->{dir_prop}}) {
549 my $props = $editor->{dir_prop}{$path};
550 foreach my $prop (keys %$props) {
551 print "property $prop at $path changed to $props->{$prop}\n";
552 }
553 }
	foreach my $path (keys %{$editor->{empty}}) {
555 my $action = $editor->{empty}{$path} ? 'added' : 'removed';
556 print "empty directory $path $action\n";
557 }
	foreach my $path (keys %{$editor->{file_prop}}) { ... }
559 foreach my $parent (keys %{$editor->{absent_dir}}) {
		my @children = @{$editor->{absent_dir}{$parent}};
561 print "cannot fetch directory $parent/$_: not authorized?\n"
562 foreach @children;
563 }
	foreach my $parent (keys %{$editor->{absent_file}}) { ... }
565
566=head1 DESCRIPTION
567
568This is a subclass of C<SVN::Delta::Editor>, which means it implements
569callbacks to act as a consumer of Subversion tree deltas. This
570particular implementation of those callbacks is meant to store
571information about the resulting content which B<git svn fetch> could
572use to populate new commits and new entries for F<unhandled.log>.
573More specifically:
574
575=over
576
577=item * Additions, removals, and modifications of files are propagated
578to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
579B<git update-index>.
580
581=item * Changes in Subversion path properties are recorded in the
582C<dir_prop> and C<file_prop> fields (which are hashes).
583
584=item * Addition and removal of empty directories are indicated by
585entries with value 1 and 0 respectively in the C<empty> hash.
586
587=item * Paths that are present but cannot be conveyed (presumably due
588to permissions) are recorded in the C<absent_file> and
C<absent_dir> hashes.  For each key, the corresponding value is
590a list of paths under that directory that were present but
591could not be conveyed.
592
593=back
594
595The interface is unstable. Do not use this module unless you are
596developing git-svn.
597
598=head1 DEPENDENCIES
599
600L<SVN::Delta> from the Subversion perl bindings,
601the core L<Carp>, L<File::Basename>, and L<IO::File> modules,
602and git's L<Git> helper module.
603
604C<Git::SVN::Fetcher> has not been tested using callers other than
605B<git-svn> itself.
606
607=head1 SEE ALSO
608
609L<SVN::Delta>,
610L<Git::SVN::Editor>.
611
612=head1 INCOMPATIBILITIES
613
614None reported.
615
616=head1 BUGS
617
618None.