1package Git::SVN::Fetcher;
2use vars qw/@ISA $_ignore_regex $_include_regex $_preserve_empty_dirs
3 $_placeholder_filename @deleted_gpath %added_placeholder
4 $repo_id/;
5use strict;
6use warnings;
7use SVN::Delta;
8use Carp qw/croak/;
9use File::Basename qw/dirname/;
10use Git qw/command command_oneline command_noisy command_output_pipe
11 command_input_pipe command_close_pipe
12 command_bidi_pipe command_close_bidi_pipe/;
13BEGIN {
14 @ISA = qw(SVN::Delta::Editor);
15}
16
# file baton members: path, mode_a, mode_b, pool, fh, blob, base

# Construct a new fetcher, i.e. an SVN::Delta::Editor re-blessed into
# $class.
#
# Arguments:
#   $git_svn     - object supplying {repo_id}, optionally {last_commit},
#                  and the tmp_index_do() method
#   $switch_path - optional; passed through to _mark_empty_symlinks()
#
# Side effects: assigns the package global $repo_id and, when the
# per-remote preserve-empty-dirs option is true, $_preserve_empty_dirs,
# possibly $_placeholder_filename, and %added_placeholder.
#
# Returns the editor object.
sub new {
	my ($class, $git_svn, $switch_path) = @_;
	my $self = SVN::Delta::Editor->new;
	bless $self, $class;
	if (exists $git_svn->{last_commit}) {
		$self->{c} = $git_svn->{last_commit};
		$self->{empty_symlinks} =
			_mark_empty_symlinks($git_svn, $switch_path);
	}

	# some options are read globally, but can be overridden locally
	# per [svn-remote "..."] section.  Command-line options will *NOT*
	# override options set in an [svn-remote "..."] section
	$repo_id = $git_svn->{repo_id};
	my $k = "svn-remote.$repo_id.ignore-paths";
	my $v = eval { command_oneline('config', '--get', $k) };
	$self->{ignore_regex} = $v;

	$k = "svn-remote.$repo_id.include-paths";
	$v = eval { command_oneline('config', '--get', $k) };
	$self->{include_regex} = $v;

	$k = "svn-remote.$repo_id.preserve-empty-dirs";
	$v = eval { command_oneline('config', '--get', '--bool', $k) };
	if ($v && $v eq 'true') {
		$_preserve_empty_dirs = 1;
		$k = "svn-remote.$repo_id.placeholder-filename";
		$v = eval { command_oneline('config', '--get', $k) };
		# Only override the global when the section really names a
		# file.  Assigning undef (option unset) would make
		# add_placeholder_file() build the path "$dir/" and would
		# discard any value the global already held (presumably a
		# default or command-line value set by the caller).
		$_placeholder_filename = $v if defined($v) && length($v);
	}

	# Load the list of placeholder files added during previous invocations.
	$k = "svn-remote.$repo_id.added-placeholder";
	$v = eval { command_oneline('config', '--get-all', $k) };
	if ($_preserve_empty_dirs && $v) {
		# command() prints errors to stderr, so we only call it if
		# command_oneline() succeeded.
		my @v = command('config', '--get-all', $k);
		$added_placeholder{ dirname($_) } = $_ foreach @v;
	}

	$self->{empty} = {};
	$self->{dir_prop} = {};
	$self->{file_prop} = {};
	$self->{absent_dir} = {};
	$self->{absent_file} = {};
	require Git::IndexInfo;
	$self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
	$self->{pathnameencoding} = Git::config('svn.pathnameencoding');
	$self;
}
69
# This talks to the Ra object, so it has to run before
# do_{switch,update}, not from inside them (i.e. not once the
# Git::SVN::Fetcher object has been handed to do_{switch,update}).
#
# Returns a hash reference whose keys are the paths of empty 100644
# blobs in the last fetched commit that carry the svn:special property.
# The hash is empty when svn.brokenSymlinkWorkaround is unset or false,
# or when there is no previous revision/commit.
sub _mark_empty_symlinks {
	my ($git_svn, $switch_path) = @_;

	# Unset and false both disable the workaround.
	return {} unless Git::config_bool('svn.brokenSymlinkWorkaround');

	my %special;
	my ($rev, $cmt) = $git_svn->last_rev_commit;
	return {} if !$rev || !$cmt;

	# The warning is deliberately repeated for every fetched revision
	# so the user is sure to notice it.  The workaround can be turned
	# off in the repository while git svn is running; the next fetched
	# revision then skips this expensive scan.
	my $warned;
	chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
	my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
	local $/ = "\0";
	my $prefix = defined($switch_path) ? $switch_path : $git_svn->path;
	$prefix .= '/' if length($prefix);
	while (<$ls>) {
		chomp;
		# Only empty regular files are candidates.
		s/\A100644 blob $empty_blob\t//o or next;
		if (!$warned) {
			print STDERR "Scanning for empty symlinks, ",
				     "this may take a while if you have ",
				     "many empty files\n",
				     "You may disable this with `",
				     "git config svn.brokenSymlinkWorkaround ",
				     "false'.\n",
				     "This may be done in a different ",
				     "terminal without restarting ",
				     "git svn\n";
			$warned = 1;
		}
		my $path = $_;
		my (undef, $props) =
			$git_svn->ra->get_file($prefix.$path, $rev, undef);
		$special{$path} = 1 if $props->{'svn:special'};
	}
	command_close_pipe($ls, $ctx);
	return \%special;
}
117
# True when some component of the given path is exactly ".git", i.e.
# the path is a ".git" directory or lies inside one.
sub in_dot_git {
	my ($path) = @_;
	return $path =~ m{(?:^|/)\.git(?:/|$)};
}
122
# Decide whether a path is skipped: returns 1 to ignore it, 0 to keep it.
# Precedence, highest first:
#   1. anything inside a ".git" directory is ignored;
#   2. a match of the per-remote ignore regex ignores the path;
#   3. if any include regex (per-remote, then global) is defined, the
#      path is kept only when one of them matches;
#   4. otherwise the global ignore regex, if defined, decides.
sub is_path_ignored {
	my ($self, $path) = @_;
	return 1 if in_dot_git($path);

	my $remote_ignore = $self->{ignore_regex};
	return 1 if defined($remote_ignore) && $path =~ m!$remote_ignore!;

	# Per-remote include filter is consulted before the global one.
	my @include = grep { defined } ($self->{include_regex},
					$_include_regex);
	if (@include) {
		foreach my $re (@include) {
			return 0 if $path =~ m!$re!;
		}
		# Include filters exist but none matched.
		return 1;
	}

	return 0 unless defined($_ignore_regex);
	return ($path =~ m!$_ignore_regex!o) ? 1 : 0;
}
140
# Remember a compiled pattern matching $path as a leading path component
# (followed by "/" or end of string) in $self->{path_strip}.
# An empty $path leaves the object untouched.
sub set_path_strip {
	my ($self, $path) = @_;
	if (length $path) {
		$self->{path_strip} = qr/^\Q$path\E(\/|$)/;
	}
}
145
# Editor callback: the directory baton for the root has an empty path.
sub open_root {
	my %baton = (path => '');
	return \%baton;
}
149
# Editor callback: the directory baton only records the path.
sub open_directory {
	my ($self, $path, $pb, $rev) = @_;
	my %baton = (path => $path);
	return \%baton;
}
154
# Translate a path as received from the editor into the path used on
# the git side:
#   - when $self->{pathnameencoding} is set, convert the bytes from
#     UTF-8 to that encoding;
#   - when $self->{path_strip} is set, remove the matching prefix and
#     die if the path does not match it.
# Returns the translated path.
sub git_path {
	my ($self, $path) = @_;

	my $encoding = $self->{pathnameencoding};
	if ($encoding) {
		require Encode;
		Encode::from_to($path, 'UTF-8', $encoding);
	}

	my $strip = $self->{path_strip};
	return $path unless $strip;

	$path =~ s!$strip!!
		or die "Failed to strip path '$path' ($strip)\n";
	return $path;
}
167
# Editor callback: remove a file or a whole directory from the index.
#
# Arguments: $path is the path as delivered by the editor driver;
# $rev and $pb are accepted but not used.
#
# Ignored paths, and paths that map to the empty git path, are skipped.
# Otherwise the entry (or every file below it, when it is a tree in the
# base commit $self->{c}) is removed through $self->{gii}, a "D" line is
# printed per removal unless $::_q is set, the git path is recorded in
# @deleted_gpath (unless a placeholder is registered for the directory
# of $path) and $self->{empty}{$path} is set to 0.
#
# Always returns undef.
sub delete_entry {
	my ($self, $path, $rev, $pb) = @_;
	return undef if $self->is_path_ignored($path);

	my $gpath = $self->git_path($path);
	return undef if ($gpath eq '');

	# Object names are 40 hex digits with SHA-1 and 64 with SHA-256;
	# accept both so directories are still recognised in SHA-256
	# repositories.
	my $oid = qr/[a-f\d]{40}(?:[a-f\d]{24})?/;

	# remove entire directories.
	my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
	                 =~ /\A040000 tree ($oid)\t\Q$gpath\E\0/);
	if ($tree) {
		my ($ls, $ctx) = command_output_pipe(qw/ls-tree
		                                     -r --name-only -z/,
		                                     $tree);
		local $/ = "\0";
		# A lexical loop variable keeps the caller's $_ intact.
		while (my $name = <$ls>) {
			chomp $name;
			my $rmpath = "$gpath/$name";
			$self->{gii}->remove($rmpath);
			print "\tD\t$rmpath\n" unless $::_q;
		}
		print "\tD\t$gpath/\n" unless $::_q;
		command_close_pipe($ls, $ctx);
	} else {
		$self->{gii}->remove($gpath);
		print "\tD\t$gpath\n" unless $::_q;
	}
	# Don't add to @deleted_gpath if we're deleting a placeholder file.
	push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)};
	$self->{empty}->{$path} = 0;
	undef;
}
200
# Editor callback: build the file baton for an existing file.
#
# For paths that are not ignored, the file's mode and blob id are looked
# up in the base commit $self->{c}; the sub dies when the path is not a
# blob there.  A 100644 entry listed in $self->{empty_symlinks} is
# treated as a symlink (mode 120000).  For ignored paths mode and blob
# stay undef.
#
# Returns a hash reference with path, mode_a, mode_b, blob, a fresh
# SVN::Pool and action 'M'.
sub open_file {
	my ($self, $path, $pb, $rev) = @_;
	my ($mode, $blob);

	unless ($self->is_path_ignored($path)) {
		my $gpath = $self->git_path($path);

		# Object names are 40 hex digits with SHA-1 and 64 with
		# SHA-256; accept both so the lookup also succeeds in
		# SHA-256 repositories.
		my $oid = qr/[a-f\d]{40}(?:[a-f\d]{24})?/;
		($mode, $blob) =
			(command('ls-tree', '-z', $self->{c}, "./$gpath")
			     =~ /\A(\d{6}) blob ($oid)\t\Q$gpath\E\0/);
		unless (defined $mode && defined $blob) {
			die "$path was not found in commit $self->{c} (r$rev)\n";
		}
		if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) {
			$mode = '120000';
		}
	}

	return { path => $path, mode_a => $mode, mode_b => $mode,
		 blob => $blob, pool => SVN::Pool->new, action => 'M' };
}
220
# Editor callback: build the file baton for a newly added file.
#
# For paths that are not ignored, the parent directory stops being
# tracked in $self->{empty}, the mode starts out as 100644, and a
# placeholder file registered for the parent directory is deleted
# (unless the added file is that placeholder) and forgotten.
#
# Returns a hash reference with path, mode_a, mode_b, a fresh SVN::Pool
# and action 'A'; the modes are undef for ignored paths.
sub add_file {
	my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
	my $mode;

	unless ($self->is_path_ignored($path)) {
		# Split off the parent directory; the file name itself is
		# not needed.
		my ($dir) = ($path =~ m#^(.*?)/?([^/]+)$#);
		delete $self->{empty}->{$dir};
		$mode = '100644';

		my $placeholder = $added_placeholder{$dir};
		if ($placeholder) {
			# Remove our placeholder file, if we created one.
			delete_entry($self, $placeholder)
				if $path ne $placeholder;
			delete $added_placeholder{$dir};
		}
	}

	return { path => $path, mode_a => $mode, mode_b => $mode,
		 pool => SVN::Pool->new, action => 'A' };
}
241
# Editor callback: build the directory baton for a newly added directory.
#
# For paths that are not ignored:
#   - when the directory maps to the empty git path, every file of the
#     base commit $self->{c} is removed from the index first;
#   - the parent directory stops being tracked in $self->{empty} and the
#     new directory is marked there with 1;
#   - a placeholder file registered for the parent directory is deleted
#     and forgotten.
#
# Returns a hash reference holding only the path.
sub add_directory {
	my ($self, $path, $cp_path, $cp_rev) = @_;
	my $baton = { path => $path };
	return $baton if $self->is_path_ignored($path);

	my $gpath = $self->git_path($path);
	if ($gpath eq '') {
		my ($ls, $ctx) = command_output_pipe(
			qw/ls-tree -r --name-only -z/, $self->{c});
		local $/ = "\0";
		while (<$ls>) {
			chomp;
			$self->{gii}->remove($_);
			$::_q or print "\tD\t$_\n";
			# NOTE(review): this records $gpath (the empty string
			# in this branch) once per removed file, not the
			# removed file's own path -- confirm this is intended.
			push @deleted_gpath, $gpath;
		}
		command_close_pipe($ls, $ctx);
		$self->{empty}->{$path} = 0;
	}

	# Only the parent directory part is needed below.
	my ($dir) = ($path =~ m#^(.*?)/?([^/]+)$#);
	delete $self->{empty}->{$dir};
	$self->{empty}->{$path} = 1;

	if ($added_placeholder{$dir}) {
		# Remove our placeholder file, if we created one.
		delete_entry($self, $added_placeholder{$dir});
		delete $added_placeholder{$dir};
	}

	return $baton;
}
273
# Editor callback: record a directory property change in
# $self->{dir_prop}{<directory path>}{<property>}.  Nothing is recorded
# for ignored paths.  Always returns undef.
sub change_dir_prop {
	my ($self, $db, $prop, $value) = @_;
	my $dir = $db->{path};
	return undef if $self->is_path_ignored($dir);

	my $props = ($self->{dir_prop}->{$dir} ||= {});
	$props->{$prop} = $value;
	return undef;
}
281
# Editor callback: note a directory that could not be delivered.  The
# path is appended to the list kept in $self->{absent_dir} under the
# parent baton's path.  Ignored paths are skipped.  Always returns undef.
sub absent_directory {
	my ($self, $path, $pb) = @_;
	return undef if $self->is_path_ignored($path);

	my $children = ($self->{absent_dir}->{$pb->{path}} ||= []);
	push @$children, $path;
	return undef;
}
289
# Editor callback: note a file that could not be delivered.  The path is
# appended to the list kept in $self->{absent_file} under the parent
# baton's path.  Ignored paths are skipped.  Always returns undef.
sub absent_file {
	my ($self, $path, $pb) = @_;
	return undef if $self->is_path_ignored($path);

	my $children = ($self->{absent_file}->{$pb->{path}} ||= []);
	push @$children, $path;
	return undef;
}
297
# Editor callback: apply a file property change to the file baton.
#   svn:special    - mode_b becomes 120000 when set, 100644 when removed
#   svn:executable - unless mode_b is 120000, mode_b becomes 100755 when
#                    set and 100644 when removed
#   anything else  - stored in $self->{file_prop}{<path>}{<property>}
# Ignored paths are skipped.  Always returns undef.
sub change_file_prop {
	my ($self, $fb, $prop, $value) = @_;
	return undef if $self->is_path_ignored($fb->{path});

	if ($prop eq 'svn:special') {
		$fb->{mode_b} = defined $value ? 120000 : 100644;
	} elsif ($prop ne 'svn:executable') {
		my $props = ($self->{file_prop}->{$fb->{path}} ||= {});
		$props->{$prop} = $value;
	} elsif ($fb->{mode_b} != 120000) {
		# The executable bit never changes a symlink's mode.
		$fb->{mode_b} = defined $value ? 100755 : 100644;
	}
	return undef;
}
313
# Editor callback: prepare the application of a text delta to a file.
#
# Arguments: $fb is the file baton; $exp, when defined, is the checksum
# the base text is expected to have.
#
# Two temp files are acquired: one receiving the delta result (stored in
# $fb->{fh}) and one holding the base text (stored in $fb->{base}).  When
# the baton carries a blob, it is written to the base file, preceded by
# "link " when the old mode is a symlink that is not a known empty
# symlink.  On a checksum mismatch the base is rebuilt once without the
# "link " prefix if one was written; otherwise the sub dies.
#
# Returns undef for ignored paths, else an array reference holding
# whatever SVN::TxDelta::apply() returned.
sub apply_textdelta {
	my ($self, $fb, $exp) = @_;
	return undef if $self->is_path_ignored($fb->{path});

	my $repo = $::_repository;

	# Find the first temp-file slot for this process that is not locked.
	my $suffix = 0;
	$suffix++ while $repo->temp_is_locked("svn_delta_${$}_$suffix");
	my $fh = $repo->temp_acquire("svn_delta_${$}_$suffix");
	# $fh gets auto-closed() by SVN::TxDelta::apply(),
	# (but $base does not,) so dup() it for reading in close_file
	open my $dup, '<&', $fh or croak $!;
	my $base = $repo->temp_acquire("git_blob_${$}_$suffix");
	# close_file may call temp_acquire on 'svn_hash', but because of the
	# call chain, if the temp_acquire call from close_file ends up being
	# the call that first creates the 'svn_hash' temp file, then the
	# FileHandle that's created as a result will end up in an SVN::Pool
	# that we clear in SVN::Ra::gs_fetch_loop_common.  Avoid that by
	# making sure the 'svn_hash' FileHandle is already created before
	# close_file is called.
	my $tmp_fh = $repo->temp_acquire('svn_hash');
	$repo->temp_release($tmp_fh, 1);

	if ($fb->{blob}) {
		my $base_is_link = 0;
		if ($fb->{mode_a} eq '120000' &&
		    ! $self->{empty_symlinks}->{$fb->{path}}) {
			print $base 'link ' or die "print $!\n";
			$base_is_link = 1;
		}

		# At most two passes: the second one only after a checksum
		# mismatch on a base that was written with the link prefix.
		while (1) {
			my $size = $repo->cat_blob($fb->{blob}, $base);
			die "Failed to read object $fb->{blob}" if ($size < 0);
			last unless defined $exp;

			seek $base, 0, 0 or croak $!;
			my $got = ::md5sum($base);
			last if $got eq $exp;

			my $err = "Checksum mismatch: ".
			       "$fb->{path} $fb->{blob}\n" .
			       "expected: $exp\n" .
			       " got: $got\n";
			die $err unless $base_is_link;

			warn $err,
			     "Retrying... (possibly ",
			     "a bad symlink from SVN)\n";
			$repo->temp_reset($base);
			$base_is_link = 0;
		}
	}

	seek $base, 0, 0 or croak $!;
	$fb->{fh} = $fh;
	$fb->{base} = $base;
	my @handler = SVN::TxDelta::apply($base, $dup, undef,
					  $fb->{path}, $fb->{pool});
	return \@handler;
}
370
# Editor callback: finish a file and record it in the index.
#
# Arguments: $fb is the file baton; $exp, when defined, is the checksum
# the resulting text is expected to have.
#
# When a delta was applied ($fb->{fh} is set) the result is verified
# against $exp, and for mode 120000 the leading "link " marker is
# stripped by copying the rest into the 'svn_hash' temp file (an empty
# result is downgraded to a 100644 file, a result without the marker
# only triggers a warning).  The content is then hashed into the object
# database and the temp files are released.  Without a delta the baton's
# existing blob is reused.  Finally the index entry is updated and the
# action is printed unless $::_q is set.
#
# Dies on checksum mismatch, on I/O errors, when the inserted object's
# id is malformed, or when there is neither a delta nor a blob.
# Always returns undef.
sub close_file {
	my ($self, $fb, $exp) = @_;
	return undef if $self->is_path_ignored($fb->{path});

	my $hash;
	my $path = $self->git_path($fb->{path});
	if (my $fh = $fb->{fh}) {
		if (defined $exp) {
			seek($fh, 0, 0) or croak $!;
			my $got = ::md5sum($fh);
			if ($got ne $exp) {
				die "Checksum mismatch: $path\n",
				    "expected: $exp\n got: $got\n";
			}
		}
		if ($fb->{mode_b} == 120000) {
			sysseek($fh, 0, 0) or croak $!;
			my $rd = sysread($fh, my $buf, 5);

			if (!defined $rd) {
				croak "sysread: $!\n";
			} elsif ($rd == 0) {
				warn "$path has mode 120000",
				     " but it points to nothing\n",
				     "converting to an empty file with mode",
				     " 100644\n";
				$fb->{mode_b} = '100644';
			} elsif ($buf ne 'link ') {
				warn "$path has mode 120000",
				     " but is not a link\n";
			} else {
				# Copy everything after the "link " marker
				# into a second temp file.
				my $tmp_fh = $::_repository->temp_acquire(
					'svn_hash');
				my $res;
				while ($res = sysread($fh, my $str, 1024)) {
					my $out = syswrite($tmp_fh, $str, $res);
					defined($out) && $out == $res
						or croak("write ",
							Git::temp_path($tmp_fh),
							": $!\n");
				}
				defined $res or croak $!;

				# From here on $fh is the stripped copy; the
				# original delta output is released.
				($fh, $tmp_fh) = ($tmp_fh, $fh);
				Git::temp_release($tmp_fh, 1);
			}
		}

		$hash = $::_repository->hash_and_insert_object(
				Git::temp_path($fh));
		# Object ids are 40 hex digits with SHA-1 and 64 with
		# SHA-256; accept both.
		$hash =~ /^[a-f\d]{40}(?:[a-f\d]{24})?$/
			or die "not an object ID: $hash\n";

		Git::temp_release($fb->{base}, 1);
		Git::temp_release($fh, 1);
	} else {
		$hash = $fb->{blob} or die "no blob information\n";
	}
	$fb->{pool}->clear;
	$self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
	print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
	undef;
}
433
# Editor callback: the edit was aborted.  The index helper's {nr} value
# is copied to $self->{nr}, the helper is dropped, and the call is
# passed on to the parent class with the remaining arguments.
sub abort_edit {
	my ($self, @args) = @_;
	my $nr = $self->{gii}->{nr};
	$self->{nr} = $nr;
	# Drop the helper before delegating, as the original order requires.
	delete $self->{gii};
	return $self->SUPER::abort_edit(@args);
}
440
# Editor callback: the edit completed.  When empty directories are
# preserved, a placeholder file is added to every empty directory and
# the placeholder list is saved.  The object is then flagged with
# {git_commit_ok}, the index helper's {nr} value is copied to
# $self->{nr}, the helper is dropped, and the call is passed on to the
# parent class with the remaining arguments.
sub close_edit {
	my ($self, @args) = @_;

	if ($_preserve_empty_dirs) {
		# Any entry flagged as empty that also has an associated
		# dir_prop represents a newly created empty directory.
		my @empty_dirs = grep { exists $self->{dir_prop}->{$_} }
				 keys %{$self->{empty}};

		# Add directories that have become empty due to subsequent
		# file deletes.
		push @empty_dirs, $self->find_empty_directories();

		# Finally, add a placeholder file to each empty directory.
		foreach my $dir (@empty_dirs) {
			$self->add_placeholder_file($dir);
		}

		$self->stash_placeholder_list();
	}

	$self->{git_commit_ok} = 1;
	my $nr = $self->{gii}->{nr};
	$self->{nr} = $nr;
	# Drop the helper before delegating, as the original order requires.
	delete $self->{gii};
	return $self->SUPER::close_edit(@args);
}
468
# Return the directories that lost entries during this edit and, judging
# by the base commit $self->{c} minus the paths in @deleted_gpath, have
# no entries left.  The top-level directory (".") is never reported.
sub find_empty_directories {
	my ($self) = @_;
	my @empty_dirs;

	# Parent directories of everything deleted during this edit.
	my %candidate;
	$candidate{dirname($_)} = 1 foreach @deleted_gpath;

	DIR: foreach my $dir (sort keys %candidate) {
		next DIR if $dir eq ".";

		# If there have been any additions to this directory, there
		# is no reason to check if it is empty.
		# NOTE(review): the test below never looks at $dir; it skips
		# the directory as soon as any recorded property path has its
		# parent recorded too -- confirm that this is intended.
		foreach my $kind (qw/dir_prop file_prop/) {
			foreach my $changed (keys %{ $self->{$kind} }) {
				next DIR if exists
					$self->{$kind}->{dirname($changed)};
			}
		}

		# Use `git ls-tree` to get the filenames of this directory
		# that existed prior to this particular commit.
		my $listing = command('ls-tree', '-z', '--name-only',
				      $self->{c}, "$dir/");
		my %remaining;
		$remaining{$_} = 1 foreach split(/\0/, $listing);

		# Remove the filenames that were deleted during this commit.
		delete @remaining{@deleted_gpath};

		# Report the directory if there are no filenames left.
		push @empty_dirs, $dir if !%remaining;
	}
	return @empty_dirs;
}
505
# Add the placeholder file "$dir/$_placeholder_filename" to the index as
# a 100644 entry, stop tracking $dir in $self->{empty}, and register the
# placeholder in %added_placeholder.
sub add_placeholder_file {
	my ($self, $dir) = @_;
	my $path = "$dir/$_placeholder_filename";
	my $gpath = $self->git_path($path);

	# The blob is the content of a freshly acquired temp file
	# (presumably empty -- confirm against Git::temp_acquire).
	my $repo = $::_repository;
	my $fh = $repo->temp_acquire($gpath);
	my $hash = $repo->hash_and_insert_object(Git::temp_path($fh));
	Git::temp_release($fh, 1);
	$self->{gii}->update('100644', $hash, $gpath) or croak $!;

	# The directory should no longer be considered empty.
	delete $self->{empty}->{$dir};

	# Keep track of any placeholder files we create.
	$added_placeholder{$dir} = $path;
}
522
# Save %added_placeholder in the repository configuration: existing
# "svn-remote.$repo_id.added-placeholder" values are removed and one
# value is added per registered placeholder path.
sub stash_placeholder_list {
	my ($self) = @_;
	my $key = "svn-remote.$repo_id.added-placeholder";

	# Only unset when a value is currently stored.
	my $current = eval { command_oneline('config', '--get-all', $key) };
	if ($current) {
		command_noisy('config', '--unset-all', $key);
	}

	foreach my $placeholder (values %added_placeholder) {
		command_noisy('config', '--add', $key, $placeholder);
	}
}
532
5331;
534__END__
535
536=head1 NAME
537
538Git::SVN::Fetcher - tree delta consumer for "git svn fetch"
539
540=head1 SYNOPSIS
541
542 use SVN::Core;
543 use SVN::Ra;
544 use Git::SVN;
545 use Git::SVN::Fetcher;
546 use Git;
547
548 my $gs = Git::SVN->find_by_url($url);
549 my $ra = SVN::Ra->new(url => $url);
550 my $editor = Git::SVN::Fetcher->new($gs);
551 my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
552 1, $editor);
553 $reporter->set_path('', $old_rev, 0);
554 $reporter->finish_report;
555 my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
556
	foreach my $path (keys %{$editor->{dir_prop}}) {
		my $props = $editor->{dir_prop}{$path};
		foreach my $prop (keys %$props) {
			print "property $prop at $path changed to $props->{$prop}\n";
		}
	}
	foreach my $path (keys %{$editor->{empty}}) {
		my $action = $editor->{empty}{$path} ? 'added' : 'removed';
		print "empty directory $path $action\n";
	}
	foreach my $path (keys %{$editor->{file_prop}}) { ... }
	foreach my $parent (keys %{$editor->{absent_dir}}) {
		my @children = @{$editor->{absent_dir}{$parent}};
		print "cannot fetch directory $parent/$_: not authorized?\n"
			foreach @children;
	}
	foreach my $parent (keys %{$editor->{absent_file}}) { ... }
574
575=head1 DESCRIPTION
576
577This is a subclass of C<SVN::Delta::Editor>, which means it implements
578callbacks to act as a consumer of Subversion tree deltas. This
579particular implementation of those callbacks is meant to store
580information about the resulting content which B<git svn fetch> could
581use to populate new commits and new entries for F<unhandled.log>.
582More specifically:
583
584=over
585
586=item * Additions, removals, and modifications of files are propagated
587to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
588B<git update-index>.
589
590=item * Changes in Subversion path properties are recorded in the
591C<dir_prop> and C<file_prop> fields (which are hashes).
592
593=item * Addition and removal of empty directories are indicated by
594entries with value 1 and 0 respectively in the C<empty> hash.
595
=item * Paths that are present but cannot be conveyed (presumably due
to permissions) are recorded in the C<absent_file> and
C<absent_dir> hashes.  For each key, the corresponding value is
a list of paths under that directory that were present but
could not be conveyed.
601
602=back
603
604The interface is unstable. Do not use this module unless you are
605developing git-svn.
606
607=head1 DEPENDENCIES
608
609L<SVN::Delta> from the Subversion perl bindings,
610the core L<Carp> and L<File::Basename> modules,
611and git's L<Git> helper module.
612
613C<Git::SVN::Fetcher> has not been tested using callers other than
614B<git-svn> itself.
615
616=head1 SEE ALSO
617
618L<SVN::Delta>,
619L<Git::SVN::Editor>.
620
621=head1 INCOMPATIBILITIES
622
623None reported.
624
625=head1 BUGS
626
627None.