perl / Git / SVN / Fetcher.pmon commit Merge branch 'jc/doc-diff-blobs' into maint (c2999ad)
   1package Git::SVN::Fetcher;
   2use vars qw/@ISA $_ignore_regex $_preserve_empty_dirs $_placeholder_filename
   3            @deleted_gpath %added_placeholder $repo_id/;
   4use strict;
   5use warnings;
   6use SVN::Delta;
   7use Carp qw/croak/;
   8use File::Basename qw/dirname/;
   9use IO::File qw//;
  10use Git qw/command command_oneline command_noisy command_output_pipe
  11           command_input_pipe command_close_pipe
  12           command_bidi_pipe command_close_bidi_pipe/;
  13BEGIN {
  14        @ISA = qw(SVN::Delta::Editor);
  15}
  16
  17# file baton members: path, mode_a, mode_b, pool, fh, blob, base
  18sub new {
  19        my ($class, $git_svn, $switch_path) = @_;
  20        my $self = SVN::Delta::Editor->new;
  21        bless $self, $class;
  22        if (exists $git_svn->{last_commit}) {
  23                $self->{c} = $git_svn->{last_commit};
  24                $self->{empty_symlinks} =
  25                                  _mark_empty_symlinks($git_svn, $switch_path);
  26        }
  27
  28        # some options are read globally, but can be overridden locally
  29        # per [svn-remote "..."] section.  Command-line options will *NOT*
  30        # override options set in an [svn-remote "..."] section
  31        $repo_id = $git_svn->{repo_id};
  32        my $k = "svn-remote.$repo_id.ignore-paths";
  33        my $v = eval { command_oneline('config', '--get', $k) };
  34        $self->{ignore_regex} = $v;
  35
  36        $k = "svn-remote.$repo_id.preserve-empty-dirs";
  37        $v = eval { command_oneline('config', '--get', '--bool', $k) };
  38        if ($v && $v eq 'true') {
  39                $_preserve_empty_dirs = 1;
  40                $k = "svn-remote.$repo_id.placeholder-filename";
  41                $v = eval { command_oneline('config', '--get', $k) };
  42                $_placeholder_filename = $v;
  43        }
  44
  45        # Load the list of placeholder files added during previous invocations.
  46        $k = "svn-remote.$repo_id.added-placeholder";
  47        $v = eval { command_oneline('config', '--get-all', $k) };
  48        if ($_preserve_empty_dirs && $v) {
  49                # command() prints errors to stderr, so we only call it if
  50                # command_oneline() succeeded.
  51                my @v = command('config', '--get-all', $k);
  52                $added_placeholder{ dirname($_) } = $_ foreach @v;
  53        }
  54
  55        $self->{empty} = {};
  56        $self->{dir_prop} = {};
  57        $self->{file_prop} = {};
  58        $self->{absent_dir} = {};
  59        $self->{absent_file} = {};
  60        require Git::IndexInfo;
  61        $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
  62        $self->{pathnameencoding} = Git::config('svn.pathnameencoding');
  63        $self;
  64}
  65
  66# this uses the Ra object, so it must be called before do_{switch,update},
  67# not inside them (when the Git::SVN::Fetcher object is passed) to
  68# do_{switch,update}
  69sub _mark_empty_symlinks {
  70        my ($git_svn, $switch_path) = @_;
  71        my $bool = Git::config_bool('svn.brokenSymlinkWorkaround');
  72        return {} if (!defined($bool)) || (defined($bool) && ! $bool);
  73
  74        my %ret;
  75        my ($rev, $cmt) = $git_svn->last_rev_commit;
  76        return {} unless ($rev && $cmt);
  77
  78        # allow the warning to be printed for each revision we fetch to
  79        # ensure the user sees it.  The user can also disable the workaround
  80        # on the repository even while git svn is running and the next
  81        # revision fetched will skip this expensive function.
  82        my $printed_warning;
  83        chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
  84        my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
  85        local $/ = "\0";
  86        my $pfx = defined($switch_path) ? $switch_path : $git_svn->path;
  87        $pfx .= '/' if length($pfx);
  88        while (<$ls>) {
  89                chomp;
  90                s/\A100644 blob $empty_blob\t//o or next;
  91                unless ($printed_warning) {
  92                        print STDERR "Scanning for empty symlinks, ",
  93                                     "this may take a while if you have ",
  94                                     "many empty files\n",
  95                                     "You may disable this with `",
  96                                     "git config svn.brokenSymlinkWorkaround ",
  97                                     "false'.\n",
  98                                     "This may be done in a different ",
  99                                     "terminal without restarting ",
 100                                     "git svn\n";
 101                        $printed_warning = 1;
 102                }
 103                my $path = $_;
 104                my (undef, $props) =
 105                               $git_svn->ra->get_file($pfx.$path, $rev, undef);
 106                if ($props->{'svn:special'}) {
 107                        $ret{$path} = 1;
 108                }
 109        }
 110        command_close_pipe($ls, $ctx);
 111        \%ret;
 112}
 113
 114# returns true if a given path is inside a ".git" directory
 115sub in_dot_git {
 116        $_[0] =~ m{(?:^|/)\.git(?:/|$)};
 117}
 118
 119# return value: 0 -- don't ignore, 1 -- ignore
 120sub is_path_ignored {
 121        my ($self, $path) = @_;
 122        return 1 if in_dot_git($path);
 123        return 1 if defined($self->{ignore_regex}) &&
 124                    $path =~ m!$self->{ignore_regex}!;
 125        return 0 unless defined($_ignore_regex);
 126        return 1 if $path =~ m!$_ignore_regex!o;
 127        return 0;
 128}
 129
 130sub set_path_strip {
 131        my ($self, $path) = @_;
 132        $self->{path_strip} = qr/^\Q$path\E(\/|$)/ if length $path;
 133}
 134
 135sub open_root {
 136        { path => '' };
 137}
 138
 139sub open_directory {
 140        my ($self, $path, $pb, $rev) = @_;
 141        { path => $path };
 142}
 143
 144sub git_path {
 145        my ($self, $path) = @_;
 146        if (my $enc = $self->{pathnameencoding}) {
 147                require Encode;
 148                Encode::from_to($path, 'UTF-8', $enc);
 149        }
 150        if ($self->{path_strip}) {
 151                $path =~ s!$self->{path_strip}!! or
 152                  die "Failed to strip path '$path' ($self->{path_strip})\n";
 153        }
 154        $path;
 155}
 156
 157sub delete_entry {
 158        my ($self, $path, $rev, $pb) = @_;
 159        return undef if $self->is_path_ignored($path);
 160
 161        my $gpath = $self->git_path($path);
 162        return undef if ($gpath eq '');
 163
 164        # remove entire directories.
 165        my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
 166                         =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/);
 167        if ($tree) {
 168                my ($ls, $ctx) = command_output_pipe(qw/ls-tree
 169                                                     -r --name-only -z/,
 170                                                     $tree);
 171                local $/ = "\0";
 172                while (<$ls>) {
 173                        chomp;
 174                        my $rmpath = "$gpath/$_";
 175                        $self->{gii}->remove($rmpath);
 176                        print "\tD\t$rmpath\n" unless $::_q;
 177                }
 178                print "\tD\t$gpath/\n" unless $::_q;
 179                command_close_pipe($ls, $ctx);
 180        } else {
 181                $self->{gii}->remove($gpath);
 182                print "\tD\t$gpath\n" unless $::_q;
 183        }
 184        # Don't add to @deleted_gpath if we're deleting a placeholder file.
 185        push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)};
 186        $self->{empty}->{$path} = 0;
 187        undef;
 188}
 189
 190sub open_file {
 191        my ($self, $path, $pb, $rev) = @_;
 192        my ($mode, $blob);
 193
 194        goto out if $self->is_path_ignored($path);
 195
 196        my $gpath = $self->git_path($path);
 197        ($mode, $blob) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
 198                             =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/);
 199        unless (defined $mode && defined $blob) {
 200                die "$path was not found in commit $self->{c} (r$rev)\n";
 201        }
 202        if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) {
 203                $mode = '120000';
 204        }
 205out:
 206        { path => $path, mode_a => $mode, mode_b => $mode, blob => $blob,
 207          pool => SVN::Pool->new, action => 'M' };
 208}
 209
 210sub add_file {
 211        my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
 212        my $mode;
 213
 214        if (!$self->is_path_ignored($path)) {
 215                my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
 216                delete $self->{empty}->{$dir};
 217                $mode = '100644';
 218
 219                if ($added_placeholder{$dir}) {
 220                        # Remove our placeholder file, if we created one.
 221                        delete_entry($self, $added_placeholder{$dir})
 222                                unless $path eq $added_placeholder{$dir};
 223                        delete $added_placeholder{$dir}
 224                }
 225        }
 226
 227        { path => $path, mode_a => $mode, mode_b => $mode,
 228          pool => SVN::Pool->new, action => 'A' };
 229}
 230
 231sub add_directory {
 232        my ($self, $path, $cp_path, $cp_rev) = @_;
 233        goto out if $self->is_path_ignored($path);
 234        my $gpath = $self->git_path($path);
 235        if ($gpath eq '') {
 236                my ($ls, $ctx) = command_output_pipe(qw/ls-tree
 237                                                     -r --name-only -z/,
 238                                                     $self->{c});
 239                local $/ = "\0";
 240                while (<$ls>) {
 241                        chomp;
 242                        $self->{gii}->remove($_);
 243                        print "\tD\t$_\n" unless $::_q;
 244                        push @deleted_gpath, $gpath;
 245                }
 246                command_close_pipe($ls, $ctx);
 247                $self->{empty}->{$path} = 0;
 248        }
 249        my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
 250        delete $self->{empty}->{$dir};
 251        $self->{empty}->{$path} = 1;
 252
 253        if ($added_placeholder{$dir}) {
 254                # Remove our placeholder file, if we created one.
 255                delete_entry($self, $added_placeholder{$dir});
 256                delete $added_placeholder{$dir}
 257        }
 258
 259out:
 260        { path => $path };
 261}
 262
 263sub change_dir_prop {
 264        my ($self, $db, $prop, $value) = @_;
 265        return undef if $self->is_path_ignored($db->{path});
 266        $self->{dir_prop}->{$db->{path}} ||= {};
 267        $self->{dir_prop}->{$db->{path}}->{$prop} = $value;
 268        undef;
 269}
 270
 271sub absent_directory {
 272        my ($self, $path, $pb) = @_;
 273        return undef if $self->is_path_ignored($path);
 274        $self->{absent_dir}->{$pb->{path}} ||= [];
 275        push @{$self->{absent_dir}->{$pb->{path}}}, $path;
 276        undef;
 277}
 278
 279sub absent_file {
 280        my ($self, $path, $pb) = @_;
 281        return undef if $self->is_path_ignored($path);
 282        $self->{absent_file}->{$pb->{path}} ||= [];
 283        push @{$self->{absent_file}->{$pb->{path}}}, $path;
 284        undef;
 285}
 286
 287sub change_file_prop {
 288        my ($self, $fb, $prop, $value) = @_;
 289        return undef if $self->is_path_ignored($fb->{path});
 290        if ($prop eq 'svn:executable') {
 291                if ($fb->{mode_b} != 120000) {
 292                        $fb->{mode_b} = defined $value ? 100755 : 100644;
 293                }
 294        } elsif ($prop eq 'svn:special') {
 295                $fb->{mode_b} = defined $value ? 120000 : 100644;
 296        } else {
 297                $self->{file_prop}->{$fb->{path}} ||= {};
 298                $self->{file_prop}->{$fb->{path}}->{$prop} = $value;
 299        }
 300        undef;
 301}
 302
 303sub apply_textdelta {
 304        my ($self, $fb, $exp) = @_;
 305        return undef if $self->is_path_ignored($fb->{path});
 306        my $fh = $::_repository->temp_acquire('svn_delta');
 307        # $fh gets auto-closed() by SVN::TxDelta::apply(),
 308        # (but $base does not,) so dup() it for reading in close_file
 309        open my $dup, '<&', $fh or croak $!;
 310        my $base = $::_repository->temp_acquire('git_blob');
 311
 312        if ($fb->{blob}) {
 313                my ($base_is_link, $size);
 314
 315                if ($fb->{mode_a} eq '120000' &&
 316                    ! $self->{empty_symlinks}->{$fb->{path}}) {
 317                        print $base 'link ' or die "print $!\n";
 318                        $base_is_link = 1;
 319                }
 320        retry:
 321                $size = $::_repository->cat_blob($fb->{blob}, $base);
 322                die "Failed to read object $fb->{blob}" if ($size < 0);
 323
 324                if (defined $exp) {
 325                        seek $base, 0, 0 or croak $!;
 326                        my $got = ::md5sum($base);
 327                        if ($got ne $exp) {
 328                                my $err = "Checksum mismatch: ".
 329                                       "$fb->{path} $fb->{blob}\n" .
 330                                       "expected: $exp\n" .
 331                                       "     got: $got\n";
 332                                if ($base_is_link) {
 333                                        warn $err,
 334                                             "Retrying... (possibly ",
 335                                             "a bad symlink from SVN)\n";
 336                                        $::_repository->temp_reset($base);
 337                                        $base_is_link = 0;
 338                                        goto retry;
 339                                }
 340                                die $err;
 341                        }
 342                }
 343        }
 344        seek $base, 0, 0 or croak $!;
 345        $fb->{fh} = $fh;
 346        $fb->{base} = $base;
 347        [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
 348}
 349
 350sub close_file {
 351        my ($self, $fb, $exp) = @_;
 352        return undef if $self->is_path_ignored($fb->{path});
 353
 354        my $hash;
 355        my $path = $self->git_path($fb->{path});
 356        if (my $fh = $fb->{fh}) {
 357                if (defined $exp) {
 358                        seek($fh, 0, 0) or croak $!;
 359                        my $got = ::md5sum($fh);
 360                        if ($got ne $exp) {
 361                                die "Checksum mismatch: $path\n",
 362                                    "expected: $exp\n    got: $got\n";
 363                        }
 364                }
 365                if ($fb->{mode_b} == 120000) {
 366                        sysseek($fh, 0, 0) or croak $!;
 367                        my $rd = sysread($fh, my $buf, 5);
 368
 369                        if (!defined $rd) {
 370                                croak "sysread: $!\n";
 371                        } elsif ($rd == 0) {
 372                                warn "$path has mode 120000",
 373                                     " but it points to nothing\n",
 374                                     "converting to an empty file with mode",
 375                                     " 100644\n";
 376                                $fb->{mode_b} = '100644';
 377                        } elsif ($buf ne 'link ') {
 378                                warn "$path has mode 120000",
 379                                     " but is not a link\n";
 380                        } else {
 381                                my $tmp_fh = $::_repository->temp_acquire(
 382                                        'svn_hash');
 383                                my $res;
 384                                while ($res = sysread($fh, my $str, 1024)) {
 385                                        my $out = syswrite($tmp_fh, $str, $res);
 386                                        defined($out) && $out == $res
 387                                                or croak("write ",
 388                                                        Git::temp_path($tmp_fh),
 389                                                        ": $!\n");
 390                                }
 391                                defined $res or croak $!;
 392
 393                                ($fh, $tmp_fh) = ($tmp_fh, $fh);
 394                                Git::temp_release($tmp_fh, 1);
 395                        }
 396                }
 397
 398                $hash = $::_repository->hash_and_insert_object(
 399                                Git::temp_path($fh));
 400                $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
 401
 402                Git::temp_release($fb->{base}, 1);
 403                Git::temp_release($fh, 1);
 404        } else {
 405                $hash = $fb->{blob} or die "no blob information\n";
 406        }
 407        $fb->{pool}->clear;
 408        $self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
 409        print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
 410        undef;
 411}
 412
 413sub abort_edit {
 414        my $self = shift;
 415        $self->{nr} = $self->{gii}->{nr};
 416        delete $self->{gii};
 417        $self->SUPER::abort_edit(@_);
 418}
 419
 420sub close_edit {
 421        my $self = shift;
 422
 423        if ($_preserve_empty_dirs) {
 424                my @empty_dirs;
 425
 426                # Any entry flagged as empty that also has an associated
 427                # dir_prop represents a newly created empty directory.
 428                foreach my $i (keys %{$self->{empty}}) {
 429                        push @empty_dirs, $i if exists $self->{dir_prop}->{$i};
 430                }
 431
 432                # Search for directories that have become empty due subsequent
 433                # file deletes.
 434                push @empty_dirs, $self->find_empty_directories();
 435
 436                # Finally, add a placeholder file to each empty directory.
 437                $self->add_placeholder_file($_) foreach (@empty_dirs);
 438
 439                $self->stash_placeholder_list();
 440        }
 441
 442        $self->{git_commit_ok} = 1;
 443        $self->{nr} = $self->{gii}->{nr};
 444        delete $self->{gii};
 445        $self->SUPER::close_edit(@_);
 446}
 447
 448sub find_empty_directories {
 449        my ($self) = @_;
 450        my @empty_dirs;
 451        my %dirs = map { dirname($_) => 1 } @deleted_gpath;
 452
 453        foreach my $dir (sort keys %dirs) {
 454                next if $dir eq ".";
 455
 456                # If there have been any additions to this directory, there is
 457                # no reason to check if it is empty.
 458                my $skip_added = 0;
 459                foreach my $t (qw/dir_prop file_prop/) {
 460                        foreach my $path (keys %{ $self->{$t} }) {
 461                                if (exists $self->{$t}->{dirname($path)}) {
 462                                        $skip_added = 1;
 463                                        last;
 464                                }
 465                        }
 466                        last if $skip_added;
 467                }
 468                next if $skip_added;
 469
 470                # Use `git ls-tree` to get the filenames of this directory
 471                # that existed prior to this particular commit.
 472                my $ls = command('ls-tree', '-z', '--name-only',
 473                                 $self->{c}, "$dir/");
 474                my %files = map { $_ => 1 } split(/\0/, $ls);
 475
 476                # Remove the filenames that were deleted during this commit.
 477                delete $files{$_} foreach (@deleted_gpath);
 478
 479                # Report the directory if there are no filenames left.
 480                push @empty_dirs, $dir unless (scalar %files);
 481        }
 482        @empty_dirs;
 483}
 484
 485sub add_placeholder_file {
 486        my ($self, $dir) = @_;
 487        my $path = "$dir/$_placeholder_filename";
 488        my $gpath = $self->git_path($path);
 489
 490        my $fh = $::_repository->temp_acquire($gpath);
 491        my $hash = $::_repository->hash_and_insert_object(Git::temp_path($fh));
 492        Git::temp_release($fh, 1);
 493        $self->{gii}->update('100644', $hash, $gpath) or croak $!;
 494
 495        # The directory should no longer be considered empty.
 496        delete $self->{empty}->{$dir} if exists $self->{empty}->{$dir};
 497
 498        # Keep track of any placeholder files we create.
 499        $added_placeholder{$dir} = $path;
 500}
 501
 502sub stash_placeholder_list {
 503        my ($self) = @_;
 504        my $k = "svn-remote.$repo_id.added-placeholder";
 505        my $v = eval { command_oneline('config', '--get-all', $k) };
 506        command_noisy('config', '--unset-all', $k) if $v;
 507        foreach (values %added_placeholder) {
 508                command_noisy('config', '--add', $k, $_);
 509        }
 510}
 511
 5121;
 513__END__
 514
 515Git::SVN::Fetcher - tree delta consumer for "git svn fetch"
 516
 517=head1 SYNOPSIS
 518
 519    use SVN::Core;
 520    use SVN::Ra;
 521    use Git::SVN;
 522    use Git::SVN::Fetcher;
 523    use Git;
 524
 525    my $gs = Git::SVN->find_by_url($url);
 526    my $ra = SVN::Ra->new(url => $url);
 527    my $editor = Git::SVN::Fetcher->new($gs);
 528    my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
 529                                  1, $editor);
 530    $reporter->set_path('', $old_rev, 0);
 531    $reporter->finish_report;
 532    my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
 533
 534    foreach my $path (keys %{$editor->{dir_prop}) {
 535        my $props = $editor->{dir_prop}{$path};
 536        foreach my $prop (keys %$props) {
 537            print "property $prop at $path changed to $props->{$prop}\n";
 538        }
 539    }
 540    foreach my $path (keys %{$editor->{empty}) {
 541        my $action = $editor->{empty}{$path} ? 'added' : 'removed';
 542        print "empty directory $path $action\n";
 543    }
 544    foreach my $path (keys %{$editor->{file_prop}) { ... }
 545    foreach my $parent (keys %{$editor->{absent_dir}}) {
 546        my @children = @{$editor->{abstent_dir}{$parent}};
 547        print "cannot fetch directory $parent/$_: not authorized?\n"
 548            foreach @children;
 549    }
 550    foreach my $parent (keys %{$editor->{absent_file}) { ... }
 551
 552=head1 DESCRIPTION
 553
 554This is a subclass of C<SVN::Delta::Editor>, which means it implements
 555callbacks to act as a consumer of Subversion tree deltas.  This
 556particular implementation of those callbacks is meant to store
 557information about the resulting content which B<git svn fetch> could
 558use to populate new commits and new entries for F<unhandled.log>.
 559More specifically:
 560
 561=over
 562
 563=item * Additions, removals, and modifications of files are propagated
 564to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
 565B<git update-index>.
 566
 567=item * Changes in Subversion path properties are recorded in the
 568C<dir_prop> and C<file_prop> fields (which are hashes).
 569
 570=item * Addition and removal of empty directories are indicated by
 571entries with value 1 and 0 respectively in the C<empty> hash.
 572
 573=item * Paths that are present but cannot be conveyed (presumably due
 574to permissions) are recorded in the C<absent_file> and
 575C<absent_dirs> hashes.  For each key, the corresponding value is
 576a list of paths under that directory that were present but
 577could not be conveyed.
 578
 579=back
 580
 581The interface is unstable.  Do not use this module unless you are
 582developing git-svn.
 583
 584=head1 DEPENDENCIES
 585
 586L<SVN::Delta> from the Subversion perl bindings,
 587the core L<Carp>, L<File::Basename>, and L<IO::File> modules,
 588and git's L<Git> helper module.
 589
 590C<Git::SVN::Fetcher> has not been tested using callers other than
 591B<git-svn> itself.
 592
 593=head1 SEE ALSO
 594
 595L<SVN::Delta>,
 596L<Git::SVN::Editor>.
 597
 598=head1 INCOMPATIBILITIES
 599
 600None reported.
 601
 602=head1 BUGS
 603
 604None.