perl / Git / SVN / Fetcher.pmon commit git-svn: allow git-svn fetching to work using serf (8ac251b)
   1package Git::SVN::Fetcher;
   2use vars qw/@ISA $_ignore_regex $_include_regex $_preserve_empty_dirs
   3            $_placeholder_filename @deleted_gpath %added_placeholder
   4            $repo_id/;
   5use strict;
   6use warnings;
   7use SVN::Delta;
   8use Carp qw/croak/;
   9use File::Basename qw/dirname/;
  10use IO::File qw//;
  11use Git qw/command command_oneline command_noisy command_output_pipe
  12           command_input_pipe command_close_pipe
  13           command_bidi_pipe command_close_bidi_pipe/;
  14BEGIN {
  15        @ISA = qw(SVN::Delta::Editor);
  16}
  17
  18# file baton members: path, mode_a, mode_b, pool, fh, blob, base, action
  19sub new {
  20        my ($class, $git_svn, $switch_path) = @_;
  21        my $self = SVN::Delta::Editor->new;
  22        bless $self, $class;
  23        if (exists $git_svn->{last_commit}) {
  24                $self->{c} = $git_svn->{last_commit};
  25                $self->{empty_symlinks} =
  26                                  _mark_empty_symlinks($git_svn, $switch_path);
  27        }
  28
  29        # some options are read globally, but can be overridden locally
  30        # per [svn-remote "..."] section.  Command-line options will *NOT*
  31        # override options set in an [svn-remote "..."] section
  32        $repo_id = $git_svn->{repo_id};
  33        my $k = "svn-remote.$repo_id.ignore-paths";
  34        my $v = eval { command_oneline('config', '--get', $k) };
  35        $self->{ignore_regex} = $v;
  36
  37        $k = "svn-remote.$repo_id.include-paths";
  38        $v = eval { command_oneline('config', '--get', $k) };
  39        $self->{include_regex} = $v;
  40
  41        $k = "svn-remote.$repo_id.preserve-empty-dirs";
  42        $v = eval { command_oneline('config', '--get', '--bool', $k) };
  43        if ($v && $v eq 'true') {
  44                $_preserve_empty_dirs = 1;
  45                $k = "svn-remote.$repo_id.placeholder-filename";
  46                $v = eval { command_oneline('config', '--get', $k) };
  47                $_placeholder_filename = $v;
  48        }
  49
  50        # Load the list of placeholder files added during previous invocations.
  51        $k = "svn-remote.$repo_id.added-placeholder";
  52        $v = eval { command_oneline('config', '--get-all', $k) };
  53        if ($_preserve_empty_dirs && $v) {
  54                # command() prints errors to stderr, so we only call it if
  55                # command_oneline() succeeded.
  56                my @v = command('config', '--get-all', $k);
  57                $added_placeholder{ dirname($_) } = $_ foreach @v;
  58        }
  59
  60        $self->{empty} = {};
  61        $self->{dir_prop} = {};
  62        $self->{file_prop} = {};
  63        $self->{absent_dir} = {};
  64        $self->{absent_file} = {};
  65        require Git::IndexInfo;
  66        $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
  67        $self->{pathnameencoding} = Git::config('svn.pathnameencoding');
  68        $self;
  69}
  70
  71# this uses the Ra object, so it must be called before do_{switch,update},
  72# not inside them (when the Git::SVN::Fetcher object is passed) to
  73# do_{switch,update}
  74sub _mark_empty_symlinks {
  75        my ($git_svn, $switch_path) = @_;
  76        my $bool = Git::config_bool('svn.brokenSymlinkWorkaround');
  77        return {} if (!defined($bool)) || (defined($bool) && ! $bool);
  78
  79        my %ret;
  80        my ($rev, $cmt) = $git_svn->last_rev_commit;
  81        return {} unless ($rev && $cmt);
  82
  83        # allow the warning to be printed for each revision we fetch to
  84        # ensure the user sees it.  The user can also disable the workaround
  85        # on the repository even while git svn is running and the next
  86        # revision fetched will skip this expensive function.
  87        my $printed_warning;
  88        chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
  89        my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
  90        local $/ = "\0";
  91        my $pfx = defined($switch_path) ? $switch_path : $git_svn->path;
  92        $pfx .= '/' if length($pfx);
  93        while (<$ls>) {
  94                chomp;
  95                s/\A100644 blob $empty_blob\t//o or next;
  96                unless ($printed_warning) {
  97                        print STDERR "Scanning for empty symlinks, ",
  98                                     "this may take a while if you have ",
  99                                     "many empty files\n",
 100                                     "You may disable this with `",
 101                                     "git config svn.brokenSymlinkWorkaround ",
 102                                     "false'.\n",
 103                                     "This may be done in a different ",
 104                                     "terminal without restarting ",
 105                                     "git svn\n";
 106                        $printed_warning = 1;
 107                }
 108                my $path = $_;
 109                my (undef, $props) =
 110                               $git_svn->ra->get_file($pfx.$path, $rev, undef);
 111                if ($props->{'svn:special'}) {
 112                        $ret{$path} = 1;
 113                }
 114        }
 115        command_close_pipe($ls, $ctx);
 116        \%ret;
 117}
 118
 119# returns true if a given path is inside a ".git" directory
 120sub in_dot_git {
 121        $_[0] =~ m{(?:^|/)\.git(?:/|$)};
 122}
 123
 124# return value: 0 -- don't ignore, 1 -- ignore
 125# This will also check whether the path is explicitly included
 126sub is_path_ignored {
 127        my ($self, $path) = @_;
 128        return 1 if in_dot_git($path);
 129        return 1 if defined($self->{ignore_regex}) &&
 130                    $path =~ m!$self->{ignore_regex}!;
 131        return 0 if defined($self->{include_regex}) &&
 132                    $path =~ m!$self->{include_regex}!;
 133        return 0 if defined($_include_regex) &&
 134                    $path =~ m!$_include_regex!;
 135        return 1 if defined($self->{include_regex});
 136        return 1 if defined($_include_regex);
 137        return 0 unless defined($_ignore_regex);
 138        return 1 if $path =~ m!$_ignore_regex!o;
 139        return 0;
 140}
 141
 142sub set_path_strip {
 143        my ($self, $path) = @_;
 144        $self->{path_strip} = qr/^\Q$path\E(\/|$)/ if length $path;
 145}
 146
 147sub open_root {
 148        { path => '' };
 149}
 150
 151sub open_directory {
 152        my ($self, $path, $pb, $rev) = @_;
 153        { path => $path };
 154}
 155
 156sub git_path {
 157        my ($self, $path) = @_;
 158        if (my $enc = $self->{pathnameencoding}) {
 159                require Encode;
 160                Encode::from_to($path, 'UTF-8', $enc);
 161        }
 162        if ($self->{path_strip}) {
 163                $path =~ s!$self->{path_strip}!! or
 164                  die "Failed to strip path '$path' ($self->{path_strip})\n";
 165        }
 166        $path;
 167}
 168
 169sub delete_entry {
 170        my ($self, $path, $rev, $pb) = @_;
 171        return undef if $self->is_path_ignored($path);
 172
 173        my $gpath = $self->git_path($path);
 174        return undef if ($gpath eq '');
 175
 176        # remove entire directories.
 177        my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
 178                         =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/);
 179        if ($tree) {
 180                my ($ls, $ctx) = command_output_pipe(qw/ls-tree
 181                                                     -r --name-only -z/,
 182                                                     $tree);
 183                local $/ = "\0";
 184                while (<$ls>) {
 185                        chomp;
 186                        my $rmpath = "$gpath/$_";
 187                        $self->{gii}->remove($rmpath);
 188                        print "\tD\t$rmpath\n" unless $::_q;
 189                }
 190                print "\tD\t$gpath/\n" unless $::_q;
 191                command_close_pipe($ls, $ctx);
 192        } else {
 193                $self->{gii}->remove($gpath);
 194                print "\tD\t$gpath\n" unless $::_q;
 195        }
 196        # Don't add to @deleted_gpath if we're deleting a placeholder file.
 197        push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)};
 198        $self->{empty}->{$path} = 0;
 199        undef;
 200}
 201
 202sub open_file {
 203        my ($self, $path, $pb, $rev) = @_;
 204        my ($mode, $blob);
 205
 206        goto out if $self->is_path_ignored($path);
 207
 208        my $gpath = $self->git_path($path);
 209        ($mode, $blob) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
 210                             =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/);
 211        unless (defined $mode && defined $blob) {
 212                die "$path was not found in commit $self->{c} (r$rev)\n";
 213        }
 214        if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) {
 215                $mode = '120000';
 216        }
 217out:
 218        { path => $path, mode_a => $mode, mode_b => $mode, blob => $blob,
 219          pool => SVN::Pool->new, action => 'M' };
 220}
 221
 222sub add_file {
 223        my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
 224        my $mode;
 225
 226        if (!$self->is_path_ignored($path)) {
 227                my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
 228                delete $self->{empty}->{$dir};
 229                $mode = '100644';
 230
 231                if ($added_placeholder{$dir}) {
 232                        # Remove our placeholder file, if we created one.
 233                        delete_entry($self, $added_placeholder{$dir})
 234                                unless $path eq $added_placeholder{$dir};
 235                        delete $added_placeholder{$dir}
 236                }
 237        }
 238
 239        { path => $path, mode_a => $mode, mode_b => $mode,
 240          pool => SVN::Pool->new, action => 'A' };
 241}
 242
 243sub add_directory {
 244        my ($self, $path, $cp_path, $cp_rev) = @_;
 245        goto out if $self->is_path_ignored($path);
 246        my $gpath = $self->git_path($path);
 247        if ($gpath eq '') {
 248                my ($ls, $ctx) = command_output_pipe(qw/ls-tree
 249                                                     -r --name-only -z/,
 250                                                     $self->{c});
 251                local $/ = "\0";
 252                while (<$ls>) {
 253                        chomp;
 254                        $self->{gii}->remove($_);
 255                        print "\tD\t$_\n" unless $::_q;
 256                        push @deleted_gpath, $gpath;
 257                }
 258                command_close_pipe($ls, $ctx);
 259                $self->{empty}->{$path} = 0;
 260        }
 261        my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
 262        delete $self->{empty}->{$dir};
 263        $self->{empty}->{$path} = 1;
 264
 265        if ($added_placeholder{$dir}) {
 266                # Remove our placeholder file, if we created one.
 267                delete_entry($self, $added_placeholder{$dir});
 268                delete $added_placeholder{$dir}
 269        }
 270
 271out:
 272        { path => $path };
 273}
 274
 275sub change_dir_prop {
 276        my ($self, $db, $prop, $value) = @_;
 277        return undef if $self->is_path_ignored($db->{path});
 278        $self->{dir_prop}->{$db->{path}} ||= {};
 279        $self->{dir_prop}->{$db->{path}}->{$prop} = $value;
 280        undef;
 281}
 282
 283sub absent_directory {
 284        my ($self, $path, $pb) = @_;
 285        return undef if $self->is_path_ignored($path);
 286        $self->{absent_dir}->{$pb->{path}} ||= [];
 287        push @{$self->{absent_dir}->{$pb->{path}}}, $path;
 288        undef;
 289}
 290
 291sub absent_file {
 292        my ($self, $path, $pb) = @_;
 293        return undef if $self->is_path_ignored($path);
 294        $self->{absent_file}->{$pb->{path}} ||= [];
 295        push @{$self->{absent_file}->{$pb->{path}}}, $path;
 296        undef;
 297}
 298
 299sub change_file_prop {
 300        my ($self, $fb, $prop, $value) = @_;
 301        return undef if $self->is_path_ignored($fb->{path});
 302        if ($prop eq 'svn:executable') {
 303                if ($fb->{mode_b} != 120000) {
 304                        $fb->{mode_b} = defined $value ? 100755 : 100644;
 305                }
 306        } elsif ($prop eq 'svn:special') {
 307                $fb->{mode_b} = defined $value ? 120000 : 100644;
 308        } else {
 309                $self->{file_prop}->{$fb->{path}} ||= {};
 310                $self->{file_prop}->{$fb->{path}}->{$prop} = $value;
 311        }
 312        undef;
 313}
 314
 315sub apply_textdelta {
 316        my ($self, $fb, $exp) = @_;
 317        return undef if $self->is_path_ignored($fb->{path});
 318        my $suffix = 0;
 319        ++$suffix while $::_repository->temp_is_locked("svn_delta_${$}_$suffix");
 320        my $fh = $::_repository->temp_acquire("svn_delta_${$}_$suffix");
 321        # $fh gets auto-closed() by SVN::TxDelta::apply(),
 322        # (but $base does not,) so dup() it for reading in close_file
 323        open my $dup, '<&', $fh or croak $!;
 324        my $base = $::_repository->temp_acquire("git_blob_${$}_$suffix");
 325
 326        if ($fb->{blob}) {
 327                my ($base_is_link, $size);
 328
 329                if ($fb->{mode_a} eq '120000' &&
 330                    ! $self->{empty_symlinks}->{$fb->{path}}) {
 331                        print $base 'link ' or die "print $!\n";
 332                        $base_is_link = 1;
 333                }
 334        retry:
 335                $size = $::_repository->cat_blob($fb->{blob}, $base);
 336                die "Failed to read object $fb->{blob}" if ($size < 0);
 337
 338                if (defined $exp) {
 339                        seek $base, 0, 0 or croak $!;
 340                        my $got = ::md5sum($base);
 341                        if ($got ne $exp) {
 342                                my $err = "Checksum mismatch: ".
 343                                       "$fb->{path} $fb->{blob}\n" .
 344                                       "expected: $exp\n" .
 345                                       "     got: $got\n";
 346                                if ($base_is_link) {
 347                                        warn $err,
 348                                             "Retrying... (possibly ",
 349                                             "a bad symlink from SVN)\n";
 350                                        $::_repository->temp_reset($base);
 351                                        $base_is_link = 0;
 352                                        goto retry;
 353                                }
 354                                die $err;
 355                        }
 356                }
 357        }
 358        seek $base, 0, 0 or croak $!;
 359        $fb->{fh} = $fh;
 360        $fb->{base} = $base;
 361        [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
 362}
 363
 364sub close_file {
 365        my ($self, $fb, $exp) = @_;
 366        return undef if $self->is_path_ignored($fb->{path});
 367
 368        my $hash;
 369        my $path = $self->git_path($fb->{path});
 370        if (my $fh = $fb->{fh}) {
 371                if (defined $exp) {
 372                        seek($fh, 0, 0) or croak $!;
 373                        my $got = ::md5sum($fh);
 374                        if ($got ne $exp) {
 375                                die "Checksum mismatch: $path\n",
 376                                    "expected: $exp\n    got: $got\n";
 377                        }
 378                }
 379                if ($fb->{mode_b} == 120000) {
 380                        sysseek($fh, 0, 0) or croak $!;
 381                        my $rd = sysread($fh, my $buf, 5);
 382
 383                        if (!defined $rd) {
 384                                croak "sysread: $!\n";
 385                        } elsif ($rd == 0) {
 386                                warn "$path has mode 120000",
 387                                     " but it points to nothing\n",
 388                                     "converting to an empty file with mode",
 389                                     " 100644\n";
 390                                $fb->{mode_b} = '100644';
 391                        } elsif ($buf ne 'link ') {
 392                                warn "$path has mode 120000",
 393                                     " but is not a link\n";
 394                        } else {
 395                                my $tmp_fh = $::_repository->temp_acquire(
 396                                        'svn_hash');
 397                                my $res;
 398                                while ($res = sysread($fh, my $str, 1024)) {
 399                                        my $out = syswrite($tmp_fh, $str, $res);
 400                                        defined($out) && $out == $res
 401                                                or croak("write ",
 402                                                        Git::temp_path($tmp_fh),
 403                                                        ": $!\n");
 404                                }
 405                                defined $res or croak $!;
 406
 407                                ($fh, $tmp_fh) = ($tmp_fh, $fh);
 408                                Git::temp_release($tmp_fh, 1);
 409                        }
 410                }
 411
 412                $hash = $::_repository->hash_and_insert_object(
 413                                Git::temp_path($fh));
 414                $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
 415
 416                Git::temp_release($fb->{base}, 1);
 417                Git::temp_release($fh, 1);
 418        } else {
 419                $hash = $fb->{blob} or die "no blob information\n";
 420        }
 421        $fb->{pool}->clear;
 422        $self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
 423        print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
 424        undef;
 425}
 426
 427sub abort_edit {
 428        my $self = shift;
 429        $self->{nr} = $self->{gii}->{nr};
 430        delete $self->{gii};
 431        $self->SUPER::abort_edit(@_);
 432}
 433
 434sub close_edit {
 435        my $self = shift;
 436
 437        if ($_preserve_empty_dirs) {
 438                my @empty_dirs;
 439
 440                # Any entry flagged as empty that also has an associated
 441                # dir_prop represents a newly created empty directory.
 442                foreach my $i (keys %{$self->{empty}}) {
 443                        push @empty_dirs, $i if exists $self->{dir_prop}->{$i};
 444                }
 445
 446                # Search for directories that have become empty due subsequent
 447                # file deletes.
 448                push @empty_dirs, $self->find_empty_directories();
 449
 450                # Finally, add a placeholder file to each empty directory.
 451                $self->add_placeholder_file($_) foreach (@empty_dirs);
 452
 453                $self->stash_placeholder_list();
 454        }
 455
 456        $self->{git_commit_ok} = 1;
 457        $self->{nr} = $self->{gii}->{nr};
 458        delete $self->{gii};
 459        $self->SUPER::close_edit(@_);
 460}
 461
 462sub find_empty_directories {
 463        my ($self) = @_;
 464        my @empty_dirs;
 465        my %dirs = map { dirname($_) => 1 } @deleted_gpath;
 466
 467        foreach my $dir (sort keys %dirs) {
 468                next if $dir eq ".";
 469
 470                # If there have been any additions to this directory, there is
 471                # no reason to check if it is empty.
 472                my $skip_added = 0;
 473                foreach my $t (qw/dir_prop file_prop/) {
 474                        foreach my $path (keys %{ $self->{$t} }) {
 475                                if (exists $self->{$t}->{dirname($path)}) {
 476                                        $skip_added = 1;
 477                                        last;
 478                                }
 479                        }
 480                        last if $skip_added;
 481                }
 482                next if $skip_added;
 483
 484                # Use `git ls-tree` to get the filenames of this directory
 485                # that existed prior to this particular commit.
 486                my $ls = command('ls-tree', '-z', '--name-only',
 487                                 $self->{c}, "$dir/");
 488                my %files = map { $_ => 1 } split(/\0/, $ls);
 489
 490                # Remove the filenames that were deleted during this commit.
 491                delete $files{$_} foreach (@deleted_gpath);
 492
 493                # Report the directory if there are no filenames left.
 494                push @empty_dirs, $dir unless (scalar %files);
 495        }
 496        @empty_dirs;
 497}
 498
 499sub add_placeholder_file {
 500        my ($self, $dir) = @_;
 501        my $path = "$dir/$_placeholder_filename";
 502        my $gpath = $self->git_path($path);
 503
 504        my $fh = $::_repository->temp_acquire($gpath);
 505        my $hash = $::_repository->hash_and_insert_object(Git::temp_path($fh));
 506        Git::temp_release($fh, 1);
 507        $self->{gii}->update('100644', $hash, $gpath) or croak $!;
 508
 509        # The directory should no longer be considered empty.
 510        delete $self->{empty}->{$dir} if exists $self->{empty}->{$dir};
 511
 512        # Keep track of any placeholder files we create.
 513        $added_placeholder{$dir} = $path;
 514}
 515
 516sub stash_placeholder_list {
 517        my ($self) = @_;
 518        my $k = "svn-remote.$repo_id.added-placeholder";
 519        my $v = eval { command_oneline('config', '--get-all', $k) };
 520        command_noisy('config', '--unset-all', $k) if $v;
 521        foreach (values %added_placeholder) {
 522                command_noisy('config', '--add', $k, $_);
 523        }
 524}
 525
 5261;
 527__END__
 528
 529=head1 NAME
 530
 531Git::SVN::Fetcher - tree delta consumer for "git svn fetch"
 532
 533=head1 SYNOPSIS
 534
 535    use SVN::Core;
 536    use SVN::Ra;
 537    use Git::SVN;
 538    use Git::SVN::Fetcher;
 539    use Git;
 540
 541    my $gs = Git::SVN->find_by_url($url);
 542    my $ra = SVN::Ra->new(url => $url);
 543    my $editor = Git::SVN::Fetcher->new($gs);
 544    my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
 545                                  1, $editor);
 546    $reporter->set_path('', $old_rev, 0);
 547    $reporter->finish_report;
 548    my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
 549
  550    foreach my $path (keys %{$editor->{dir_prop}}) {
 551        my $props = $editor->{dir_prop}{$path};
 552        foreach my $prop (keys %$props) {
 553            print "property $prop at $path changed to $props->{$prop}\n";
 554        }
 555    }
  556    foreach my $path (keys %{$editor->{empty}}) {
 557        my $action = $editor->{empty}{$path} ? 'added' : 'removed';
 558        print "empty directory $path $action\n";
 559    }
  560    foreach my $path (keys %{$editor->{file_prop}}) { ... }
 561    foreach my $parent (keys %{$editor->{absent_dir}}) {
  562        my @children = @{$editor->{absent_dir}{$parent}};
 563        print "cannot fetch directory $parent/$_: not authorized?\n"
 564            foreach @children;
 565    }
  566    foreach my $parent (keys %{$editor->{absent_file}}) { ... }
 567
 568=head1 DESCRIPTION
 569
 570This is a subclass of C<SVN::Delta::Editor>, which means it implements
 571callbacks to act as a consumer of Subversion tree deltas.  This
 572particular implementation of those callbacks is meant to store
 573information about the resulting content which B<git svn fetch> could
 574use to populate new commits and new entries for F<unhandled.log>.
 575More specifically:
 576
 577=over
 578
 579=item * Additions, removals, and modifications of files are propagated
 580to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
 581B<git update-index>.
 582
 583=item * Changes in Subversion path properties are recorded in the
 584C<dir_prop> and C<file_prop> fields (which are hashes).
 585
 586=item * Addition and removal of empty directories are indicated by
 587entries with value 1 and 0 respectively in the C<empty> hash.
 588
 589=item * Paths that are present but cannot be conveyed (presumably due
 590to permissions) are recorded in the C<absent_file> and
  591C<absent_dir> hashes.  For each key, the corresponding value is
 592a list of paths under that directory that were present but
 593could not be conveyed.
 594
 595=back
 596
 597The interface is unstable.  Do not use this module unless you are
 598developing git-svn.
 599
 600=head1 DEPENDENCIES
 601
 602L<SVN::Delta> from the Subversion perl bindings,
 603the core L<Carp>, L<File::Basename>, and L<IO::File> modules,
 604and git's L<Git> helper module.
 605
 606C<Git::SVN::Fetcher> has not been tested using callers other than
 607B<git-svn> itself.
 608
 609=head1 SEE ALSO
 610
 611L<SVN::Delta>,
 612L<Git::SVN::Editor>.
 613
 614=head1 INCOMPATIBILITIES
 615
 616None reported.
 617
 618=head1 BUGS
 619
 620None.