perl / Git / SVN / Fetcher.pmon commit git-svn: added an --include-path flag (a7b1023)
   1package Git::SVN::Fetcher;
   2use vars qw/@ISA $_ignore_regex $_include_regex $_preserve_empty_dirs
   3            $_placeholder_filename @deleted_gpath %added_placeholder
   4            $repo_id/;
   5use strict;
   6use warnings;
   7use SVN::Delta;
   8use Carp qw/croak/;
   9use File::Basename qw/dirname/;
  10use IO::File qw//;
  11use Git qw/command command_oneline command_noisy command_output_pipe
  12           command_input_pipe command_close_pipe
  13           command_bidi_pipe command_close_bidi_pipe/;
  14BEGIN {
  15        @ISA = qw(SVN::Delta::Editor);
  16}
  17
# file baton members: path, mode_a, mode_b, pool, fh, blob, base, action
  19sub new {
  20        my ($class, $git_svn, $switch_path) = @_;
  21        my $self = SVN::Delta::Editor->new;
  22        bless $self, $class;
  23        if (exists $git_svn->{last_commit}) {
  24                $self->{c} = $git_svn->{last_commit};
  25                $self->{empty_symlinks} =
  26                                  _mark_empty_symlinks($git_svn, $switch_path);
  27        }
  28
  29        # some options are read globally, but can be overridden locally
  30        # per [svn-remote "..."] section.  Command-line options will *NOT*
  31        # override options set in an [svn-remote "..."] section
  32        $repo_id = $git_svn->{repo_id};
  33        my $k = "svn-remote.$repo_id.ignore-paths";
  34        my $v = eval { command_oneline('config', '--get', $k) };
  35        $self->{ignore_regex} = $v;
  36
  37        $k = "svn-remote.$repo_id.include-paths";
  38        $v = eval { command_oneline('config', '--get', $k) };
  39        $self->{include_regex} = $v;
  40
  41        $k = "svn-remote.$repo_id.preserve-empty-dirs";
  42        $v = eval { command_oneline('config', '--get', '--bool', $k) };
  43        if ($v && $v eq 'true') {
  44                $_preserve_empty_dirs = 1;
  45                $k = "svn-remote.$repo_id.placeholder-filename";
  46                $v = eval { command_oneline('config', '--get', $k) };
  47                $_placeholder_filename = $v;
  48        }
  49
  50        # Load the list of placeholder files added during previous invocations.
  51        $k = "svn-remote.$repo_id.added-placeholder";
  52        $v = eval { command_oneline('config', '--get-all', $k) };
  53        if ($_preserve_empty_dirs && $v) {
  54                # command() prints errors to stderr, so we only call it if
  55                # command_oneline() succeeded.
  56                my @v = command('config', '--get-all', $k);
  57                $added_placeholder{ dirname($_) } = $_ foreach @v;
  58        }
  59
  60        $self->{empty} = {};
  61        $self->{dir_prop} = {};
  62        $self->{file_prop} = {};
  63        $self->{absent_dir} = {};
  64        $self->{absent_file} = {};
  65        require Git::IndexInfo;
  66        $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
  67        $self->{pathnameencoding} = Git::config('svn.pathnameencoding');
  68        $self;
  69}
  70
  71# this uses the Ra object, so it must be called before do_{switch,update},
  72# not inside them (when the Git::SVN::Fetcher object is passed) to
  73# do_{switch,update}
  74sub _mark_empty_symlinks {
  75        my ($git_svn, $switch_path) = @_;
  76        my $bool = Git::config_bool('svn.brokenSymlinkWorkaround');
  77        return {} if (!defined($bool)) || (defined($bool) && ! $bool);
  78
  79        my %ret;
  80        my ($rev, $cmt) = $git_svn->last_rev_commit;
  81        return {} unless ($rev && $cmt);
  82
  83        # allow the warning to be printed for each revision we fetch to
  84        # ensure the user sees it.  The user can also disable the workaround
  85        # on the repository even while git svn is running and the next
  86        # revision fetched will skip this expensive function.
  87        my $printed_warning;
  88        chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
  89        my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
  90        local $/ = "\0";
  91        my $pfx = defined($switch_path) ? $switch_path : $git_svn->path;
  92        $pfx .= '/' if length($pfx);
  93        while (<$ls>) {
  94                chomp;
  95                s/\A100644 blob $empty_blob\t//o or next;
  96                unless ($printed_warning) {
  97                        print STDERR "Scanning for empty symlinks, ",
  98                                     "this may take a while if you have ",
  99                                     "many empty files\n",
 100                                     "You may disable this with `",
 101                                     "git config svn.brokenSymlinkWorkaround ",
 102                                     "false'.\n",
 103                                     "This may be done in a different ",
 104                                     "terminal without restarting ",
 105                                     "git svn\n";
 106                        $printed_warning = 1;
 107                }
 108                my $path = $_;
 109                my (undef, $props) =
 110                               $git_svn->ra->get_file($pfx.$path, $rev, undef);
 111                if ($props->{'svn:special'}) {
 112                        $ret{$path} = 1;
 113                }
 114        }
 115        command_close_pipe($ls, $ctx);
 116        \%ret;
 117}
 118
 119# returns true if a given path is inside a ".git" directory
 120sub in_dot_git {
 121        $_[0] =~ m{(?:^|/)\.git(?:/|$)};
 122}
 123
 124# return value: 0 -- don't ignore, 1 -- ignore
 125# This will also check whether the path is explicitly included
 126sub is_path_ignored {
 127        my ($self, $path) = @_;
 128        return 1 if in_dot_git($path);
 129        return 1 if defined($self->{ignore_regex}) &&
 130                    $path =~ m!$self->{ignore_regex}!;
 131        return 0 if defined($self->{include_regex}) &&
 132                    $path =~ m!$self->{include_regex}!;
 133        return 0 if defined($_include_regex) &&
 134                    $path =~ m!$_include_regex!;
 135        return 1 if defined($self->{include_regex});
 136        return 1 if defined($_include_regex);
 137        return 0 unless defined($_ignore_regex);
 138        return 1 if $path =~ m!$_ignore_regex!o;
 139        return 0;
 140}
 141
 142sub set_path_strip {
 143        my ($self, $path) = @_;
 144        $self->{path_strip} = qr/^\Q$path\E(\/|$)/ if length $path;
 145}
 146
 147sub open_root {
 148        { path => '' };
 149}
 150
 151sub open_directory {
 152        my ($self, $path, $pb, $rev) = @_;
 153        { path => $path };
 154}
 155
 156sub git_path {
 157        my ($self, $path) = @_;
 158        if (my $enc = $self->{pathnameencoding}) {
 159                require Encode;
 160                Encode::from_to($path, 'UTF-8', $enc);
 161        }
 162        if ($self->{path_strip}) {
 163                $path =~ s!$self->{path_strip}!! or
 164                  die "Failed to strip path '$path' ($self->{path_strip})\n";
 165        }
 166        $path;
 167}
 168
 169sub delete_entry {
 170        my ($self, $path, $rev, $pb) = @_;
 171        return undef if $self->is_path_ignored($path);
 172
 173        my $gpath = $self->git_path($path);
 174        return undef if ($gpath eq '');
 175
 176        # remove entire directories.
 177        my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
 178                         =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/);
 179        if ($tree) {
 180                my ($ls, $ctx) = command_output_pipe(qw/ls-tree
 181                                                     -r --name-only -z/,
 182                                                     $tree);
 183                local $/ = "\0";
 184                while (<$ls>) {
 185                        chomp;
 186                        my $rmpath = "$gpath/$_";
 187                        $self->{gii}->remove($rmpath);
 188                        print "\tD\t$rmpath\n" unless $::_q;
 189                }
 190                print "\tD\t$gpath/\n" unless $::_q;
 191                command_close_pipe($ls, $ctx);
 192        } else {
 193                $self->{gii}->remove($gpath);
 194                print "\tD\t$gpath\n" unless $::_q;
 195        }
 196        # Don't add to @deleted_gpath if we're deleting a placeholder file.
 197        push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)};
 198        $self->{empty}->{$path} = 0;
 199        undef;
 200}
 201
 202sub open_file {
 203        my ($self, $path, $pb, $rev) = @_;
 204        my ($mode, $blob);
 205
 206        goto out if $self->is_path_ignored($path);
 207
 208        my $gpath = $self->git_path($path);
 209        ($mode, $blob) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
 210                             =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/);
 211        unless (defined $mode && defined $blob) {
 212                die "$path was not found in commit $self->{c} (r$rev)\n";
 213        }
 214        if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) {
 215                $mode = '120000';
 216        }
 217out:
 218        { path => $path, mode_a => $mode, mode_b => $mode, blob => $blob,
 219          pool => SVN::Pool->new, action => 'M' };
 220}
 221
 222sub add_file {
 223        my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
 224        my $mode;
 225
 226        if (!$self->is_path_ignored($path)) {
 227                my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
 228                delete $self->{empty}->{$dir};
 229                $mode = '100644';
 230
 231                if ($added_placeholder{$dir}) {
 232                        # Remove our placeholder file, if we created one.
 233                        delete_entry($self, $added_placeholder{$dir})
 234                                unless $path eq $added_placeholder{$dir};
 235                        delete $added_placeholder{$dir}
 236                }
 237        }
 238
 239        { path => $path, mode_a => $mode, mode_b => $mode,
 240          pool => SVN::Pool->new, action => 'A' };
 241}
 242
 243sub add_directory {
 244        my ($self, $path, $cp_path, $cp_rev) = @_;
 245        goto out if $self->is_path_ignored($path);
 246        my $gpath = $self->git_path($path);
 247        if ($gpath eq '') {
 248                my ($ls, $ctx) = command_output_pipe(qw/ls-tree
 249                                                     -r --name-only -z/,
 250                                                     $self->{c});
 251                local $/ = "\0";
 252                while (<$ls>) {
 253                        chomp;
 254                        $self->{gii}->remove($_);
 255                        print "\tD\t$_\n" unless $::_q;
 256                        push @deleted_gpath, $gpath;
 257                }
 258                command_close_pipe($ls, $ctx);
 259                $self->{empty}->{$path} = 0;
 260        }
 261        my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
 262        delete $self->{empty}->{$dir};
 263        $self->{empty}->{$path} = 1;
 264
 265        if ($added_placeholder{$dir}) {
 266                # Remove our placeholder file, if we created one.
 267                delete_entry($self, $added_placeholder{$dir});
 268                delete $added_placeholder{$dir}
 269        }
 270
 271out:
 272        { path => $path };
 273}
 274
 275sub change_dir_prop {
 276        my ($self, $db, $prop, $value) = @_;
 277        return undef if $self->is_path_ignored($db->{path});
 278        $self->{dir_prop}->{$db->{path}} ||= {};
 279        $self->{dir_prop}->{$db->{path}}->{$prop} = $value;
 280        undef;
 281}
 282
 283sub absent_directory {
 284        my ($self, $path, $pb) = @_;
 285        return undef if $self->is_path_ignored($path);
 286        $self->{absent_dir}->{$pb->{path}} ||= [];
 287        push @{$self->{absent_dir}->{$pb->{path}}}, $path;
 288        undef;
 289}
 290
 291sub absent_file {
 292        my ($self, $path, $pb) = @_;
 293        return undef if $self->is_path_ignored($path);
 294        $self->{absent_file}->{$pb->{path}} ||= [];
 295        push @{$self->{absent_file}->{$pb->{path}}}, $path;
 296        undef;
 297}
 298
 299sub change_file_prop {
 300        my ($self, $fb, $prop, $value) = @_;
 301        return undef if $self->is_path_ignored($fb->{path});
 302        if ($prop eq 'svn:executable') {
 303                if ($fb->{mode_b} != 120000) {
 304                        $fb->{mode_b} = defined $value ? 100755 : 100644;
 305                }
 306        } elsif ($prop eq 'svn:special') {
 307                $fb->{mode_b} = defined $value ? 120000 : 100644;
 308        } else {
 309                $self->{file_prop}->{$fb->{path}} ||= {};
 310                $self->{file_prop}->{$fb->{path}}->{$prop} = $value;
 311        }
 312        undef;
 313}
 314
 315sub apply_textdelta {
 316        my ($self, $fb, $exp) = @_;
 317        return undef if $self->is_path_ignored($fb->{path});
 318        my $fh = $::_repository->temp_acquire('svn_delta');
 319        # $fh gets auto-closed() by SVN::TxDelta::apply(),
 320        # (but $base does not,) so dup() it for reading in close_file
 321        open my $dup, '<&', $fh or croak $!;
 322        my $base = $::_repository->temp_acquire('git_blob');
 323
 324        if ($fb->{blob}) {
 325                my ($base_is_link, $size);
 326
 327                if ($fb->{mode_a} eq '120000' &&
 328                    ! $self->{empty_symlinks}->{$fb->{path}}) {
 329                        print $base 'link ' or die "print $!\n";
 330                        $base_is_link = 1;
 331                }
 332        retry:
 333                $size = $::_repository->cat_blob($fb->{blob}, $base);
 334                die "Failed to read object $fb->{blob}" if ($size < 0);
 335
 336                if (defined $exp) {
 337                        seek $base, 0, 0 or croak $!;
 338                        my $got = ::md5sum($base);
 339                        if ($got ne $exp) {
 340                                my $err = "Checksum mismatch: ".
 341                                       "$fb->{path} $fb->{blob}\n" .
 342                                       "expected: $exp\n" .
 343                                       "     got: $got\n";
 344                                if ($base_is_link) {
 345                                        warn $err,
 346                                             "Retrying... (possibly ",
 347                                             "a bad symlink from SVN)\n";
 348                                        $::_repository->temp_reset($base);
 349                                        $base_is_link = 0;
 350                                        goto retry;
 351                                }
 352                                die $err;
 353                        }
 354                }
 355        }
 356        seek $base, 0, 0 or croak $!;
 357        $fb->{fh} = $fh;
 358        $fb->{base} = $base;
 359        [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
 360}
 361
 362sub close_file {
 363        my ($self, $fb, $exp) = @_;
 364        return undef if $self->is_path_ignored($fb->{path});
 365
 366        my $hash;
 367        my $path = $self->git_path($fb->{path});
 368        if (my $fh = $fb->{fh}) {
 369                if (defined $exp) {
 370                        seek($fh, 0, 0) or croak $!;
 371                        my $got = ::md5sum($fh);
 372                        if ($got ne $exp) {
 373                                die "Checksum mismatch: $path\n",
 374                                    "expected: $exp\n    got: $got\n";
 375                        }
 376                }
 377                if ($fb->{mode_b} == 120000) {
 378                        sysseek($fh, 0, 0) or croak $!;
 379                        my $rd = sysread($fh, my $buf, 5);
 380
 381                        if (!defined $rd) {
 382                                croak "sysread: $!\n";
 383                        } elsif ($rd == 0) {
 384                                warn "$path has mode 120000",
 385                                     " but it points to nothing\n",
 386                                     "converting to an empty file with mode",
 387                                     " 100644\n";
 388                                $fb->{mode_b} = '100644';
 389                        } elsif ($buf ne 'link ') {
 390                                warn "$path has mode 120000",
 391                                     " but is not a link\n";
 392                        } else {
 393                                my $tmp_fh = $::_repository->temp_acquire(
 394                                        'svn_hash');
 395                                my $res;
 396                                while ($res = sysread($fh, my $str, 1024)) {
 397                                        my $out = syswrite($tmp_fh, $str, $res);
 398                                        defined($out) && $out == $res
 399                                                or croak("write ",
 400                                                        Git::temp_path($tmp_fh),
 401                                                        ": $!\n");
 402                                }
 403                                defined $res or croak $!;
 404
 405                                ($fh, $tmp_fh) = ($tmp_fh, $fh);
 406                                Git::temp_release($tmp_fh, 1);
 407                        }
 408                }
 409
 410                $hash = $::_repository->hash_and_insert_object(
 411                                Git::temp_path($fh));
 412                $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
 413
 414                Git::temp_release($fb->{base}, 1);
 415                Git::temp_release($fh, 1);
 416        } else {
 417                $hash = $fb->{blob} or die "no blob information\n";
 418        }
 419        $fb->{pool}->clear;
 420        $self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
 421        print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
 422        undef;
 423}
 424
 425sub abort_edit {
 426        my $self = shift;
 427        $self->{nr} = $self->{gii}->{nr};
 428        delete $self->{gii};
 429        $self->SUPER::abort_edit(@_);
 430}
 431
 432sub close_edit {
 433        my $self = shift;
 434
 435        if ($_preserve_empty_dirs) {
 436                my @empty_dirs;
 437
 438                # Any entry flagged as empty that also has an associated
 439                # dir_prop represents a newly created empty directory.
 440                foreach my $i (keys %{$self->{empty}}) {
 441                        push @empty_dirs, $i if exists $self->{dir_prop}->{$i};
 442                }
 443
 444                # Search for directories that have become empty due subsequent
 445                # file deletes.
 446                push @empty_dirs, $self->find_empty_directories();
 447
 448                # Finally, add a placeholder file to each empty directory.
 449                $self->add_placeholder_file($_) foreach (@empty_dirs);
 450
 451                $self->stash_placeholder_list();
 452        }
 453
 454        $self->{git_commit_ok} = 1;
 455        $self->{nr} = $self->{gii}->{nr};
 456        delete $self->{gii};
 457        $self->SUPER::close_edit(@_);
 458}
 459
 460sub find_empty_directories {
 461        my ($self) = @_;
 462        my @empty_dirs;
 463        my %dirs = map { dirname($_) => 1 } @deleted_gpath;
 464
 465        foreach my $dir (sort keys %dirs) {
 466                next if $dir eq ".";
 467
 468                # If there have been any additions to this directory, there is
 469                # no reason to check if it is empty.
 470                my $skip_added = 0;
 471                foreach my $t (qw/dir_prop file_prop/) {
 472                        foreach my $path (keys %{ $self->{$t} }) {
 473                                if (exists $self->{$t}->{dirname($path)}) {
 474                                        $skip_added = 1;
 475                                        last;
 476                                }
 477                        }
 478                        last if $skip_added;
 479                }
 480                next if $skip_added;
 481
 482                # Use `git ls-tree` to get the filenames of this directory
 483                # that existed prior to this particular commit.
 484                my $ls = command('ls-tree', '-z', '--name-only',
 485                                 $self->{c}, "$dir/");
 486                my %files = map { $_ => 1 } split(/\0/, $ls);
 487
 488                # Remove the filenames that were deleted during this commit.
 489                delete $files{$_} foreach (@deleted_gpath);
 490
 491                # Report the directory if there are no filenames left.
 492                push @empty_dirs, $dir unless (scalar %files);
 493        }
 494        @empty_dirs;
 495}
 496
 497sub add_placeholder_file {
 498        my ($self, $dir) = @_;
 499        my $path = "$dir/$_placeholder_filename";
 500        my $gpath = $self->git_path($path);
 501
 502        my $fh = $::_repository->temp_acquire($gpath);
 503        my $hash = $::_repository->hash_and_insert_object(Git::temp_path($fh));
 504        Git::temp_release($fh, 1);
 505        $self->{gii}->update('100644', $hash, $gpath) or croak $!;
 506
 507        # The directory should no longer be considered empty.
 508        delete $self->{empty}->{$dir} if exists $self->{empty}->{$dir};
 509
 510        # Keep track of any placeholder files we create.
 511        $added_placeholder{$dir} = $path;
 512}
 513
 514sub stash_placeholder_list {
 515        my ($self) = @_;
 516        my $k = "svn-remote.$repo_id.added-placeholder";
 517        my $v = eval { command_oneline('config', '--get-all', $k) };
 518        command_noisy('config', '--unset-all', $k) if $v;
 519        foreach (values %added_placeholder) {
 520                command_noisy('config', '--add', $k, $_);
 521        }
 522}
 523
 5241;
 525__END__
 526
 527=head1 NAME
 528
 529Git::SVN::Fetcher - tree delta consumer for "git svn fetch"
 530
 531=head1 SYNOPSIS
 532
 533    use SVN::Core;
 534    use SVN::Ra;
 535    use Git::SVN;
 536    use Git::SVN::Fetcher;
 537    use Git;
 538
 539    my $gs = Git::SVN->find_by_url($url);
 540    my $ra = SVN::Ra->new(url => $url);
 541    my $editor = Git::SVN::Fetcher->new($gs);
 542    my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
 543                                  1, $editor);
 544    $reporter->set_path('', $old_rev, 0);
 545    $reporter->finish_report;
 546    my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
 547
    foreach my $path (keys %{$editor->{dir_prop}}) {
        my $props = $editor->{dir_prop}{$path};
        foreach my $prop (keys %$props) {
            print "property $prop at $path changed to $props->{$prop}\n";
        }
    }
    foreach my $path (keys %{$editor->{empty}}) {
        my $action = $editor->{empty}{$path} ? 'added' : 'removed';
        print "empty directory $path $action\n";
    }
    foreach my $path (keys %{$editor->{file_prop}}) { ... }
    foreach my $parent (keys %{$editor->{absent_dir}}) {
        my @children = @{$editor->{absent_dir}{$parent}};
        print "cannot fetch directory $parent/$_: not authorized?\n"
            foreach @children;
    }
    foreach my $parent (keys %{$editor->{absent_file}}) { ... }
 565
 566=head1 DESCRIPTION
 567
 568This is a subclass of C<SVN::Delta::Editor>, which means it implements
 569callbacks to act as a consumer of Subversion tree deltas.  This
 570particular implementation of those callbacks is meant to store
 571information about the resulting content which B<git svn fetch> could
 572use to populate new commits and new entries for F<unhandled.log>.
 573More specifically:
 574
 575=over
 576
 577=item * Additions, removals, and modifications of files are propagated
 578to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
 579B<git update-index>.
 580
 581=item * Changes in Subversion path properties are recorded in the
 582C<dir_prop> and C<file_prop> fields (which are hashes).
 583
 584=item * Addition and removal of empty directories are indicated by
 585entries with value 1 and 0 respectively in the C<empty> hash.
 586
=item * Paths that are present but cannot be conveyed (presumably due
to permissions) are recorded in the C<absent_file> and
C<absent_dir> hashes.  For each key, the corresponding value is
a list of paths under that directory that were present but
could not be conveyed.
 592
 593=back
 594
 595The interface is unstable.  Do not use this module unless you are
 596developing git-svn.
 597
 598=head1 DEPENDENCIES
 599
 600L<SVN::Delta> from the Subversion perl bindings,
 601the core L<Carp>, L<File::Basename>, and L<IO::File> modules,
 602and git's L<Git> helper module.
 603
 604C<Git::SVN::Fetcher> has not been tested using callers other than
 605B<git-svn> itself.
 606
 607=head1 SEE ALSO
 608
 609L<SVN::Delta>,
 610L<Git::SVN::Editor>.
 611
 612=head1 INCOMPATIBILITIES
 613
 614None reported.
 615
 616=head1 BUGS
 617
 618None.