contrib / fast-import / import-directories.perlon commit Merge branch 'rr/tests-dedup-test-config' (4806c8c)
   1#!/usr/bin/perl
   2#
   3# Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se>
   4#
   5# ------------------------------------------------------------------------
   6#
   7# This program is free software; you can redistribute it and/or modify
   8# it under the terms of the GNU General Public License as published by
   9# the Free Software Foundation.
  10#
  11# This program is distributed in the hope that it will be useful,
  12# but WITHOUT ANY WARRANTY; without even the implied warranty of
  13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14# GNU General Public License for more details.
  15#
  16# You should have received a copy of the GNU General Public License
  17# along with this program; if not, write to the Free Software
  18# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19#
  20# ------------------------------------------------------------------------
  21
  22=pod
  23
  24=head1 NAME
  25
  26import-directories - Import bits and pieces to Git.
  27
  28=head1 SYNOPSIS
  29
  30B<import-directories.perl> F<configfile> F<outputfile>
  31
  32=head1 DESCRIPTION
  33
Script to import arbitrary projects into version control when their
history has so far been kept using the "copy the source directory to a
new location and edit it there" method. Handles projects with arbitrary
branching and version trees, taking a file describing the inputs and
generating a file compatible with the L<git-fast-import(1)> format.
  39
  40=head1 CONFIGURATION FILE
  41
  42=head2 Format
  43
  44The configuration file is based on the standard I<.ini> format.
  45
  46 ; Comments start with semi-colons
  47 [section]
  48 key=value
  49
  50Please see below for information on how to escape special characters.
  51
  52=head2 Global configuration
  53
  54Global configuration is done in the B<[config]> section, which should be
  55the first section in the file. Configuration can be changed by
  56repeating configuration sections later on.
  57
  58 [config]
  59 ; configure conversion of CRLFs. "convert" means that all CRLFs
  60 ; should be converted into LFs (suitable for the core.autocrlf
  61 ; setting set to true in Git). "none" means that all data is
  62 ; treated as binary.
  63 crlf=convert
  64
  65=head2 Revision configuration
  66
  67Each revision that is to be imported is described in three
  68sections. Revisions should be defined in topological order, so
  69that a revision's parent has always been defined when a new revision
  70is introduced. All the sections for one revision must be defined
  71before defining the next revision.
  72
  73Each revision is assigned a unique numerical identifier. The
  74numbers do not need to be consecutive, nor monotonically
  75increasing.
  76
  77For instance, if your configuration file contains only the two
  78revisions 4711 and 42, where 4711 is the initial commit, the
  79only requirement is that 4711 is completely defined before 42.
  80
  81=pod
  82
  83=head3 Revision description section
  84
  85A section whose section name is just an integer gives meta-data
  86about the revision.
  87
  88 [3]
  89 ; author sets the author of the revisions
  90 author=Peter Krefting <peter@softwolves.pp.se>
  91 ; branch sets the branch that the revision should be committed to
  92 branch=master
  93 ; parent describes the revision that is the parent of this commit
  94 ; (optional)
  95 parent=1
  96 ; merges describes a revision that is merged into this commit
  97 ; (optional; can be repeated)
  98 merges=2
  99 ; selects one file to take the timestamp from
 100 ; (optional; if unspecified, the most recent file from the .files
 101 ;  section is used)
 102 timestamp=3/source.c
 103
 104=head3 Revision contents section
 105
A section whose section name is an integer followed by B<.files>
describes all the files included in this revision. If a file that
was available previously is not included in this revision, it will
be removed.
 110
If an on-disk revision is incomplete, you can point to files from
a previous revision. There is no restriction as to where the source
files are located, nor on their names.
 114
 115 [3.files]
 116 ; the key is the path inside the repository, the value is the path
 117 ; as seen from the importer script.
 118 source.c=ver-3.00/source.c
 119 source.h=ver-2.99/source.h
 120 readme.txt=ver-3.00/introduction to the project.txt
 121
 122File names are treated as byte strings (but please see below on
 123quoting rules), and should be stored in the configuration file in
 124the encoding that should be used in the generated repository.
 125
 126=head3 Revision commit message section
 127
 128A section whose section name is an integer followed by B<.message>
 129gives the commit message. This section is read verbatim, up until
 130the beginning of the next section. As such, a commit message may not
 131contain a line that begins with an opening square bracket ("[") and
 132ends with a closing square bracket ("]"), unless they are surrounded
 133by whitespace or other characters.
 134
 135 [3.message]
 136 Implement foobar.
 137 ; trailing blank lines are ignored.
 138
 139=cut
 140
 141# Globals
 142use strict;
 143use warnings;
 144use integer;
 145my $crlfmode = 0;
 146my @revs;
 147my (%revmap, %message, %files, %author, %branch, %parent, %merges, %time, %timesource);
 148my $sectiontype = 0;
 149my $rev = 0;
 150my $mark = 1;
 151
 152# Check command line
 153if ($#ARGV < 1 || $ARGV[0] =~ /^--?h/)
 154{
 155    exec('perldoc', $0);
 156    exit 1;
 157}
 158
 159# Open configuration
 160my $config = $ARGV[0];
 161open CFG, '<', $config or die "Cannot open configuration file \"$config\": ";
 162
 163# Open output
 164my $output = $ARGV[1];
 165open OUT, '>', $output or die "Cannot create output file \"$output\": ";
 166binmode OUT;
 167
 168LINE: while (my $line = <CFG>)
 169{
 170        $line =~ s/\r?\n$//;
 171        next LINE if $sectiontype != 4 && $line eq '';
 172        next LINE if $line =~ /^;/;
 173        my $oldsectiontype = $sectiontype;
 174        my $oldrev = $rev;
 175
 176        # Sections
 177        if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$")
 178        {
 179                if ($1 eq 'config')
 180                {
 181                        $sectiontype = 1;
 182                }
 183                elsif ($3 eq '')
 184                {
 185                        $sectiontype = 2;
 186                        $rev = $2;
 187                        # Create a new revision
 188                        die "Duplicate rev: $line\n " if defined $revmap{$rev};
 189                        print "Reading revision $rev\n";
 190                        push @revs, $rev;
 191                        $revmap{$rev} = $mark ++;
 192                        $time{$revmap{$rev}} = 0;
 193                }
 194                elsif ($3 eq '.files')
 195                {
 196                        $sectiontype = 3;
 197                        $rev = $2;
 198                        die "Revision mismatch: $line\n " unless $rev == $oldrev;
 199                }
 200                elsif ($3 eq '.message')
 201                {
 202                        $sectiontype = 4;
 203                        $rev = $2;
 204                        die "Revision mismatch: $line\n " unless $rev == $oldrev;
 205                }
 206                else
 207                {
 208                        die "Internal parse error: $line\n ";
 209                }
 210                next LINE;
 211        }
 212
 213        # Parse data
 214        if ($sectiontype != 4)
 215        {
 216                # Key and value
 217                if ($line =~ m"^\s*([^\s].*=.*[^\s])\s*$")
 218                {
 219                        my ($key, $value) = &parsekeyvaluepair($1);
 220                        # Global configuration
 221                        if (1 == $sectiontype)
 222                        {
 223                                if ($key eq 'crlf')
 224                                {
 225                                        $crlfmode = 1, next LINE if $value eq 'convert';
 226                                        $crlfmode = 0, next LINE if $value eq 'none';
 227                                }
 228                                die "Unknown configuration option: $line\n ";
 229                        }
 230                        # Revision specification
 231                        if (2 == $sectiontype)
 232                        {
 233                                my $current = $revmap{$rev};
 234                                $author{$current} = $value, next LINE if $key eq 'author';
 235                                $branch{$current} = $value, next LINE if $key eq 'branch';
 236                                $parent{$current} = $value, next LINE if $key eq 'parent';
 237                                $timesource{$current} = $value, next LINE if $key eq 'timestamp';
 238                                push(@{$merges{$current}}, $value), next LINE if $key eq 'merges';
 239                                die "Unknown revision option: $line\n ";
 240                        }
 241                        # Filespecs
 242                        if (3 == $sectiontype)
 243                        {
 244                                # Add the file and create a marker
 245                                die "File not found: $line\n " unless -f $value;
 246                                my $current = $revmap{$rev};
 247                                ${$files{$current}}{$key} = $mark;
 248                                my $time = &fileblob($value, $crlfmode, $mark ++);
 249
 250                                # Update revision timestamp if more recent than other
 251                                # files seen, or if this is the file we have selected
 252                                # to take the time stamp from using the "timestamp"
 253                                # directive.
 254                                if ((defined $timesource{$current} && $timesource{$current} eq $value)
 255                                    || $time > $time{$current})
 256                                {
 257                                        $time{$current} = $time;
 258                                }
 259                        }
 260                }
 261                else
 262                {
 263                        die "Parse error: $line\n ";
 264                }
 265        }
 266        else
 267        {
 268                # Commit message
 269                my $current = $revmap{$rev};
 270                if (defined $message{$current})
 271                {
 272                        $message{$current} .= "\n";
 273                }
 274                $message{$current} .= $line;
 275        }
 276}
 277close CFG;
 278
 279# Start spewing out data for git-fast-import
 280foreach my $commit (@revs)
 281{
 282        # Progress
 283        print OUT "progress Creating revision $commit\n";
 284
 285        # Create commit header
 286        my $mark = $revmap{$commit};
 287
 288        # Branch and commit id
 289        print OUT "commit refs/heads/", $branch{$mark}, "\nmark :", $mark, "\n";
 290
 291        # Author and timestamp
 292        die "No timestamp defined for $commit (no files?)\n" unless defined $time{$mark};
 293        print OUT "committer ", $author{$mark}, " ", $time{$mark}, " +0100\n";
 294
 295        # Commit message
 296        die "No message defined for $commit\n" unless defined $message{$mark};
 297        my $message = $message{$mark};
 298        $message =~ s/\n$//; # Kill trailing empty line
 299        print OUT "data ", length($message), "\n", $message, "\n";
 300
 301        # Parent and any merges
 302        print OUT "from :", $revmap{$parent{$mark}}, "\n" if defined $parent{$mark};
 303        if (defined $merges{$mark})
 304        {
 305                foreach my $merge (@{$merges{$mark}})
 306                {
 307                        print OUT "merge :", $revmap{$merge}, "\n";
 308                }
 309        }
 310
 311        # Output file marks
 312        print OUT "deleteall\n"; # start from scratch
 313        foreach my $file (sort keys %{$files{$mark}})
 314        {
 315                print OUT "M 644 :", ${$files{$mark}}{$file}, " $file\n";
 316        }
 317        print OUT "\n";
 318}
 319
 320# Create one file blob
 321sub fileblob
 322{
 323        my ($filename, $crlfmode, $mark) = @_;
 324
 325        # Import the file
 326        print OUT "progress Importing $filename\nblob\nmark :$mark\n";
 327        open FILE, '<', $filename or die "Cannot read $filename\n ";
 328        binmode FILE;
 329        my ($size, $mtime) = (stat(FILE))[7,9];
 330        my $file;
 331        read FILE, $file, $size;
 332        close FILE;
 333        $file =~ s/\r\n/\n/g if $crlfmode;
 334        print OUT "data ", length($file), "\n", $file, "\n";
 335
 336        return $mtime;
 337}
 338
 339# Parse a key=value pair
 340sub parsekeyvaluepair
 341{
 342=pod
 343
 344=head2 Escaping special characters
 345
 346Key and value strings may be enclosed in quotes, in which case
 347whitespace inside the quotes is preserved. Additionally, an equal
 348sign may be included in the key by preceding it with a backslash.
 349For example:
 350
 351 "key1 "=value1
 352 key2=" value2"
 353 key\=3=value3
 354 key4=value=4
 355 "key5""=value5
 356
 357Here the first key is "key1 " (note the trailing white-space) and the
 358second value is " value2" (note the leading white-space). The third
 359key contains an equal sign "key=3" and so does the fourth value, which
 360does not need to be escaped. The fifth key contains a trailing quote,
 361which does not need to be escaped since it is inside a surrounding
 362quote.
 363
 364=cut
 365        my $pair = shift;
 366
 367        # Separate key and value by the first non-quoted equal sign
 368        my ($key, $value);
 369        if ($pair =~ /^(.*[^\\])=(.*)$/)
 370        {
 371                ($key, $value) = ($1, $2)
 372        }
 373        else
 374        {
 375                die "Parse error: $pair\n ";
 376        }
 377
 378        # Unquote and unescape the key and value separately
 379        return (&unescape($key), &unescape($value));
 380}
 381
 382# Unquote and unescape
 383sub unescape
 384{
 385        my $string = shift;
 386
 387        # First remove enclosing quotes. Backslash before the trailing
 388        # quote leaves both.
 389        if ($string =~ /^"(.*[^\\])"$/)
 390        {
 391                $string = $1;
 392        }
 393
 394        # Second remove any backslashes inside the unquoted string.
 395        # For later: Handle special sequences like \t ?
 396        $string =~ s/\\(.)/$1/g;
 397
 398        return $string;
 399}
 400
 401__END__
 402
 403=pod
 404
 405=head1 EXAMPLES
 406
B<import-directories.perl> F<project.import> F<project.fi>
 408
 409=head1 AUTHOR
 410
 411Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.se>
 412
 413This program is free software; you can redistribute it and/or modify
 414it under the terms of the GNU General Public License as published by
 415the Free Software Foundation.
 416
 417=cut