contrib / fast-import / import-directories.perlon commit sha1_file: allow map_sha1_file_1 to handle arbitrary repositories (1fea63e)
   1#!/usr/bin/perl
   2#
   3# Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se>
   4#
   5# ------------------------------------------------------------------------
   6#
   7# This program is free software; you can redistribute it and/or modify
   8# it under the terms of the GNU General Public License as published by
   9# the Free Software Foundation.
  10#
  11# This program is distributed in the hope that it will be useful,
  12# but WITHOUT ANY WARRANTY; without even the implied warranty of
  13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14# GNU General Public License for more details.
  15#
  16# You should have received a copy of the GNU General Public License
  17# along with this program; if not, see <http://www.gnu.org/licenses/>.
  18#
  19# ------------------------------------------------------------------------
  20
  21=pod
  22
  23=head1 NAME
  24
  25import-directories - Import bits and pieces to Git.
  26
  27=head1 SYNOPSIS
  28
  29B<import-directories.perl> F<configfile> F<outputfile>
  30
  31=head1 DESCRIPTION
  32
Script to import projects whose history has been kept by the "copy the
source directory to a new location and edit it there" method into real
version control. Handles projects with arbitrary branching and version
trees, taking a file describing the inputs and generating a file
compatible with the L<git-fast-import(1)> format.
  38
  39=head1 CONFIGURATION FILE
  40
  41=head2 Format
  42
  43The configuration file is based on the standard I<.ini> format.
  44
  45 ; Comments start with semi-colons
  46 [section]
  47 key=value
  48
  49Please see below for information on how to escape special characters.
  50
  51=head2 Global configuration
  52
  53Global configuration is done in the B<[config]> section, which should be
  54the first section in the file. Configuration can be changed by
  55repeating configuration sections later on.
  56
  57 [config]
  58 ; configure conversion of CRLFs. "convert" means that all CRLFs
  59 ; should be converted into LFs (suitable for the core.autocrlf
  60 ; setting set to true in Git). "none" means that all data is
  61 ; treated as binary.
  62 crlf=convert
  63
  64=head2 Revision configuration
  65
  66Each revision that is to be imported is described in three
  67sections. Revisions should be defined in topological order, so
  68that a revision's parent has always been defined when a new revision
  69is introduced. All the sections for one revision must be defined
  70before defining the next revision.
  71
  72Each revision is assigned a unique numerical identifier. The
  73numbers do not need to be consecutive, nor monotonically
  74increasing.
  75
  76For instance, if your configuration file contains only the two
  77revisions 4711 and 42, where 4711 is the initial commit, the
  78only requirement is that 4711 is completely defined before 42.
  79
  80=pod
  81
  82=head3 Revision description section
  83
  84A section whose section name is just an integer gives meta-data
  85about the revision.
  86
  87 [3]
  88 ; author sets the author of the revisions
  89 author=Peter Krefting <peter@softwolves.pp.se>
  90 ; branch sets the branch that the revision should be committed to
  91 branch=master
  92 ; parent describes the revision that is the parent of this commit
  93 ; (optional)
  94 parent=1
  95 ; merges describes a revision that is merged into this commit
  96 ; (optional; can be repeated)
  97 merges=2
  98 ; selects one file to take the timestamp from
  99 ; (optional; if unspecified, the most recent file from the .files
 100 ;  section is used)
 101 timestamp=3/source.c
 102
 103=head3 Revision contents section
 104
A section whose section name is an integer followed by B<.files>
describes all the files included in this revision. If a file that
 107was available previously is not included in this revision, it will
 108be removed.
 109
 110If an on-disk revision is incomplete, you can point to files from
 111a previous revision. There are no restrictions on where the source
 112files are located, nor on their names.
 113
 114 [3.files]
 115 ; the key is the path inside the repository, the value is the path
 116 ; as seen from the importer script.
 117 source.c=ver-3.00/source.c
 118 source.h=ver-2.99/source.h
 119 readme.txt=ver-3.00/introduction to the project.txt
 120
 121File names are treated as byte strings (but please see below on
 122quoting rules), and should be stored in the configuration file in
 123the encoding that should be used in the generated repository.
 124
 125=head3 Revision commit message section
 126
 127A section whose section name is an integer followed by B<.message>
 128gives the commit message. This section is read verbatim, up until
 129the beginning of the next section. As such, a commit message may not
 130contain a line that begins with an opening square bracket ("[") and
 131ends with a closing square bracket ("]"), unless they are surrounded
 132by whitespace or other characters.
 133
 134 [3.message]
 135 Implement foobar.
 136 ; trailing blank lines are ignored.
 137
 138=cut
 139
 140# Globals
 141use strict;
 142use warnings;
 143use integer;
 144my $crlfmode = 0;
 145my @revs;
 146my (%revmap, %message, %files, %author, %branch, %parent, %merges, %time, %timesource);
 147my $sectiontype = 0;
 148my $rev = 0;
 149my $mark = 1;
 150
 151# Check command line
 152if ($#ARGV < 1 || $ARGV[0] =~ /^--?h/)
 153{
 154    exec('perldoc', $0);
 155    exit 1;
 156}
 157
 158# Open configuration
 159my $config = $ARGV[0];
 160open CFG, '<', $config or die "Cannot open configuration file \"$config\": ";
 161
 162# Open output
 163my $output = $ARGV[1];
 164open OUT, '>', $output or die "Cannot create output file \"$output\": ";
 165binmode OUT;
 166
 167LINE: while (my $line = <CFG>)
 168{
 169        $line =~ s/\r?\n$//;
 170        next LINE if $sectiontype != 4 && $line eq '';
 171        next LINE if $line =~ /^;/;
 172        my $oldsectiontype = $sectiontype;
 173        my $oldrev = $rev;
 174
 175        # Sections
 176        if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$")
 177        {
 178                if ($1 eq 'config')
 179                {
 180                        $sectiontype = 1;
 181                }
 182                elsif ($3 eq '')
 183                {
 184                        $sectiontype = 2;
 185                        $rev = $2;
 186                        # Create a new revision
 187                        die "Duplicate rev: $line\n " if defined $revmap{$rev};
 188                        print "Reading revision $rev\n";
 189                        push @revs, $rev;
 190                        $revmap{$rev} = $mark ++;
 191                        $time{$revmap{$rev}} = 0;
 192                }
 193                elsif ($3 eq '.files')
 194                {
 195                        $sectiontype = 3;
 196                        $rev = $2;
 197                        die "Revision mismatch: $line\n " unless $rev == $oldrev;
 198                }
 199                elsif ($3 eq '.message')
 200                {
 201                        $sectiontype = 4;
 202                        $rev = $2;
 203                        die "Revision mismatch: $line\n " unless $rev == $oldrev;
 204                }
 205                else
 206                {
 207                        die "Internal parse error: $line\n ";
 208                }
 209                next LINE;
 210        }
 211
 212        # Parse data
 213        if ($sectiontype != 4)
 214        {
 215                # Key and value
 216                if ($line =~ m"^\s*([^\s].*=.*[^\s])\s*$")
 217                {
 218                        my ($key, $value) = &parsekeyvaluepair($1);
 219                        # Global configuration
 220                        if (1 == $sectiontype)
 221                        {
 222                                if ($key eq 'crlf')
 223                                {
 224                                        $crlfmode = 1, next LINE if $value eq 'convert';
 225                                        $crlfmode = 0, next LINE if $value eq 'none';
 226                                }
 227                                die "Unknown configuration option: $line\n ";
 228                        }
 229                        # Revision specification
 230                        if (2 == $sectiontype)
 231                        {
 232                                my $current = $revmap{$rev};
 233                                $author{$current} = $value, next LINE if $key eq 'author';
 234                                $branch{$current} = $value, next LINE if $key eq 'branch';
 235                                $parent{$current} = $value, next LINE if $key eq 'parent';
 236                                $timesource{$current} = $value, next LINE if $key eq 'timestamp';
 237                                push(@{$merges{$current}}, $value), next LINE if $key eq 'merges';
 238                                die "Unknown revision option: $line\n ";
 239                        }
 240                        # Filespecs
 241                        if (3 == $sectiontype)
 242                        {
 243                                # Add the file and create a marker
 244                                die "File not found: $line\n " unless -f $value;
 245                                my $current = $revmap{$rev};
 246                                ${$files{$current}}{$key} = $mark;
 247                                my $time = &fileblob($value, $crlfmode, $mark ++);
 248
 249                                # Update revision timestamp if more recent than other
 250                                # files seen, or if this is the file we have selected
 251                                # to take the time stamp from using the "timestamp"
 252                                # directive.
 253                                if ((defined $timesource{$current} && $timesource{$current} eq $value)
 254                                    || $time > $time{$current})
 255                                {
 256                                        $time{$current} = $time;
 257                                }
 258                        }
 259                }
 260                else
 261                {
 262                        die "Parse error: $line\n ";
 263                }
 264        }
 265        else
 266        {
 267                # Commit message
 268                my $current = $revmap{$rev};
 269                if (defined $message{$current})
 270                {
 271                        $message{$current} .= "\n";
 272                }
 273                $message{$current} .= $line;
 274        }
 275}
 276close CFG;
 277
 278# Start spewing out data for git-fast-import
 279foreach my $commit (@revs)
 280{
 281        # Progress
 282        print OUT "progress Creating revision $commit\n";
 283
 284        # Create commit header
 285        my $mark = $revmap{$commit};
 286
 287        # Branch and commit id
 288        print OUT "commit refs/heads/", $branch{$mark}, "\nmark :", $mark, "\n";
 289
 290        # Author and timestamp
 291        die "No timestamp defined for $commit (no files?)\n" unless defined $time{$mark};
 292        print OUT "committer ", $author{$mark}, " ", $time{$mark}, " +0100\n";
 293
 294        # Commit message
 295        die "No message defined for $commit\n" unless defined $message{$mark};
 296        my $message = $message{$mark};
 297        $message =~ s/\n$//; # Kill trailing empty line
 298        print OUT "data ", length($message), "\n", $message, "\n";
 299
 300        # Parent and any merges
 301        print OUT "from :", $revmap{$parent{$mark}}, "\n" if defined $parent{$mark};
 302        if (defined $merges{$mark})
 303        {
 304                foreach my $merge (@{$merges{$mark}})
 305                {
 306                        print OUT "merge :", $revmap{$merge}, "\n";
 307                }
 308        }
 309
 310        # Output file marks
 311        print OUT "deleteall\n"; # start from scratch
 312        foreach my $file (sort keys %{$files{$mark}})
 313        {
 314                print OUT "M 644 :", ${$files{$mark}}{$file}, " $file\n";
 315        }
 316        print OUT "\n";
 317}
 318
 319# Create one file blob
 320sub fileblob
 321{
 322        my ($filename, $crlfmode, $mark) = @_;
 323
 324        # Import the file
 325        print OUT "progress Importing $filename\nblob\nmark :$mark\n";
 326        open FILE, '<', $filename or die "Cannot read $filename\n ";
 327        binmode FILE;
 328        my ($size, $mtime) = (stat(FILE))[7,9];
 329        my $file;
 330        read FILE, $file, $size;
 331        close FILE;
 332        $file =~ s/\r\n/\n/g if $crlfmode;
 333        print OUT "data ", length($file), "\n", $file, "\n";
 334
 335        return $mtime;
 336}
 337
 338# Parse a key=value pair
 339sub parsekeyvaluepair
 340{
 341=pod
 342
 343=head2 Escaping special characters
 344
 345Key and value strings may be enclosed in quotes, in which case
 346whitespace inside the quotes is preserved. Additionally, an equal
 347sign may be included in the key by preceding it with a backslash.
 348For example:
 349
 350 "key1 "=value1
 351 key2=" value2"
 352 key\=3=value3
 353 key4=value=4
 354 "key5""=value5
 355
 356Here the first key is "key1 " (note the trailing white-space) and the
 357second value is " value2" (note the leading white-space). The third
 358key contains an equal sign "key=3" and so does the fourth value, which
 359does not need to be escaped. The fifth key contains a trailing quote,
 360which does not need to be escaped since it is inside a surrounding
 361quote.
 362
 363=cut
 364        my $pair = shift;
 365
 366        # Separate key and value by the first non-quoted equal sign
 367        my ($key, $value);
 368        if ($pair =~ /^(.*[^\\])=(.*)$/)
 369        {
 370                ($key, $value) = ($1, $2)
 371        }
 372        else
 373        {
 374                die "Parse error: $pair\n ";
 375        }
 376
 377        # Unquote and unescape the key and value separately
 378        return (&unescape($key), &unescape($value));
 379}
 380
 381# Unquote and unescape
 382sub unescape
 383{
 384        my $string = shift;
 385
 386        # First remove enclosing quotes. Backslash before the trailing
 387        # quote leaves both.
 388        if ($string =~ /^"(.*[^\\])"$/)
 389        {
 390                $string = $1;
 391        }
 392
 393        # Second remove any backslashes inside the unquoted string.
 394        # For later: Handle special sequences like \t ?
 395        $string =~ s/\\(.)/$1/g;
 396
 397        return $string;
 398}
 399
 400__END__
 401
 402=pod
 403
 404=head1 EXAMPLES
 405
B<import-directories.perl> F<project.import> F<project.fast-import>
 407
 408=head1 AUTHOR
 409
 410Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.se>
 411
 412This program is free software; you can redistribute it and/or modify
 413it under the terms of the GNU General Public License as published by
 414the Free Software Foundation.
 415
 416=cut