contrib / fast-import / import-directories.perlon commit index-pack: rationalize unpack_entry_data() (7ce4721)
   1#!/usr/bin/perl -w
   2#
   3# Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se>
   4#
   5# ------------------------------------------------------------------------
   6#
   7# This program is free software; you can redistribute it and/or modify
   8# it under the terms of the GNU General Public License as published by
   9# the Free Software Foundation.
  10#
  11# This program is distributed in the hope that it will be useful,
  12# but WITHOUT ANY WARRANTY; without even the implied warranty of
  13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14# GNU General Public License for more details.
  15#
  16# You should have received a copy of the GNU General Public License
  17# along with this program; if not, write to the Free Software
  18# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19#
  20# ------------------------------------------------------------------------
  21
  22=pod
  23
  24=head1 NAME
  25
  26import-directories - Import bits and pieces to Git.
  27
  28=head1 SYNOPSIS
  29
  30B<import-directories.perl> F<configfile> F<outputfile>
  31
  32=head1 DESCRIPTION
  33
Script to import projects whose history has only been kept by the "copy
the source directory to a new location and edit it there" method into
real version control. Handles projects with arbitrary branching
and version trees, taking a file describing the inputs and generating a
file compatible with the L<git-fast-import(1)> format.
  39
  40=head1 CONFIGURATION FILE
  41
  42=head2 Format
  43
  44The configuration file is based on the standard I<.ini> format.
  45
  46 ; Comments start with semi-colons
  47 [section]
  48 key=value
  49
  50Please see below for information on how to escape special characters.
  51
  52=head2 Global configuration
  53
  54Global configuration is done in the B<[config]> section, which should be
  55the first section in the file. Configuration can be changed by
  56repeating configuration sections later on.
  57
  58 [config]
  59 ; configure conversion of CRLFs. "convert" means that all CRLFs
  60 ; should be converted into LFs (suitable for the core.autocrlf
  61 ; setting set to true in Git). "none" means that all data is
  62 ; treated as binary.
  63 crlf=convert
  64
  65=head2 Revision configuration
  66
  67Each revision that is to be imported is described in three
  68sections. Revisions should be defined in topological order, so
  69that a revision's parent has always been defined when a new revision
  70is introduced. All the sections for one revision must be defined
  71before defining the next revision.
  72
  73Each revision is assigned a unique numerical identifier. The
  74numbers do not need to be consecutive, nor monotonically
  75increasing.
  76
  77For instance, if your configuration file contains only the two
  78revisions 4711 and 42, where 4711 is the initial commit, the
  79only requirement is that 4711 is completely defined before 42.
  80
  81=pod
  82
  83=head3 Revision description section
  84
  85A section whose section name is just an integer gives meta-data
  86about the revision.
  87
  88 [3]
  89 ; author sets the author of the revisions
  90 author=Peter Krefting <peter@softwolves.pp.se>
  91 ; branch sets the branch that the revision should be committed to
  92 branch=master
  93 ; parent describes the revision that is the parent of this commit
  94 ; (optional)
  95 parent=1
  96 ; merges describes a revision that is merged into this commit
  97 ; (optional; can be repeated)
  98 merges=2
  99 ; selects one file to take the timestamp from
 100 ; (optional; if unspecified, the most recent file from the .files
 101 ;  section is used)
 102 timestamp=3/source.c
 103
 104=head3 Revision contents section
 105
A section whose section name is an integer followed by B<.files>
describes all the files included in this revision. If a file that
 108was available previously is not included in this revision, it will
 109be removed.
 110
If an on-disk revision is incomplete, you can point to files from
a previous revision. There are no restrictions on where the source
files are located, nor on what they are named.
 114
 115 [3.files]
 116 ; the key is the path inside the repository, the value is the path
 117 ; as seen from the importer script.
 118 source.c=ver-3.00/source.c
 119 source.h=ver-2.99/source.h
 120 readme.txt=ver-3.00/introduction to the project.txt
 121
 122File names are treated as byte strings (but please see below on
 123quoting rules), and should be stored in the configuration file in
 124the encoding that should be used in the generated repository.
 125
 126=head3 Revision commit message section
 127
 128A section whose section name is an integer followed by B<.message>
 129gives the commit message. This section is read verbatim, up until
 130the beginning of the next section. As such, a commit message may not
 131contain a line that begins with an opening square bracket ("[") and
 132ends with a closing square bracket ("]"), unless they are surrounded
 133by whitespace or other characters.
 134
 135 [3.message]
 136 Implement foobar.
 137 ; trailing blank lines are ignored.
 138
 139=cut
 140
 141# Globals
 142use strict;
 143use integer;
 144my $crlfmode = 0;
 145my @revs;
 146my (%revmap, %message, %files, %author, %branch, %parent, %merges, %time, %timesource);
 147my $sectiontype = 0;
 148my $rev = 0;
 149my $mark = 1;
 150
 151# Check command line
 152if ($#ARGV < 1 || $ARGV[0] =~ /^--?h/)
 153{
 154    exec('perldoc', $0);
 155    exit 1;
 156}
 157
 158# Open configuration
 159my $config = $ARGV[0];
 160open CFG, '<', $config or die "Cannot open configuration file \"$config\": ";
 161
 162# Open output
 163my $output = $ARGV[1];
 164open OUT, '>', $output or die "Cannot create output file \"$output\": ";
 165binmode OUT;
 166
 167LINE: while (my $line = <CFG>)
 168{
 169        $line =~ s/\r?\n$//;
 170        next LINE if $sectiontype != 4 && $line eq '';
 171        next LINE if $line =~ /^;/;
 172        my $oldsectiontype = $sectiontype;
 173        my $oldrev = $rev;
 174
 175        # Sections
 176        if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$")
 177        {
 178                if ($1 eq 'config')
 179                {
 180                        $sectiontype = 1;
 181                }
 182                elsif ($3 eq '')
 183                {
 184                        $sectiontype = 2;
 185                        $rev = $2;
 186                        # Create a new revision
 187                        die "Duplicate rev: $line\n " if defined $revmap{$rev};
 188                        print "Reading revision $rev\n";
 189                        push @revs, $rev;
 190                        $revmap{$rev} = $mark ++;
 191                        $time{$revmap{$rev}} = 0;
 192                }
 193                elsif ($3 eq '.files')
 194                {
 195                        $sectiontype = 3;
 196                        $rev = $2;
 197                        die "Revision mismatch: $line\n " unless $rev == $oldrev;
 198                }
 199                elsif ($3 eq '.message')
 200                {
 201                        $sectiontype = 4;
 202                        $rev = $2;
 203                        die "Revision mismatch: $line\n " unless $rev == $oldrev;
 204                }
 205                else
 206                {
 207                        die "Internal parse error: $line\n ";
 208                }
 209                next LINE;
 210        }
 211
 212        # Parse data
 213        if ($sectiontype != 4)
 214        {
 215                # Key and value
 216                if ($line =~ m"^\s*([^\s].*=.*[^\s])\s*$")
 217                {
 218                        my ($key, $value) = &parsekeyvaluepair($1);
 219                        # Global configuration
 220                        if (1 == $sectiontype)
 221                        {
 222                                if ($key eq 'crlf')
 223                                {
 224                                        $crlfmode = 1, next LINE if $value eq 'convert';
 225                                        $crlfmode = 0, next LINE if $value eq 'none';
 226                                }
 227                                die "Unknown configuration option: $line\n ";
 228                        }
 229                        # Revision specification
 230                        if (2 == $sectiontype)
 231                        {
 232                                my $current = $revmap{$rev};
 233                                $author{$current} = $value, next LINE if $key eq 'author';
 234                                $branch{$current} = $value, next LINE if $key eq 'branch';
 235                                $parent{$current} = $value, next LINE if $key eq 'parent';
 236                                $timesource{$current} = $value, next LINE if $key eq 'timestamp';
 237                                push(@{$merges{$current}}, $value), next LINE if $key eq 'merges';
 238                                die "Unknown revision option: $line\n ";
 239                        }
 240                        # Filespecs
 241                        if (3 == $sectiontype)
 242                        {
 243                                # Add the file and create a marker
 244                                die "File not found: $line\n " unless -f $value;
 245                                my $current = $revmap{$rev};
 246                                ${$files{$current}}{$key} = $mark;
 247                                my $time = &fileblob($value, $crlfmode, $mark ++);
 248
 249                                # Update revision timestamp if more recent than other
 250                                # files seen, or if this is the file we have selected
 251                                # to take the time stamp from using the "timestamp"
 252                                # directive.
 253                                if ((defined $timesource{$current} && $timesource{$current} eq $value)
 254                                    || $time > $time{$current})
 255                                {
 256                                        $time{$current} = $time;
 257                                }
 258                        }
 259                }
 260                else
 261                {
 262                        die "Parse error: $line\n ";
 263                }
 264        }
 265        else
 266        {
 267                # Commit message
 268                my $current = $revmap{$rev};
 269                if (defined $message{$current})
 270                {
 271                        $message{$current} .= "\n";
 272                }
 273                $message{$current} .= $line;
 274        }
 275}
 276close CFG;
 277
 278# Start spewing out data for git-fast-import
 279foreach my $commit (@revs)
 280{
 281        # Progress
 282        print OUT "progress Creating revision $commit\n";
 283
 284        # Create commit header
 285        my $mark = $revmap{$commit};
 286
 287        # Branch and commit id
 288        print OUT "commit refs/heads/", $branch{$mark}, "\nmark :", $mark, "\n";
 289
 290        # Author and timestamp
 291        die "No timestamp defined for $commit (no files?)\n" unless defined $time{$mark};
 292        print OUT "committer ", $author{$mark}, " ", $time{$mark}, " +0100\n";
 293
 294        # Commit message
 295        die "No message defined for $commit\n" unless defined $message{$mark};
 296        my $message = $message{$mark};
 297        $message =~ s/\n$//; # Kill trailing empty line
 298        print OUT "data ", length($message), "\n", $message, "\n";
 299
 300        # Parent and any merges
 301        print OUT "from :", $revmap{$parent{$mark}}, "\n" if defined $parent{$mark};
 302        if (defined $merges{$mark})
 303        {
 304                foreach my $merge (@{$merges{$mark}})
 305                {
 306                        print OUT "merge :", $revmap{$merge}, "\n";
 307                }
 308        }
 309
 310        # Output file marks
 311        print OUT "deleteall\n"; # start from scratch
 312        foreach my $file (sort keys %{$files{$mark}})
 313        {
 314                print OUT "M 644 :", ${$files{$mark}}{$file}, " $file\n";
 315        }
 316        print OUT "\n";
 317}
 318
 319# Create one file blob
 320sub fileblob
 321{
 322        my ($filename, $crlfmode, $mark) = @_;
 323
 324        # Import the file
 325        print OUT "progress Importing $filename\nblob\nmark :$mark\n";
 326        open FILE, '<', $filename or die "Cannot read $filename\n ";
 327        binmode FILE;
 328        my ($size, $mtime) = (stat(FILE))[7,9];
 329        my $file;
 330        read FILE, $file, $size;
 331        close FILE;
 332        $file =~ s/\r\n/\n/g if $crlfmode;
 333        print OUT "data ", length($file), "\n", $file, "\n";
 334
 335        return $mtime;
 336}
 337
 338# Parse a key=value pair
 339sub parsekeyvaluepair
 340{
 341=pod
 342
 343=head2 Escaping special characters
 344
 345Key and value strings may be enclosed in quotes, in which case
 346whitespace inside the quotes is preserved. Additionally, an equal
 347sign may be included in the key by preceeding it with a backslash.
 348For example:
 349
 350 "key1 "=value1
 351 key2=" value2"
 352 key\=3=value3
 353 key4=value=4
 354 "key5""=value5
 355
 356Here the first key is "key1 " (note the trailing white-space) and the
 357second value is " value2" (note the leading white-space). The third
 358key contains an equal sign "key=3" and so does the fourth value, which
 359does not need to be escaped. The fifth key contains a trailing quote,
 360which does not need to be escaped since it is inside a surrounding
 361quote.
 362
 363=cut
 364        my $pair = shift;
 365
 366        # Separate key and value by the first non-quoted equal sign
 367        my ($key, $value);
 368        if ($pair =~ /^(.*[^\\])=(.*)$/)
 369        {
 370                ($key, $value) = ($1, $2)
 371        }
 372        else
 373        {
 374                die "Parse error: $pair\n ";
 375        }
 376
 377        # Unquote and unescape the key and value separately
 378        return (&unescape($key), &unescape($value));
 379}
 380
 381# Unquote and unescape
 382sub unescape
 383{
 384        my $string = shift;
 385
 386        # First remove enclosing quotes. Backslash before the trailing
 387        # quote leaves both.
 388        if ($string =~ /^"(.*[^\\])"$/)
 389        {
 390                $string = $1;
 391        }
 392
 393        # Second remove any backslashes inside the unquoted string.
 394        # For later: Handle special sequences like \t ?
 395        $string =~ s/\\(.)/$1/g;
 396
 397        return $string;
 398}
 399
 400__END__
 401
 402=pod
 403
 404=head1 EXAMPLES
 405
B<import-directories.perl> F<project.import> F<project.fi>
 407
 408=head1 AUTHOR
 409
 410Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.se>
 411
 412This program is free software; you can redistribute it and/or modify
 413it under the terms of the GNU General Public License as published by
 414the Free Software Foundation.
 415
 416=cut