#!/usr/bin/perl
#
# Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se>
#
# ------------------------------------------------------------------------
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# ------------------------------------------------------------------------

=pod

=head1 NAME

import-directories - Import bits and pieces to Git.

=head1 SYNOPSIS

B<import-directories.perl> F<configfile> F<outputfile>

=head1 DESCRIPTION

Script to import arbitrary projects that have been version controlled
using the "copy the source directory to a new location and edit it
there" method into real version control. Handles projects with arbitrary
branching and version trees, taking a file describing the inputs and
generating a file compatible with the L<git-fast-import(1)> format.

=head1 CONFIGURATION FILE

=head2 Format

The configuration file is based on the standard I<.ini> format.

    ; Comments start with semi-colons
    [section]
    key=value

Please see below for information on how to escape special characters.

=head2 Global configuration

Global configuration is done in the B<[config]> section, which should be
the first section in the file. Configuration can be changed by
repeating configuration sections later on.

    [config]
    ; configure conversion of CRLFs. "convert" means that all CRLFs
    ; should be converted into LFs (suitable for the core.autocrlf
    ; setting set to true in Git). "none" means that all data is
    ; treated as binary.
    crlf=convert

=head2 Revision configuration

Each revision that is to be imported is described in three
sections. Revisions should be defined in topological order, so
that a revision's parent has always been defined when a new revision
is introduced. All the sections for one revision must be defined
before defining the next revision.

Each revision is assigned a unique numerical identifier. The
numbers do not need to be consecutive, nor monotonically
increasing.

For instance, if your configuration file contains only the two
revisions 4711 and 42, where 4711 is the initial commit, the
only requirement is that 4711 is completely defined before 42.

=pod

=head3 Revision description section

A section whose section name is just an integer gives meta-data
about the revision.

    [3]
    ; author sets the author of the revisions
    author=Peter Krefting <peter@softwolves.pp.se>
    ; branch sets the branch that the revision should be committed to
    branch=master
    ; parent describes the revision that is the parent of this commit
    ; (optional)
    parent=1
    ; merges describes a revision that is merged into this commit
    ; (optional; can be repeated)
    merges=2
    ; selects one file to take the timestamp from
    ; (optional; if unspecified, the most recent file from the .files
    ;  section is used)
    timestamp=3/source.c

=head3 Revision contents section

A section whose section name is an integer followed by B<.files>
describes all the files included in this revision. If a file that
was available previously is not included in this revision, it will
be removed.

If an on-disk revision is incomplete, you can point to files from
a previous revision. There is no restriction as to where the source
files are located, nor as to their names.

    [3.files]
    ; the key is the path inside the repository, the value is the path
    ; as seen from the importer script.
    source.c=ver-3.00/source.c
    source.h=ver-2.99/source.h
    readme.txt=ver-3.00/introduction to the project.txt

File names are treated as byte strings (but please see below on
quoting rules), and should be stored in the configuration file in
the encoding that should be used in the generated repository.

=head3 Revision commit message section

A section whose section name is an integer followed by B<.message>
gives the commit message. This section is read verbatim, up until
the beginning of the next section. As such, a commit message may not
contain a line that begins with an opening square bracket ("[") and
ends with a closing square bracket ("]"), unless they are surrounded
by whitespace or other characters.

    [3.message]
    Implement foobar.
    ; trailing blank lines are ignored.

=cut

# Globals
use strict;
use warnings;
use integer;

# State shared between the configuration parser and the stream writer.
# Every per-revision hash except %revmap is keyed by the revision's mark.
my $crlfmode = 0;   # 1: convert CRLF to LF when importing blobs, 0: binary
my @revs;           # revision identifiers, in the order they were defined
my %revmap;         # revision identifier => mark of its commit
my %message;        # commit message
my %files;          # hash of (path inside the repository => blob mark)
my %author;         # value of the "author" key
my %branch;         # value of the "branch" key
my %parent;         # value of the "parent" key (a revision identifier)
my %merges;         # list of values of the "merges" key
my %time;           # commit time stamp, taken from the imported files
my %timesource;     # value of the "timestamp" key (an on-disk path)
my %timepinned;     # true once the "timestamp" file itself has been seen
my $mark = 1;       # next unused fast-import mark
my $out;            # output file handle, opened by main()

# Run the import only when executed as a script, so that the helper
# routines below can be loaded without side effects.
main(@ARGV) unless caller;

# Entry point: check the command line, open both files, parse the
# configuration (which writes the blobs as a side effect) and then
# write one commit per revision.
#
# Parameters: the command line arguments (configuration file, output file).
# Dies if either file cannot be opened or the output cannot be written.
sub main
{
    my @args = @_;

    # Check command line
    if ($#args < 1 || $args[0] =~ /^--?h/)
    {
        exec('perldoc', $0);
        exit 1;
    }

    my ($config, $output) = @args;

    # Open configuration
    open my $cfg, '<', $config
        or die "Cannot open configuration file\"$config\": $!";

    # Open output
    open $out, '>', $output
        or die "Cannot create output file\"$output\": $!";
    binmode $out;

    readconfig($cfg);
    close $cfg;

    writecommits();

    # Buffered write errors only surface when the handle is closed.
    close $out or die "Cannot write output file \"$output\": $!\n";
    return;
}

# Parse the configuration file read from the handle given as the only
# parameter, filling in the global revision tables. Blobs are written to
# the output as soon as their entry in a ".files" section is read.
# Dies on any syntax error, unknown option or missing file.
sub readconfig
{
    my $cfg = shift;

    # 0: no section seen yet, 1: [config], 2: [N], 3: [N.files],
    # 4: [N.message]
    my $sectiontype = 0;
    my $rev = 0;

    LINE: while (my $line = <$cfg>)
    {
        $line =~ s/\r?\n$//;
        # Blank lines are only significant inside a commit message;
        # lines starting with a semi-colon are comments everywhere.
        next LINE if $sectiontype != 4 && $line eq '';
        next LINE if $line =~ /^;/;
        my $oldrev = $rev;

        # Sections
        if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$")
        {
            if ($1 eq 'config')
            {
                $sectiontype = 1;
            }
            elsif ($3 eq '')
            {
                $sectiontype = 2;
                $rev = $2;
                # Create a new revision
                die "Duplicate rev:$line\n" if defined $revmap{$rev};
                print "Reading revision$rev\n";
                push @revs, $rev;
                $revmap{$rev} = $mark++;
                $time{$revmap{$rev}} = 0;
            }
            elsif ($3 eq '.files')
            {
                $sectiontype = 3;
                $rev = $2;
                die "Revision mismatch:$line\n" unless $rev == $oldrev;
            }
            elsif ($3 eq '.message')
            {
                $sectiontype = 4;
                $rev = $2;
                die "Revision mismatch:$line\n" unless $rev == $oldrev;
            }
            else
            {
                die "Internal parse error:$line\n";
            }
            next LINE;
        }

        # Commit message: every line is taken verbatim
        if ($sectiontype == 4)
        {
            my $current = $revmap{$rev};
            if (defined $message{$current})
            {
                $message{$current} .= "\n";
            }
            $message{$current} .= $line;
            next LINE;
        }

        # Everything else must be a key and value pair
        $line =~ m"^\s*([^\s].*=.*[^\s])\s*$"
            or die "Parse error:$line\n";
        my ($key, $value) = parsekeyvaluepair($1);

        # Global configuration
        if ($sectiontype == 1)
        {
            if ($key eq 'crlf')
            {
                if ($value eq 'convert')
                {
                    $crlfmode = 1;
                    next LINE;
                }
                if ($value eq 'none')
                {
                    $crlfmode = 0;
                    next LINE;
                }
            }
            die "Unknown configuration option:$line\n";
        }

        # Revision specification
        if ($sectiontype == 2)
        {
            my $current = $revmap{$rev};
            if ($key eq 'author')
            {
                $author{$current} = $value;
            }
            elsif ($key eq 'branch')
            {
                $branch{$current} = $value;
            }
            elsif ($key eq 'parent')
            {
                $parent{$current} = $value;
            }
            elsif ($key eq 'timestamp')
            {
                $timesource{$current} = $value;
            }
            elsif ($key eq 'merges')
            {
                push @{$merges{$current}}, $value;
            }
            else
            {
                die "Unknown revision option:$line\n";
            }
            next LINE;
        }

        # Filespecs
        if ($sectiontype == 3)
        {
            # Add the file and create a marker
            die "File not found:$line\n" unless -f $value;
            my $current = $revmap{$rev};
            ${$files{$current}}{$key} = $mark;
            my $time = fileblob($value, $crlfmode, $mark++);

            # The file selected with the "timestamp" directive always
            # decides the time stamp; once it has been seen, newer
            # files must no longer override it. Without such a file
            # the most recent file seen wins.
            if (defined $timesource{$current}
                && $timesource{$current} eq $value)
            {
                $time{$current} = $time;
                $timepinned{$current} = 1;
            }
            elsif (!$timepinned{$current} && $time > $time{$current})
            {
                $time{$current} = $time;
            }
        }
    }
    return;
}

# Write one git-fast-import commit for every revision read by
# readconfig(), in the order the revisions were defined.
# Dies if a revision has no message or refers to an unknown revision.
sub writecommits
{
    foreach my $commit (@revs)
    {
        # Progress
        print $out "progress Creating revision$commit\n";

        # Create commit header
        my $id = $revmap{$commit};

        # Branch and commit id
        print $out "commit refs/heads/", $branch{$id}, "\nmark :", $id, "\n";

        # Author and timestamp. %time is set to 0 when the revision is
        # created, so a revision without files gets time stamp 0 rather
        # than failing this check.
        die "No timestamp defined for$commit(no files?)\n"
            unless defined $time{$id};
        print $out "committer ", $author{$id}, " ", $time{$id}, " +0100\n";

        # Commit message
        die "No message defined for$commit\n" unless defined $message{$id};
        my $message = $message{$id};
        $message =~ s/\n+\z//; # Kill all trailing empty lines
        print $out "data ", length($message), "\n", $message, "\n";

        # Parent and any merges; both must name revisions that exist,
        # otherwise the stream would refer to an empty mark.
        if (defined $parent{$id})
        {
            my $parentmark = $revmap{$parent{$id}};
            die "Unknown parent revision $parent{$id} for $commit\n"
                unless defined $parentmark;
            print $out "from :", $parentmark, "\n";
        }
        if (defined $merges{$id})
        {
            foreach my $merge (@{$merges{$id}})
            {
                my $mergemark = $revmap{$merge};
                die "Unknown merged revision $merge for $commit\n"
                    unless defined $mergemark;
                print $out "merge :", $mergemark, "\n";
            }
        }

        # Output file marks
        print $out "deleteall\n"; # start from scratch
        foreach my $file (sort keys %{$files{$id}})
        {
            # The mark and the path must be separated by a space.
            print $out "M 644 :", ${$files{$id}}{$file}, " $file\n";
        }
        print $out "\n";
    }
    return;
}

# Create one file blob
#
# Parameters: the on-disk file name, the CRLF mode (true to convert all
# CRLFs into LFs) and the mark to assign to the blob.
# Returns the modification time of the file.
# Dies if the file cannot be opened or read.
sub fileblob
{
    my ($filename, $crlfmode, $mark) = @_;

    # Import the file
    print $out "progress Importing$filename\nblob\nmark :$mark\n";
    open my $fh, '<', $filename or die "Cannot read$filename\n";
    binmode $fh;
    my ($size, $mtime) = (stat($fh))[7,9];
    my $file = '';
    defined(read $fh, $file, $size)
        or die "Error reading $filename: $!\n";
    close $fh;
    $file =~ s/\r\n/\n/g if $crlfmode;
    print $out "data ", length($file), "\n", $file, "\n";

    return $mtime;
}

=pod

=head2 Escaping special characters

Key and value strings may be enclosed in quotes, in which case
whitespace inside the quotes is preserved. Additionally, an equal
sign may be included in the key by preceding it with a backslash.
For example:

    "key1 "=value1
    key2=" value2"
    key\=3=value3
    key4=value=4
    "key5""=value5

Here the first key is "key1 " (note the trailing white-space) and the
second value is " value2" (note the leading white-space). The third
key contains an equal sign "key=3" and so does the fourth value, which
does not need to be escaped. The fifth key contains a trailing quote,
which does not need to be escaped since it is inside a surrounding
quote.

=cut

# Parse a key=value pair
#
# Parameter: the pair, with surrounding whitespace already removed.
# Returns the list (key, value), both unquoted and unescaped.
# Dies if the pair contains no usable equal sign.
sub parsekeyvaluepair
{
    my $pair = shift;

    # Separate key and value by the first non-quoted equal sign. A key
    # that starts with a quote extends to the first unescaped quote that
    # is directly followed by an equal sign, so it may itself contain
    # equal signs; otherwise the key ends at the first equal sign that
    # is not preceded by a backslash. The matches are non-greedy so that
    # equal signs in the value are left alone.
    my ($key, $value);
    if ($pair =~ /^(".*?[^\\]")=(.*)$/ || $pair =~ /^(.*?[^\\])=(.*)$/)
    {
        ($key, $value) = ($1, $2);
    }
    else
    {
        die "Parse error:$pair\n";
    }

    # Unquote and unescape the key and value separately
    return (unescape($key), unescape($value));
}

# Unquote and unescape
#
# Parameter: a raw key or value string.
# Returns the string without enclosing quotes and escaping backslashes.
sub unescape
{
    my $string = shift;

    # First remove enclosing quotes. Backslash before the trailing
    # quote leaves both.
    if ($string =~ /^"(.*[^\\])"$/)
    {
        $string = $1;
    }

    # Second remove any backslashes inside the unquoted string.
    # For later: Handle special sequences like \t ?
    $string =~ s/\\(.)/$1/g;

    return $string;
}

__END__

=pod

=head1 EXAMPLES

B<import-directories.perl> F<project.import> F<project.out>

=head1 AUTHOR

Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.seE<gt>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation.

=cut