#!/usr/bin/env python
#
# p4-fast-export.py
#
# Author: Simon Hausmann <hausmann@kde.org>
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
#
# TODO:
#       - support integrations (at least p4i)
#       - support p4 submit (hah!)
#       - emulate p4's delete behavior: if a directory becomes empty, delete it.
#         Continue with the parent dir until a non-empty dir is found.
#
14import os, string, sys, time, os.path
15import marshal, popen2, getopt, sha
16from sets import Set;
17
# Optional on-disk caches under /tmp/p4cache: dataCache covers file contents
# fetched by p4File, commandCache covers "p4 -G" output read by p4CmdList.
dataCache = False
commandCache = False

silent = False
knownBranches = Set()
createdBranches = Set()
committedChanges = Set()
# Ref that receives the imported commits; overridden by --branch.
branch = "refs/heads/master"
# Depot path stored by a previous run (empty output if there was none).
# previousDepotPath keeps the raw output, globalPrefix loses the trailing
# newline below.
globalPrefix = previousDepotPath = os.popen("git-repo-config --get p4.depotpath").read()
detectBranches = False
changesFile = ""
if len(globalPrefix) != 0:
    globalPrefix = globalPrefix[:-1]

try:
    opts, args = getopt.getopt(sys.argv[1:], "", [ "branch=", "detect-branches", "changesfile=", "silent", "known-branches=",
                                                   "cache", "command-cache" ])
except getopt.GetoptError:
    print("fixme, syntax error")
    sys.exit(1)

for o, a in opts:
    if o == "--branch":
        branch = "refs/heads/" + a
    elif o == "--detect-branches":
        detectBranches = True
    elif o == "--changesfile":
        changesFile = a
    elif o == "--silent":
        silent = True
    elif o == "--known-branches":
        # Use a dedicated loop variable: reusing "branch" here would overwrite
        # the target ref chosen above with the last line of the file.
        knownBranchesFile = open(a)
        try:
            for knownBranchLine in knownBranchesFile.readlines():
                # Strip only the line terminator so a final line without a
                # trailing newline does not lose its last character.
                knownBranches.add(knownBranchLine.rstrip("\n"))
        finally:
            knownBranchesFile.close()
    elif o == "--cache":
        # --cache implies --command-cache.
        dataCache = True
        commandCache = True
    elif o == "--command-cache":
        commandCache = True
56
# Decide which depot path to import: either the one remembered from a previous
# run (no positional argument) or the single path given on the command line.
if len(args) == 0 and len(globalPrefix) != 0:
    if not silent:
        print("[using previously specified depot path %s]" % globalPrefix)
elif len(args) != 1:
    # The example lines interpolate the program name; they used to print a
    # literal "%s" because the format argument was missing.
    print("usage: %s //depot/path[@revRange]" % sys.argv[0])
    print("\n example:")
    print(" %s //depot/my/project/ -- to import the current head" % sys.argv[0])
    print(" %s //depot/my/project/@all -- to import everything" % sys.argv[0])
    print(" %s //depot/my/project/@1,6 -- to import only from revision 1 to 6" % sys.argv[0])
    print("")
    print(" (a ... is not needed in the path p4 specification, it's added implicitly)")
    print("")
    sys.exit(1)
else:
    # Refuse to mix depot paths within one repository.
    if len(globalPrefix) != 0 and globalPrefix != args[0]:
        print("previous import used depot path %s and now %s was specified. this doesn't work!" % (globalPrefix, args[0]))
        sys.exit(1)
    globalPrefix = args[0]
75
# Import state shared by the rest of the script.
changeRange = ""
revision = ""
users = {}
initialParent = ""
lastChange = 0
initialTag = ""

# Split an optional "@..." or "#..." suffix off the depot path:
#   @all    -> import every change (no range, no single revision)
#   @a,b    -> change range
#   @x / #x -> single revision snapshot
# Without any suffix and without a previous import, snapshot #head.
if "@" in globalPrefix:
    atIdx = globalPrefix.index("@")
    changeRange = globalPrefix[atIdx:]
    globalPrefix = globalPrefix[:atIdx]
    if changeRange == "@all":
        changeRange = ""
    elif "," not in changeRange:
        revision, changeRange = changeRange, ""
elif "#" in globalPrefix:
    hashIdx = globalPrefix.index("#")
    revision = globalPrefix[hashIdx:]
    globalPrefix = globalPrefix[:hashIdx]
elif not previousDepotPath:
    revision = "#head"

# Normalise the prefix: drop a trailing "..." wildcard, ensure a trailing "/".
if globalPrefix.endswith("..."):
    globalPrefix = globalPrefix[:-3]

if not globalPrefix.endswith("/"):
    globalPrefix += "/"
104
def p4File(depotPath):
    """Return the contents of depotPath as printed by "p4 print -q".

    When the module-level dataCache flag is set, the contents are read from
    (and, on a miss, stored in) a file under /tmp/p4cache keyed by the SHA-1
    of depotPath.
    """
    cacheKey = "/tmp/p4cache/data-" + sha.new(depotPath).hexdigest()

    if dataCache:
        try:
            cacheFile = open(cacheKey, "rb")
            try:
                return cacheFile.read()
            finally:
                cacheFile.close()
        except IOError:
            # Cache miss (or unreadable entry): fall through to p4.
            pass

    pipe = os.popen("p4 print -q \"%s\"" % depotPath, "rb")
    try:
        data = pipe.read()
    finally:
        pipe.close()

    if dataCache:
        # Create the cache directory on first use instead of failing the write.
        cacheDir = os.path.dirname(cacheKey)
        if not os.path.isdir(cacheDir):
            os.makedirs(cacheDir)
        cacheFile = open(cacheKey, "wb")
        try:
            cacheFile.write(data)
        finally:
            cacheFile.close()

    return data
119
def p4CmdList(cmd):
    """Run "p4 -G <cmd>" and return the list of marshalled records it emits.

    When the module-level commandCache flag is set, the records are read from
    (and, on a miss, stored in) a file under /tmp/p4cache keyed by the SHA-1
    of the full command line.
    """
    fullCmd = "p4 -G %s" % cmd
    cacheKey = "/tmp/p4cache/cmd-" + sha.new(fullCmd).hexdigest()

    pipe = None
    cached = False
    if commandCache:
        try:
            pipe = open(cacheKey, "rb")
            cached = True
        except IOError:
            # Cache miss: run the command instead.
            pass
    if pipe is None:
        pipe = os.popen(fullCmd, "rb")

    # NOTE(review): marshal.load trusts its input; the cache lives at a
    # predictable path under /tmp, so only enable caching on trusted machines.
    result = []
    try:
        try:
            while True:
                result.append(marshal.load(pipe))
        except EOFError:
            # End of the record stream.
            pass
    finally:
        pipe.close()

    if commandCache and not cached:
        # Create the cache directory on first use instead of failing the write.
        cacheDir = os.path.dirname(cacheKey)
        if not os.path.isdir(cacheDir):
            os.makedirs(cacheDir)
        cacheFile = open(cacheKey, "wb")
        try:
            for record in result:
                marshal.dump(record, cacheFile)
        finally:
            cacheFile.close()

    return result
152
def p4Cmd(cmd):
    """Run a p4 command and fold all of its output records into one dict.

    Records are merged in output order, so a key that appears in several
    records ends up with the value from the last one.
    """
    merged = {}
    for record in p4CmdList(cmd):
        merged.update(record)
    return merged
159
def extractFilesFromCommit(commit):
    """Turn the numbered per-file keys of a change record into a list of dicts.

    commit holds depotFile0/rev0/action0/type0, depotFile1/... and so on.
    Each file whose depot path lies below globalPrefix becomes a dict with
    the keys "path", "rev", "action" and "type"; files outside the prefix are
    skipped silently. Scanning stops at the first missing depotFileN key.
    """
    files = []
    fnum = 0
    # "in" replaces the deprecated dict.has_key().
    while ("depotFile%s" % fnum) in commit:
        path = commit["depotFile%s" % fnum]
        if path.startswith(globalPrefix):
            files.append({
                "path": path,
                "rev": commit["rev%s" % fnum],
                "action": commit["action%s" % fnum],
                "type": commit["type%s" % fnum],
            })
        fnum = fnum + 1
    return files
179
def isSubPathOf(first, second):
    """Return True if first equals second or lies below it as a "/" path.

    A plain string prefix is not enough: "ab" is not a sub path of "a",
    whereas "a/b" is.
    """
    return first == second or first.startswith(second + "/")
186
def branchesForCommit(files):
    """Return the set of branch paths (relative to globalPrefix) touched by files.

    files is a list of dicts with a "path" key, as built by
    extractFilesFromCommit. Newly discovered branch paths are also added to
    the module-level knownBranches set.
    """
    global knownBranches
    branches = Set()

    for file in files:
        relativePath = file["path"][len(globalPrefix):]
        # strip off the filename
        # NOTE(review): if the path has no "/" left, rfind returns -1 and this
        # drops the last character of the file name instead - confirm intent.
        relativePath = relativePath[0:relativePath.rfind("/")]

#        if len(branches) == 0:
#            branches.add(relativePath)
#            knownBranches.add(relativePath)
#            continue

        ###### this needs more testing :)
        # First compare against the branches already collected for this commit.
        knownBranch = False
        for branch in branches:
            if relativePath == branch:
                knownBranch = True
                break
#            if relativePath.startswith(branch):
            if isSubPathOf(relativePath, branch):
                # Directory below an already collected branch.
                knownBranch = True
                break
#            if branch.startswith(relativePath):
            if isSubPathOf(branch, relativePath):
                # The collected branch lies below this directory: drop it; the
                # shorter path is added further down. The immediate break keeps
                # the set from being iterated after it was modified.
                branches.remove(branch)
                break

        if knownBranch:
            continue

        # Then compare against the branches known from earlier commits/options.
        for branch in knownBranches:
            #if relativePath.startswith(branch):
            if isSubPathOf(relativePath, branch):
                if len(branches) == 0:
                    # Nothing collected yet: use the known branch itself.
                    relativePath = branch
                else:
                    knownBranch = True
                break

        if knownBranch:
            continue

        branches.add(relativePath)
        knownBranches.add(relativePath)

    return branches
235
def findBranchParent(branchPrefix, files):
    """Find the known branch that the files below branchPrefix came from.

    Looks at every branched/integrated file of the change below branchPrefix,
    asks "p4 filelog" where it was integrated from and returns the first
    entry of knownBranches that contains the source file. Returns "" when no
    such branch is found. Exits the script when the filelog output does not
    have the expected shape.
    """
    for entry in files:
        path = entry["path"]
        if not path.startswith(branchPrefix):
            continue
        action = entry["action"]
        if action not in ("integrate", "branch"):
            continue
        depotPath = path + "#" + entry["rev"]

        history = p4CmdList("filelog \"%s\"" % depotPath)
        if len(history) != 1:
            print("eek! I got confused by the filelog of %s" % depotPath)
            sys.exit(1)

        history = history[0]
        loggedAction = history["action0"]
        if loggedAction != action:
            print("eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, loggedAction, action))
            sys.exit(1)

        # Only "... from" integration records name a source to follow.
        if not history["how0,0"].endswith(" from"):
            continue

        source = history["file0,0"]
        if source.startswith(branchPrefix):
            # Integrated from within the same branch.
            continue

        lastSourceRev = history["erev0,0"]

        # The result is only checked for its shape; its contents are unused.
        sourceHistory = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
        if len(sourceHistory) != 1:
            print("eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev))
            sys.exit(1)

        # Directory of the source file, relative to the import prefix.
        sourceDir = source[len(globalPrefix):]
        sourceDir = sourceDir[:sourceDir.rfind("/")]

        for candidate in knownBranches:
            if isSubPathOf(sourceDir, candidate):
                return candidate

    return ""
290
def _isExecutableType(fileType):
    """Return True if a Perforce file type carries the execute bit.

    Covers the old-style aliases (xtext, xbinary, cxtext, kxtext, uxbinary,
    ...) as well as the modifier form (text+x, binary+Sx, ...).
    """
    parts = fileType.split("+", 1)
    base = parts[0]
    if base.startswith("x"):
        return True
    if base[:1] in ("c", "k", "u") and base[1:2] == "x":
        return True
    return len(parts) == 2 and "x" in parts[1]

def commit(details, files, branch, branchPrefix, parent, merged = ""):
    """Write one commit for a Perforce change to the git-fast-import stream.

    details      -- change record; "time", "user", "desc" and "change" are read
    files        -- list of file dicts ("path", "rev", "action", "type")
    branch       -- git ref to commit to
    branchPrefix -- depot path prefix of the branch; files outside of it are
                    skipped and the prefix is stripped from the git paths
    parent       -- ref/commit for the "from" line, "" to omit it
    merged       -- ref/commit for the "merge" line, "" to omit it

    Records the change number in committedChanges and lastChange.
    """
    global lastChange

    epoch = details["time"]
    author = details["user"]

    gitStream.write("commit %s\n" % branch)
#    gitStream.write("mark :%s\n" % details["change"])
    committedChanges.add(int(details["change"]))
    if author in users:
        committer = "%s %s %s" % (users[author], epoch, tz)
    else:
        # Unknown Perforce user: fall back to a placeholder e-mail address.
        committer = "%s <a@b> %s %s" % (author, epoch, tz)

    gitStream.write("committer %s\n" % committer)

    # Send the message with an exact byte count. The delimited "data <<EOT"
    # form ends the message early when a description contains a line "EOT".
    message = details["desc"] + "\n[ imported from %s; change %s ]\n" % (branchPrefix, details["change"])
    gitStream.write("data %s\n" % len(message))
    gitStream.write(message)
    gitStream.write("\n")

    if len(parent) > 0:
        gitStream.write("from %s\n" % parent)

    if len(merged) > 0:
        gitStream.write("merge %s\n" % merged)

    for entry in files:
        path = entry["path"]
        if not path.startswith(branchPrefix):
            continue
        depotPath = path + "#" + entry["rev"]
        relPath = path[len(branchPrefix):]
        action = entry["action"]

        if entry["type"] == "apple":
            print("\nfile %s is a strange apple file that forks. Ignoring!" % path)
            continue

        if action == "delete":
            gitStream.write("D %s\n" % relPath)
        else:
            mode = 644
            if _isExecutableType(entry["type"]):
                mode = 755

            data = p4File(depotPath)

            gitStream.write("M %s inline %s\n" % (mode, relPath))
            gitStream.write("data %s\n" % len(data))
            gitStream.write(data)
            gitStream.write("\n")

    gitStream.write("\n")

    lastChange = int(details["change"])
353
def extractFilesInCommitToBranch(files, branchPrefix):
    """Return a new list with the entries of files whose path starts with branchPrefix."""
    return [entry for entry in files if entry["path"].startswith(branchPrefix)]
363
def findBranchSourceHeuristic(files, branch, branchPrefix):
    """Guess which other known branch a change was integrated from.

    For every branched/integrated file, "p4 filelog" is asked for the
    integration source; the first entry of knownBranches that contains the
    source file and differs from branch is returned. Returns "" when there is
    none. Exits the script when the filelog output does not have the expected
    shape.
    """
    for entry in files:
        action = entry["action"]
        if action not in ("integrate", "branch"):
            continue
        depotPath = entry["path"] + "#" + entry["rev"]

        history = p4CmdList("filelog \"%s\"" % depotPath)
        if len(history) != 1:
            print("eek! I got confused by the filelog of %s" % depotPath)
            sys.exit(1)

        history = history[0]
        loggedAction = history["action0"]
        if loggedAction != action:
            print("eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, loggedAction, action))
            sys.exit(1)

        # Only "... from" integration records name a source to follow.
        if not history["how0,0"].endswith(" from"):
            continue

        source = history["file0,0"]
        if source.startswith(branchPrefix):
            # Integrated from within the same branch.
            continue

        lastSourceRev = history["erev0,0"]

        # The result is only checked for its shape; its contents are unused.
        sourceHistory = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
        if len(sourceHistory) != 1:
            print("eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev))
            sys.exit(1)

        # Directory of the source file, relative to the import prefix.
        sourceDir = source[len(globalPrefix):]
        sourceDir = sourceDir[:sourceDir.rfind("/")]

        for candidate in knownBranches:
            if candidate != branch and isSubPathOf(sourceDir, candidate):
                return candidate

    return ""
411
def changeIsBranchMerge(sourceBranch, destinationBranch, change):
    """Heuristically decide whether change merged sourceBranch into destinationBranch.

    Every non-deleted file of the source branch (as of change) is checked
    against "p4 integrated". The function returns False as soon as a file has
    more than one integration record pointing into the destination branch but
    none of them qualifies for this change; otherwise it returns True.
    """
    def revNumber(revSpec):
        # "#none" counts as revision 0, "#<n>" as n.
        number = revSpec[1:]
        if number == "none":
            return 0
        return int(number)

    sourceFiles = {}
    for info in p4CmdList("files %s...@%s" % (globalPrefix + sourceBranch + "/", change)):
        if info["action"] != "delete":
            sourceFiles[info["depotFile"]] = info

    destinationFiles = {}
    for info in p4CmdList("files %s...@%s" % (globalPrefix + destinationBranch + "/", change)):
        destinationFiles[info["depotFile"]] = info

    for fileName in sourceFiles.keys():
        qualifying = []
        recordCount = 0
        deleted = False
        for integration in p4CmdList("integrated \"%s\"" % fileName):
            toFile = integration["fromFile"] # yes, it's true, it's fromFile
            if toFile not in destinationFiles:
                continue
            destFile = destinationFiles[toFile]
            if destFile["action"] == "delete":
                # Deleted in the destination: this file cannot veto the merge.
                deleted = True
                break
            recordCount += 1
            if integration["how"] == "branch from":
                continue

            integratedIn = int(integration["change"])
            if integratedIn > change:
                continue
            if integratedIn == change:
                qualifying.append(integration)
                continue

            destRev = int(destFile["rev"])
            startRev = revNumber(integration["startFromRev"])
            endRev = revNumber(integration["endFromRev"])

            initialBranch = destRev == 1 and integration["how"] != "branch into"
            inRange = startRev <= destRev <= endRev
            newer = destRev > startRev and destRev > endRev

            if initialBranch or inRange or newer:
                qualifying.append(integration)

        if deleted:
            continue

        if not qualifying and recordCount > 1:
            print("file %s was not integrated from %s into %s" % (fileName, sourceBranch, destinationBranch))
            return False

    return True
475
def getUserMap():
    """Return a dict mapping Perforce user names to "Full Name <email>" strings.

    Built from the output of "p4 users"; records without a "User" key are
    skipped.
    """
    userMap = {}
    for record in p4CmdList("users"):
        if "User" in record:
            userMap[record["User"]] = "%s <%s>" % (record["FullName"], record["Email"])
    return userMap
484
users = getUserMap()

# Without an explicit change range, try to continue after the last imported
# change: look for a p4/<change> tag describing the tip of the target branch.
# This is best effort; when no such tag can be found the settings stay as
# they are.
if len(changeRange) == 0:
    try:
        sout, sin, serr = popen2.popen3("git-name-rev --tags `git-rev-parse %s`" % branch)
        try:
            output = sout.read()
        finally:
            sout.close()
            sin.close()
            serr.close()
        if output.endswith("\n"):
            output = output[:-1]
        # index() raises ValueError when the output names no p4 tag.
        tagIdx = output.index(" tags/p4/")
        caretIdx = output.find("^")
        endPos = len(output)
        if caretIdx != -1:
            endPos = caretIdx
        # 9 == len(" tags/p4/"); continue with the change after the tagged one.
        rev = int(output[tagIdx + 9 : endPos]) + 1
        changeRange = "@%s,#head" % rev
        initialParent = os.popen("git-rev-parse %s" % branch).read()[:-1]
        initialTag = "p4/%s" % (int(rev) - 1)
    except (ValueError, IOError, OSError):
        # No usable tag (or git could not be run): nothing to resume from.
        pass
504
# Local (non-DST) UTC offset in the "+HHMM" / "-HHMM" form used on committer
# lines. time.timezone is seconds *west* of UTC, hence the negation. Hours and
# minutes are formatted separately so that half-hour zones come out right
# (+0530 rather than +550) and the result is always four digits.
tzOffset = -time.timezone
tzsign = "+"
if tzOffset < 0:
    tzsign = "-"
    tzOffset = -tzOffset
tz = "%s%02d%02d" % (tzsign, tzOffset // 3600, (tzOffset % 3600) // 60)
509
# Start git-fast-import. popen3 returns (child stdout, child stdin, child
# stderr): the import stream is written to gitStream, while gitOutput and
# gitError carry the child's output and error messages.
gitOutput, gitStream, gitError = popen2.popen3("git-fast-import")
511
# Two modes: a single snapshot commit when one revision was requested,
# otherwise one commit per Perforce change.
if len(revision) > 0:
    print "Doing initial import of %s from revision %s" % (globalPrefix, revision)

    # Build a synthetic change record in the same shape p4Cmd("describe ...")
    # returns, so that extractFilesFromCommit and commit can be reused.
    details = { "user" : "git perforce import user", "time" : int(time.time()) }
    details["desc"] = "Initial import of %s from the state at revision %s" % (globalPrefix, revision)
    details["change"] = revision
    newestRevision = 0

    fileCnt = 0
    for info in p4CmdList("files %s...%s" % (globalPrefix, revision)):
        # Track the highest change number seen; it becomes the change of the
        # snapshot commit (deleted files count here as well).
        change = int(info["change"])
        if change > newestRevision:
            newestRevision = change

        if info["action"] == "delete":
            continue

        # Store the file as depotFileN/revN/actionN/typeN.
        for prop in [ "depotFile", "rev", "action", "type" ]:
            details["%s%s" % (prop, fileCnt)] = info[prop]

        fileCnt = fileCnt + 1

    details["change"] = newestRevision

    # NOTE(review): the bare except reports git's stderr for *any* failure and
    # then carries on, unlike the IOError handler below which exits - confirm.
    try:
        commit(details, extractFilesFromCommit(details), branch, globalPrefix)
    except:
        print gitError.read()

else:
    changes = []

    if len(changesFile) > 0:
        # Explicit list of change numbers, one per line; duplicates are
        # dropped and the result is sorted ascending.
        output = open(changesFile).readlines()
        changeSet = Set()
        for line in output:
            changeSet.add(int(line))

        for change in changeSet:
            changes.append(change)

        changes.sort()
    else:
        output = os.popen("p4 changes %s...%s" % (globalPrefix, changeRange)).readlines()

        # The second space-separated field of each line is taken as the change
        # number (kept as a string here).
        for line in output:
            changeNum = line.split(" ")[1]
            changes.append(changeNum)

        # presumably "p4 changes" lists newest first, hence the reversal - verify
        changes.reverse()

    if len(changes) == 0:
        if not silent:
            print "no changes to import!"
        sys.exit(1)

    cnt = 1
    for change in changes:
        description = p4Cmd("describe %s" % change)

        if not silent:
            sys.stdout.write("\rimporting revision %s (%s%%)" % (change, cnt * 100 / len(changes)))
            sys.stdout.flush()
        cnt = cnt + 1

        try:
            files = extractFilesFromCommit(description)
            if detectBranches:
                # One commit per branch touched by this change.
                # NOTE(review): this loop rebinds the module-level "branch",
                # which the closing "reset refs/tags/p4/..." step reads later.
                for branch in branchesForCommit(files):
                    knownBranches.add(branch)
                    branchPrefix = globalPrefix + branch + "/"

                    filesForCommit = extractFilesInCommitToBranch(files, branchPrefix)

                    merged = ""
                    parent = ""
                    ########### remove cnt!!!
                    # cnt was already incremented above, so "cnt > 2" skips
                    # the parent lookup for the first change of the run.
                    if branch not in createdBranches and cnt > 2:
                        createdBranches.add(branch)
                        parent = findBranchParent(branchPrefix, files)
                        if parent == branch:
                            parent = ""
    #                    elif len(parent) > 0:
    #                        print "%s branched off of %s" % (branch, parent)

                    # Without a branch parent, check whether the change looks
                    # like a merge from another known branch.
                    if len(parent) == 0:
                        merged = findBranchSourceHeuristic(filesForCommit, branch, branchPrefix)
                        if len(merged) > 0:
                            print "change %s could be a merge from %s into %s" % (description["change"], merged, branch)
                            if not changeIsBranchMerge(merged, branch, int(description["change"])):
                                merged = ""

                    # Turn the branch paths into git refs.
                    branch = "refs/heads/" + branch
                    if len(parent) > 0:
                        parent = "refs/heads/" + parent
                    if len(merged) > 0:
                        merged = "refs/heads/" + merged
                    # The full file list is passed; commit() itself skips paths
                    # outside branchPrefix.
                    commit(description, files, branch, branchPrefix, parent, merged)
            else:
                commit(description, files, branch, globalPrefix, initialParent)
                # Only the first commit of the run gets the explicit parent.
                initialParent = ""
        except IOError:
            # Writing to git-fast-import failed: show its stderr and give up.
            print gitError.read()
            sys.exit(1)
616
# Terminate the "\r" progress line.
if not silent:
    print("")

# Tag the tip of the branch with the number of the last imported change.
gitStream.write("reset refs/tags/p4/%s\n" % lastChange)
gitStream.write("from %s\n\n" % branch)

for stream in (gitStream, gitOutput, gitError):
    stream.close()

# Remember the depot path for the next run and drop the tag this run
# continued from, if any.
os.popen("git-repo-config p4.depotpath %s" % globalPrefix).read()
if initialTag:
    os.popen("git tag -d %s" % initialTag).read()

sys.exit(0)