Work in progress on detecting branches.
[gitweb.git] / contrib / fast-import / p4-fast-export.py
index 1f19cbc5605ea9ae71b1555431271848d2326e64..a2cca31173e30fb0035d4a2beeb9d848df9cb3cb 100755 (executable)
@@ -8,17 +8,28 @@
 # TODO:
 #       - support integrations (at least p4i)
 #       - support p4 submit (hah!)
+#       - emulate p4's delete behavior: if a directory becomes empty delete it. continue
+#         with parent dir until non-empty dir is found.
 #
-import os, string, sys, time
-import marshal, popen2, getopt
+import os, string, sys, time, os.path
+import marshal, popen2, getopt, sha
+from sets import Set;
 
+cacheDebug = False
+
+silent = False
+knownBranches = Set()
+createdBranches = Set()
+committedChanges = Set()
 branch = "refs/heads/master"
-prefix = previousDepotPath = os.popen("git-repo-config --get p4.depotpath").read()
-if len(prefix) != 0:
-    prefix = prefix[:-1]
+globalPrefix = previousDepotPath = os.popen("git-repo-config --get p4.depotpath").read()
+detectBranches = False
+changesFile = ""
+if len(globalPrefix) != 0:
+    globalPrefix = globalPrefix[:-1]
 
 try:
-    opts, args = getopt.getopt(sys.argv[1:], "", [ "branch=" ])
+    opts, args = getopt.getopt(sys.argv[1:], "", [ "branch=", "detect-branches", "changesfile=", "silent", "known-branches=" ])
 except getopt.GetoptError:
     print "fixme, syntax error"
     sys.exit(1)
 for o, a in opts:
     if o == "--branch":
         branch = "refs/heads/" + a
-
-if len(args) == 0 and len(prefix) != 0:
-    print "[using previously specified depot path %s]" % prefix
+    elif o == "--detect-branches":
+        detectBranches = True
+    elif o == "--changesfile":
+        changesFile = a
+    elif o == "--silent":
+        silent= True
+    elif o == "--known-branches":
+        for branch in o.split(","):
+            knownBranches.add(branch)
+
+if len(args) == 0 and len(globalPrefix) != 0:
+    if not silent:
+        print "[using previously specified depot path %s]" % globalPrefix
 elif len(args) != 1:
     print "usage: %s //depot/path[@revRange]" % sys.argv[0]
     print "\n    example:"
     print ""
     sys.exit(1)
 else:
-    if len(prefix) != 0 and prefix != args[0]:
-        print "previous import used depot path %s and now %s was specified. this doesn't work!" % (prefix, args[0])
+    if len(globalPrefix) != 0 and globalPrefix != args[0]:
+        print "previous import used depot path %s and now %s was specified. this doesn't work!" % (globalPrefix, args[0])
         sys.exit(1)
-    prefix = args[0]
+    globalPrefix = args[0]
 
 changeRange = ""
 revision = ""
 users = {}
 initialParent = ""
-lastChange = ""
-lastCommitter = ""
+lastChange = 0
 initialTag = ""
 
-if prefix.find("@") != -1:
-    atIdx = prefix.index("@")
-    changeRange = prefix[atIdx:]
+if globalPrefix.find("@") != -1:
+    atIdx = globalPrefix.index("@")
+    changeRange = globalPrefix[atIdx:]
     if changeRange == "@all":
         changeRange = ""
     elif changeRange.find(",") == -1:
         revision = changeRange
         changeRange = ""
-    prefix = prefix[0:atIdx]
-elif prefix.find("#") != -1:
-    hashIdx = prefix.index("#")
-    revision = prefix[hashIdx:]
-    prefix = prefix[0:hashIdx]
+    globalPrefix = globalPrefix[0:atIdx]
+elif globalPrefix.find("#") != -1:
+    hashIdx = globalPrefix.index("#")
+    revision = globalPrefix[hashIdx:]
+    globalPrefix = globalPrefix[0:hashIdx]
 elif len(previousDepotPath) == 0:
     revision = "#head"
 
-if prefix.endswith("..."):
-    prefix = prefix[:-3]
+if globalPrefix.endswith("..."):
+    globalPrefix = globalPrefix[:-3]
+
+if not globalPrefix.endswith("/"):
+    globalPrefix += "/"
+
+def p4File(depotPath):  # returns the output of "p4 print -q" for depotPath, optionally cached on disk
+    cacheKey = "/tmp/p4cache/data-" + sha.new(depotPath).hexdigest()  # cache file name is derived from the SHA-1 of the depot path
+
+    data = 0
+    try:
+        if not cacheDebug:
+            raise  # caching disabled: a bare raise with no active exception fails and lands in the except branch
+        data = open(cacheKey, "rb").read()
+    except:  # also reached when the cache file cannot be opened or read
+        data = os.popen("p4 print -q \"%s\"" % depotPath, "rb").read()
+        if cacheDebug:
+            open(cacheKey, "wb").write(data)  # store the result so a later run can read it from the cache
 
-if not prefix.endswith("/"):
-    prefix += "/"
+    return data
 
 def p4CmdList(cmd):
-    pipe = os.popen("p4 -G %s" % cmd, "rb")
+    fullCmd = "p4 -G %s" % cmd;
+
+    cacheKey = sha.new(fullCmd).hexdigest()
+    cacheKey = "/tmp/p4cache/cmd-" + cacheKey
+
+    cached = True
+    pipe = 0
+    try:
+        if not cacheDebug:
+            raise
+        pipe = open(cacheKey, "rb")
+    except:
+        cached = False
+        pipe = os.popen(fullCmd, "rb")
+
     result = []
     try:
         while True:
@@ -85,6 +134,13 @@ def p4CmdList(cmd):
     except EOFError:
         pass
     pipe.close()
+
+    if not cached and cacheDebug:
+        pipe = open(cacheKey, "wb")
+        for r in result:
+            marshal.dump(r, pipe)
+        pipe.close()
+
     return result
 
 def p4Cmd(cmd):
@@ -94,16 +150,148 @@ def p4Cmd(cmd):
         result.update(entry)
     return result;
 
-def commit(details):
-    global initialParent
+def extractFilesFromCommit(commit):
+    # Gather the numbered depotFileN/revN/actionN/typeN entries of a change
+    # dictionary into one dict per file. Numbering starts at 0 and the
+    # first missing depotFileN key ends the scan; paths outside
+    # globalPrefix are skipped without any message.
+    collected = []
+    idx = 0
+    while ("depotFile%s" % idx) in commit:
+        depotFile = commit["depotFile%s" % idx]
+        if depotFile.startswith(globalPrefix):
+            collected.append({
+                "path": depotFile,
+                "rev": commit["rev%s" % idx],
+                "action": commit["action%s" % idx],
+                "type": commit["type%s" % idx],
+            })
+        idx += 1
+
+    return collected
+
+def isSubPathOf(first, second):
+    # True when first equals second or lies below it at a "/" boundary,
+    # so "foo/bar" is under "foo" while "foobar" is not.
+    if first == second:
+        return True
+    return first.startswith(second + "/")
+
+def branchesForCommit(files):
+    """Return the Set of branch directories (relative to globalPrefix) touched by files; newly seen ones are also added to knownBranches."""
+    branches = Set()
+
+    for file in files:
+        relativePath = file["path"][len(globalPrefix):]
+        slashIdx = relativePath.rfind("/")
+        if slashIdx == -1:
+            # file sits directly in globalPrefix, so it has no branch directory;
+            # slicing with -1 would register the truncated filename as a branch
+            continue
+        # strip off the filename
+        relativePath = relativePath[0:slashIdx]
+
+        ###### this needs more testing :)
+        knownBranch = False
+        for branch in branches:
+            if relativePath == branch:
+                knownBranch = True
+                break
+            # path lies below a branch already collected for this change
+            if isSubPathOf(relativePath, branch):
+                knownBranch = True
+                break
+            # collected branch lies below this path: drop it so the shorter path gets added instead
+            if isSubPathOf(branch, relativePath):
+                branches.remove(branch)
+                break
+
+        if knownBranch:
+            continue
+
+        for branch in knownBranches:
+            # path lies below a branch known from before: adopt it if nothing is collected yet, else treat the file as covered
+            if isSubPathOf(relativePath, branch):
+                if len(branches) == 0:
+                    relativePath = branch
+                else:
+                    knownBranch = True
+                break
+
+        if knownBranch:
+            continue
+
+        branches.add(relativePath)
+        knownBranches.add(relativePath)
+
+    return branches
+
+def findBranchParent(branchPrefix, files):
+    """Return "refs/heads/<branch>" for the known branch that a file under branchPrefix was branched or integrated from, or "" if none is found."""
+    for file in files:
+        path = file["path"]
+        if not path.startswith(branchPrefix):
+            continue
+        action = file["action"]
+        if action != "integrate" and action != "branch":
+            continue
+        depotPath = path + "#" + file["rev"]
+
+        log = p4CmdList("filelog \"%s\"" % depotPath)
+        if len(log) != 1:
+            print "eek! I got confused by the filelog of %s" % depotPath
+            sys.exit(1);
+
+        log = log[0]
+        if log["action0"] != action:
+            print "eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, log["action0"], action)
+            sys.exit(1);
+
+        branchAction = log["how0,0"]
+#        if branchAction == "branch into" or branchAction == "ignored":
+#            continue # ignore for branching
+
+        if not branchAction.endswith(" from"):
+            continue # ignore for branching
+
+        source = log["file0,0"]
+        if source.startswith(branchPrefix):
+            continue
+        if not source.startswith(globalPrefix):
+            continue  # a source outside the import root cannot map to a known branch; slicing it below would yield an arbitrary suffix
+
+        lastSourceRev = log["erev0,0"]
+
+        sourceLog = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
+        if len(sourceLog) != 1:
+            print "eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev)
+            sys.exit(1);
+        sourceLog = sourceLog[0]
+
+        relPath = source[len(globalPrefix):]
+        # strip off the filename; a source directly in globalPrefix has no directory part (rfind gives -1)
+        relPath = relPath[0:max(relPath.rfind("/"), 0)]
+
+        for branch in knownBranches:
+            if isSubPathOf(relPath, branch):
+#                print "determined parent branch branch %s due to change in file %s" % (branch, source)
+                return "refs/heads/%s" % branch
+#            else:
+#                print "%s is not a subpath of branch %s" % (relPath, branch)
+
+    return ""
+
+def commit(details, files, branch, branchPrefix, parent):
     global users
     global lastChange
-    global lastCommitter
+    global committedChanges
 
     epoch = details["time"]
     author = details["user"]
 
     gitStream.write("commit %s\n" % branch)
+    gitStream.write("mark :%s\n" % details["change"])
+    committedChanges.add(int(details["change"]))
     committer = ""
     if author in users:
         committer = "%s %s %s" % (users[author], epoch, tz)
@@ -114,46 +302,40 @@ def commit(details):
 
     gitStream.write("data <<EOT\n")
     gitStream.write(details["desc"])
-    gitStream.write("\n[ imported from %s; change %s ]\n" % (prefix, details["change"]))
+    gitStream.write("\n[ imported from %s; change %s ]\n" % (branchPrefix, details["change"]))
     gitStream.write("EOT\n\n")
 
-    if len(initialParent) > 0:
-        gitStream.write("from %s\n" % initialParent)
-        initialParent = ""
+    if len(parent) > 0:
+        gitStream.write("from %s\n" % parent)
 
-    fnum = 0
-    while details.has_key("depotFile%s" % fnum):
-        path = details["depotFile%s" % fnum]
-        if not path.startswith(prefix):
-            print "\nchanged files: ignoring path %s outside of %s in change %s" % (path, prefix, change)
-            fnum = fnum + 1
+    for file in files:
+        path = file["path"]
+        if not path.startswith(branchPrefix):
+#            if not silent:
+#                print "\nchanged files: ignoring path %s outside of branch prefix %s in change %s" % (path, branchPrefix, details["change"])
             continue
-
-        rev = details["rev%s" % fnum]
+        rev = file["rev"]
         depotPath = path + "#" + rev
-        relPath = path[len(prefix):]
-        action = details["action%s" % fnum]
+        relPath = path[len(branchPrefix):]
+        action = file["action"]
 
         if action == "delete":
             gitStream.write("D %s\n" % relPath)
         else:
             mode = 644
-            if details["type%s" % fnum].startswith("x"):
+            if file["type"].startswith("x"):
                 mode = 755
 
-            data = os.popen("p4 print -q \"%s\"" % depotPath, "rb").read()
+            data = p4File(depotPath)
 
             gitStream.write("M %s inline %s\n" % (mode, relPath))
             gitStream.write("data %s\n" % len(data))
             gitStream.write(data)
             gitStream.write("\n")
 
-        fnum = fnum + 1
-
     gitStream.write("\n")
 
-    lastChange = details["change"]
-    lastCommitter = committer
+    lastChange = int(details["change"])
 
 def getUserMap():
     users = {}
@@ -170,17 +352,20 @@ def getUserMap():
     try:
         sout, sin, serr = popen2.popen3("git-name-rev --tags `git-rev-parse %s`" % branch)
         output = sout.read()
+        if output.endswith("\n"):
+            output = output[:-1]
         tagIdx = output.index(" tags/p4/")
-        caretIdx = output.index("^")
-        rev = int(output[tagIdx + 9 : caretIdx]) + 1
+        caretIdx = output.find("^")
+        endPos = len(output)
+        if caretIdx != -1:
+            endPos = caretIdx
+        rev = int(output[tagIdx + 9 : endPos]) + 1
         changeRange = "@%s,#head" % rev
         initialParent = os.popen("git-rev-parse %s" % branch).read()[:-1]
         initialTag = "p4/%s" % (int(rev) - 1)
     except:
         pass
 
-sys.stderr.write("\n")
-
 tz = - time.timezone / 36
 tzsign = ("%s" % tz)[0]
 if tzsign != '+' and tzsign != '-':
@@ -189,15 +374,15 @@ def getUserMap():
 gitOutput, gitStream, gitError = popen2.popen3("git-fast-import")
 
 if len(revision) > 0:
-    print "Doing initial import of %s from revision %s" % (prefix, revision)
+    print "Doing initial import of %s from revision %s" % (globalPrefix, revision)
 
     details = { "user" : "git perforce import user", "time" : int(time.time()) }
-    details["desc"] = "Initial import of %s from the state at revision %s" % (prefix, revision)
+    details["desc"] = "Initial import of %s from the state at revision %s" % (globalPrefix, revision)
     details["change"] = revision
     newestRevision = 0
 
     fileCnt = 0
-    for info in p4CmdList("files %s...%s" % (prefix, revision)):
+    for info in p4CmdList("files %s...%s" % (globalPrefix, revision)):
         change = int(info["change"])
         if change > newestRevision:
             newestRevision = change
@@ -213,50 +398,84 @@ def getUserMap():
     details["change"] = newestRevision
 
     try:
-        commit(details)
+        commit(details, extractFilesFromCommit(details), branch, globalPrefix)
     except:
         print gitError.read()
 
 else:
-    output = os.popen("p4 changes %s...%s" % (prefix, changeRange)).readlines()
-
     changes = []
-    for line in output:
-        changeNum = line.split(" ")[1]
-        changes.append(changeNum)
 
-    changes.reverse()
+    if len(changesFile) > 0:
+        output = open(changesFile).readlines()
+        changeSet = Set()
+        for line in output:
+            changeSet.add(int(line))
+
+        for change in changeSet:
+            changes.append(change)
+
+        changes.sort()
+    else:
+        output = os.popen("p4 changes %s...%s" % (globalPrefix, changeRange)).readlines()
+
+        for line in output:
+            changeNum = line.split(" ")[1]
+            changes.append(changeNum)
+
+        changes.reverse()
 
     if len(changes) == 0:
-        print "no changes to import!"
+        if not silent:
+            print "no changes to import!"
         sys.exit(1)
 
     cnt = 1
     for change in changes:
         description = p4Cmd("describe %s" % change)
 
-        sys.stdout.write("\rimporting revision %s (%s%%)" % (change, cnt * 100 / len(changes)))
-        sys.stdout.flush()
+        if not silent:
+            sys.stdout.write("\rimporting revision %s (%s%%)" % (change, cnt * 100 / len(changes)))
+            sys.stdout.flush()
         cnt = cnt + 1
 
-        try:
-            commit(description)
-        except:
-            print gitError.read()
-            sys.exit(1)
+#        try:
+        files = extractFilesFromCommit(description)
+        if detectBranches:
+            for branch in branchesForCommit(files):
+                knownBranches.add(branch)
+                branchPrefix = globalPrefix + branch + "/"
+
+                parent = ""
+                ########### remove cnt!!!
+                if branch not in createdBranches and cnt > 2:
+                    createdBranches.add(branch)
+                    parent = findBranchParent(branchPrefix, files)
+                    if parent == "refs/heads/" + branch:  # findBranchParent returns a full ref name, so compare against the full ref to catch a self-parent
+                        parent = ""
+#                    elif len(parent) > 0:
+#                        print "%s branched off of %s" % (branch, parent)
+
+                branch = "refs/heads/" + branch
+                commit(description, files, branch, branchPrefix, parent)
+        else:
+            commit(description, files, branch, globalPrefix, initialParent)
+            initialParent = ""
+#        except:
+#            print gitError.read()
+#            sys.exit(1)
+
+if not silent:
+    print ""
 
-print ""
+gitStream.write("reset refs/tags/p4/%s\n" % lastChange)
+gitStream.write("from %s\n\n" % branch);
 
-gitStream.write("tag p4/%s\n" % lastChange)
-gitStream.write("from %s\n" % branch);
-gitStream.write("tagger %s\n" % lastCommitter);
-gitStream.write("data 0\n\n")
 
 gitStream.close()
 gitOutput.close()
 gitError.close()
 
-os.popen("git-repo-config p4.depotpath %s" % prefix).read()
+os.popen("git-repo-config p4.depotpath %s" % globalPrefix).read()
 if len(initialTag) > 0:
     os.popen("git tag -d %s" % initialTag).read()