65b7fca4b6571d4bfc231005a63a77ccb148347f
   1#!/usr/bin/python
   2#
   3# p4-fast-export.py
   4#
   5# Author: Simon Hausmann <hausmann@kde.org>
   6# License: MIT <http://www.opensource.org/licenses/mit-license.php>
   7#
   8# TODO:
   9#       - support integrations (at least p4i)
  10#       - support p4 submit (hah!)
  11#       - emulate p4's delete behavior: if a directory becomes empty delete it. continue
  12#         with parent dir until non-empty dir is found.
  13#
  14import os, string, sys, time, os.path
  15import marshal, popen2, getopt, sha
  16from sets import Set;
  17
  18cacheDebug = False
  19
  20silent = False
  21knownBranches = Set()
  22createdBranches = Set()
  23committedChanges = Set()
  24branch = "refs/heads/master"
  25globalPrefix = previousDepotPath = os.popen("git-repo-config --get p4.depotpath").read()
  26detectBranches = False
  27changesFile = ""
  28if len(globalPrefix) != 0:
  29    globalPrefix = globalPrefix[:-1]
  30
  31try:
  32    opts, args = getopt.getopt(sys.argv[1:], "", [ "branch=", "detect-branches", "changesfile=", "silent", "known-branches=",
  33                                                   "cache-debug" ])
  34except getopt.GetoptError:
  35    print "fixme, syntax error"
  36    sys.exit(1)
  37
  38for o, a in opts:
  39    if o == "--branch":
  40        branch = "refs/heads/" + a
  41    elif o == "--detect-branches":
  42        detectBranches = True
  43    elif o == "--changesfile":
  44        changesFile = a
  45    elif o == "--silent":
  46        silent= True
  47    elif o == "--known-branches":
  48        for branch in open(a).readlines():
  49            knownBranches.add(branch[:-1])
  50    elif o == "--cache-debug":
  51        cacheDebug = True
  52
  53if len(args) == 0 and len(globalPrefix) != 0:
  54    if not silent:
  55        print "[using previously specified depot path %s]" % globalPrefix
  56elif len(args) != 1:
  57    print "usage: %s //depot/path[@revRange]" % sys.argv[0]
  58    print "\n    example:"
  59    print "    %s //depot/my/project/ -- to import the current head"
  60    print "    %s //depot/my/project/@all -- to import everything"
  61    print "    %s //depot/my/project/@1,6 -- to import only from revision 1 to 6"
  62    print ""
  63    print "    (a ... is not needed in the path p4 specification, it's added implicitly)"
  64    print ""
  65    sys.exit(1)
  66else:
  67    if len(globalPrefix) != 0 and globalPrefix != args[0]:
  68        print "previous import used depot path %s and now %s was specified. this doesn't work!" % (globalPrefix, args[0])
  69        sys.exit(1)
  70    globalPrefix = args[0]
  71
  72changeRange = ""
  73revision = ""
  74users = {}
  75initialParent = ""
  76lastChange = 0
  77initialTag = ""
  78
  79if globalPrefix.find("@") != -1:
  80    atIdx = globalPrefix.index("@")
  81    changeRange = globalPrefix[atIdx:]
  82    if changeRange == "@all":
  83        changeRange = ""
  84    elif changeRange.find(",") == -1:
  85        revision = changeRange
  86        changeRange = ""
  87    globalPrefix = globalPrefix[0:atIdx]
  88elif globalPrefix.find("#") != -1:
  89    hashIdx = globalPrefix.index("#")
  90    revision = globalPrefix[hashIdx:]
  91    globalPrefix = globalPrefix[0:hashIdx]
  92elif len(previousDepotPath) == 0:
  93    revision = "#head"
  94
  95if globalPrefix.endswith("..."):
  96    globalPrefix = globalPrefix[:-3]
  97
  98if not globalPrefix.endswith("/"):
  99    globalPrefix += "/"
 100
 101def p4File(depotPath):
 102    cacheKey = "/tmp/p4cache/data-" + sha.new(depotPath).hexdigest()
 103
 104    data = 0
 105    try:
 106        if not cacheDebug:
 107            raise
 108        data = open(cacheKey, "rb").read()
 109    except:
 110        data = os.popen("p4 print -q \"%s\"" % depotPath, "rb").read()
 111        if cacheDebug:
 112            open(cacheKey, "wb").write(data)
 113
 114    return data
 115
 116def p4CmdList(cmd):
 117    fullCmd = "p4 -G %s" % cmd;
 118
 119    cacheKey = sha.new(fullCmd).hexdigest()
 120    cacheKey = "/tmp/p4cache/cmd-" + cacheKey
 121
 122    cached = True
 123    pipe = 0
 124    try:
 125        if not cacheDebug:
 126            raise
 127        pipe = open(cacheKey, "rb")
 128    except:
 129        cached = False
 130        pipe = os.popen(fullCmd, "rb")
 131
 132    result = []
 133    try:
 134        while True:
 135            entry = marshal.load(pipe)
 136            result.append(entry)
 137    except EOFError:
 138        pass
 139    pipe.close()
 140
 141    if not cached and cacheDebug:
 142        pipe = open(cacheKey, "wb")
 143        for r in result:
 144            marshal.dump(r, pipe)
 145        pipe.close()
 146
 147    return result
 148
 149def p4Cmd(cmd):
 150    list = p4CmdList(cmd)
 151    result = {}
 152    for entry in list:
 153        result.update(entry)
 154    return result;
 155
 156def extractFilesFromCommit(commit):
 157    files = []
 158    fnum = 0
 159    while commit.has_key("depotFile%s" % fnum):
 160        path =  commit["depotFile%s" % fnum]
 161        if not path.startswith(globalPrefix):
 162#            if not silent:
 163#                print "\nchanged files: ignoring path %s outside of %s in change %s" % (path, globalPrefix, change)
 164            fnum = fnum + 1
 165            continue
 166
 167        file = {}
 168        file["path"] = path
 169        file["rev"] = commit["rev%s" % fnum]
 170        file["action"] = commit["action%s" % fnum]
 171        file["type"] = commit["type%s" % fnum]
 172        files.append(file)
 173        fnum = fnum + 1
 174    return files
 175
 176def isSubPathOf(first, second):
 177    if not first.startswith(second):
 178        return False
 179    if first == second:
 180        return True
 181    return first[len(second)] == "/"
 182
def branchesForCommit(files):
    """Return the Set of branch directories touched by the given files.

    `files` is a list of dicts with a "path" entry (full depot path).  For
    each file the directory relative to globalPrefix is computed and then
    reconciled with the branches found so far in this call and with the
    global knownBranches set.  Every directory that ends up in the result is
    also added to knownBranches.
    """
    global knownBranches
    branches = Set()

    for file in files:
        relativePath = file["path"][len(globalPrefix):]
        # strip off the filename
        # NOTE(review): for a file directly under globalPrefix rfind() returns
        # -1, so this slice drops the last character of the file name instead
        # of yielding an empty directory -- confirm whether that can occur.
        relativePath = relativePath[0:relativePath.rfind("/")]

        ###### this needs more testing :)
        # First compare against the branches already collected for this change.
        knownBranch = False
        for branch in branches:
            if relativePath == branch:
                knownBranch = True
                break
            # the file lives below an already collected branch
            if isSubPathOf(relativePath, branch):
                knownBranch = True
                break
            # the collected branch lives below this file's directory: drop it;
            # the shorter directory is handled further down
            if isSubPathOf(branch, relativePath):
                branches.remove(branch)
                break

        if knownBranch:
            continue

        # Then compare against branches known from earlier changes.
        for branch in knownBranches:
            if isSubPathOf(relativePath, branch):
                if len(branches) == 0:
                    # nothing collected yet: use the known branch itself
                    relativePath = branch
                else:
                    # something was collected already: skip this file
                    knownBranch = True
                break

        if knownBranch:
            continue

        branches.add(relativePath)
        knownBranches.add(relativePath)

    return branches
 231
def findBranchParent(branchPrefix, files):
    """Find the known branch from which files under branchPrefix were branched.

    Looks at every file below branchPrefix whose action is "integrate" or
    "branch", asks `p4 filelog` where that revision came from and returns the
    first entry of knownBranches that contains the source file's directory.
    Returns "" when no such branch is found.  Exits the program when a
    filelog result does not have the expected shape.
    """
    for file in files:
        path = file["path"]
        if not path.startswith(branchPrefix):
            continue
        action = file["action"]
        if action != "integrate" and action != "branch":
            continue
        rev = file["rev"]
        depotPath = path + "#" + rev

        log = p4CmdList("filelog \"%s\"" % depotPath)
        if len(log) != 1:
            print "eek! I got confused by the filelog of %s" % depotPath
            sys.exit(1);

        log = log[0]
        # the newest filelog entry must describe the action we are looking at
        if log["action0"] != action:
            print "eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, log["action0"], action)
            sys.exit(1);

        branchAction = log["how0,0"]

        # only integration records of the "... from" kind name a source
        if not branchAction.endswith(" from"):
            continue # ignore for branching

        source = log["file0,0"]
        # an integration from within the same branch says nothing about a parent
        if source.startswith(branchPrefix):
            continue

        lastSourceRev = log["erev0,0"]

        # The source's own filelog is fetched and its length checked; its
        # content is not used afterwards.
        sourceLog = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
        if len(sourceLog) != 1:
            print "eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev)
            sys.exit(1);
        sourceLog = sourceLog[0]

        # NOTE(review): assumes `source` starts with globalPrefix; this is not
        # checked here -- confirm.
        relPath = source[len(globalPrefix):]
        # strip off the filename
        relPath = relPath[0:relPath.rfind("/")]

        for branch in knownBranches:
            if isSubPathOf(relPath, branch):
                return branch

    return ""
 286
 287def commit(details, files, branch, branchPrefix, parent):
 288    global users
 289    global lastChange
 290    global committedChanges
 291
 292    epoch = details["time"]
 293    author = details["user"]
 294
 295    gitStream.write("commit %s\n" % branch)
 296    gitStream.write("mark :%s\n" % details["change"])
 297    committedChanges.add(int(details["change"]))
 298    committer = ""
 299    if author in users:
 300        committer = "%s %s %s" % (users[author], epoch, tz)
 301    else:
 302        committer = "%s <a@b> %s %s" % (author, epoch, tz)
 303
 304    gitStream.write("committer %s\n" % committer)
 305
 306    gitStream.write("data <<EOT\n")
 307    gitStream.write(details["desc"])
 308    gitStream.write("\n[ imported from %s; change %s ]\n" % (branchPrefix, details["change"]))
 309    gitStream.write("EOT\n\n")
 310
 311    if len(parent) > 0:
 312        gitStream.write("from %s\n" % parent)
 313
 314    for file in files:
 315        path = file["path"]
 316        if not path.startswith(branchPrefix):
 317#            if not silent:
 318#                print "\nchanged files: ignoring path %s outside of branch prefix %s in change %s" % (path, branchPrefix, details["change"])
 319            continue
 320        rev = file["rev"]
 321        depotPath = path + "#" + rev
 322        relPath = path[len(branchPrefix):]
 323        action = file["action"]
 324
 325        if action == "delete":
 326            gitStream.write("D %s\n" % relPath)
 327        else:
 328            mode = 644
 329            if file["type"].startswith("x"):
 330                mode = 755
 331
 332            data = p4File(depotPath)
 333
 334            gitStream.write("M %s inline %s\n" % (mode, relPath))
 335            gitStream.write("data %s\n" % len(data))
 336            gitStream.write(data)
 337            gitStream.write("\n")
 338
 339    gitStream.write("\n")
 340
 341    lastChange = int(details["change"])
 342
 343def extractFilesInCommitToBranch(files, branchPrefix):
 344    newFiles = []
 345
 346    for file in files:
 347        path = file["path"]
 348        if path.startswith(branchPrefix):
 349            newFiles.append(file)
 350
 351    return newFiles
 352
def findBranchSourceHeuristic(files, branch, branchPrefix):
    """Guess a known branch, other than `branch`, that `files` were integrated from.

    For every file with action "integrate" or "branch" the p4 filelog is
    queried for the integration source.  Returns the first entry of
    knownBranches that contains the source file's directory and differs from
    `branch`, or "" if there is none.  Exits the program when a filelog
    result does not have the expected shape.
    """
    for file in files:
        action = file["action"]
        if action != "integrate" and action != "branch":
            continue
        path = file["path"]
        rev = file["rev"]
        depotPath = path + "#" + rev

        log = p4CmdList("filelog \"%s\"" % depotPath)
        if len(log) != 1:
            print "eek! I got confused by the filelog of %s" % depotPath
            sys.exit(1);

        log = log[0]
        # the newest filelog entry must describe the action we are looking at
        if log["action0"] != action:
            print "eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, log["action0"], action)
            sys.exit(1);

        branchAction = log["how0,0"]

        # only integration records of the "... from" kind name a source
        if not branchAction.endswith(" from"):
            continue # ignore for branching

        source = log["file0,0"]
        # integrations from within the same branch are not interesting
        if source.startswith(branchPrefix):
            continue

        lastSourceRev = log["erev0,0"]

        # The source's own filelog is fetched and its length checked; its
        # content is not used afterwards.
        sourceLog = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
        if len(sourceLog) != 1:
            print "eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev)
            sys.exit(1);
        sourceLog = sourceLog[0]

        # NOTE(review): assumes `source` starts with globalPrefix; this is not
        # checked here -- confirm.
        relPath = source[len(globalPrefix):]
        # strip off the filename
        relPath = relPath[0:relPath.rfind("/")]

        for candidate in knownBranches:
            if isSubPathOf(relPath, candidate) and candidate != branch:
                return candidate

    return ""
 400
def changeIsBranchMerge(sourceBranch, destinationBranch, change):
    """Placeholder: always returns False and ignores all of its arguments."""
    return False
 403
 404def getUserMap():
 405    users = {}
 406
 407    for output in p4CmdList("users"):
 408        if not output.has_key("User"):
 409            continue
 410        users[output["User"]] = output["FullName"] + " <" + output["Email"] + ">"
 411    return users
 412
 413users = getUserMap()
 414
 415if len(changeRange) == 0:
 416    try:
 417        sout, sin, serr = popen2.popen3("git-name-rev --tags `git-rev-parse %s`" % branch)
 418        output = sout.read()
 419        if output.endswith("\n"):
 420            output = output[:-1]
 421        tagIdx = output.index(" tags/p4/")
 422        caretIdx = output.find("^")
 423        endPos = len(output)
 424        if caretIdx != -1:
 425            endPos = caretIdx
 426        rev = int(output[tagIdx + 9 : endPos]) + 1
 427        changeRange = "@%s,#head" % rev
 428        initialParent = os.popen("git-rev-parse %s" % branch).read()[:-1]
 429        initialTag = "p4/%s" % (int(rev) - 1)
 430    except:
 431        pass
 432
 433tz = - time.timezone / 36
 434tzsign = ("%s" % tz)[0]
 435if tzsign != '+' and tzsign != '-':
 436    tz = "+" + ("%s" % tz)
 437
 438gitOutput, gitStream, gitError = popen2.popen3("git-fast-import")
 439
 440if len(revision) > 0:
 441    print "Doing initial import of %s from revision %s" % (globalPrefix, revision)
 442
 443    details = { "user" : "git perforce import user", "time" : int(time.time()) }
 444    details["desc"] = "Initial import of %s from the state at revision %s" % (globalPrefix, revision)
 445    details["change"] = revision
 446    newestRevision = 0
 447
 448    fileCnt = 0
 449    for info in p4CmdList("files %s...%s" % (globalPrefix, revision)):
 450        change = int(info["change"])
 451        if change > newestRevision:
 452            newestRevision = change
 453
 454        if info["action"] == "delete":
 455            continue
 456
 457        for prop in [ "depotFile", "rev", "action", "type" ]:
 458            details["%s%s" % (prop, fileCnt)] = info[prop]
 459
 460        fileCnt = fileCnt + 1
 461
 462    details["change"] = newestRevision
 463
 464    try:
 465        commit(details, extractFilesFromCommit(details), branch, globalPrefix)
 466    except:
 467        print gitError.read()
 468
 469else:
 470    changes = []
 471
 472    if len(changesFile) > 0:
 473        output = open(changesFile).readlines()
 474        changeSet = Set()
 475        for line in output:
 476            changeSet.add(int(line))
 477
 478        for change in changeSet:
 479            changes.append(change)
 480
 481        changes.sort()
 482    else:
 483        output = os.popen("p4 changes %s...%s" % (globalPrefix, changeRange)).readlines()
 484
 485        for line in output:
 486            changeNum = line.split(" ")[1]
 487            changes.append(changeNum)
 488
 489        changes.reverse()
 490
 491    if len(changes) == 0:
 492        if not silent:
 493            print "no changes to import!"
 494        sys.exit(1)
 495
 496    cnt = 1
 497    for change in changes:
 498        description = p4Cmd("describe %s" % change)
 499
 500        if not silent:
 501            sys.stdout.write("\rimporting revision %s (%s%%)" % (change, cnt * 100 / len(changes)))
 502            sys.stdout.flush()
 503        cnt = cnt + 1
 504
 505#        try:
 506        files = extractFilesFromCommit(description)
 507        if detectBranches:
 508            for branch in branchesForCommit(files):
 509                knownBranches.add(branch)
 510                branchPrefix = globalPrefix + branch + "/"
 511
 512                filesForCommit = extractFilesInCommitToBranch(files, branchPrefix)
 513
 514                parent = ""
 515                ########### remove cnt!!!
 516                if branch not in createdBranches and cnt > 2:
 517                    createdBranches.add(branch)
 518                    parent = findBranchParent(branchPrefix, files)
 519                    if parent == branch:
 520                        parent = ""
 521#                    elif len(parent) > 0:
 522#                        print "%s branched off of %s" % (branch, parent)
 523
 524                if len(parent) == 0:
 525                    parent = findBranchSourceHeuristic(filesForCommit, branch, branchPrefix)
 526                    if len(parent) > 0:
 527                        print "change %s could be a merge from %s into %s" % (description["change"], parent, branch)
 528                        if not changeIsBranchMerge(parent, branch, description["change"]):
 529                            parent = ""
 530
 531                branch = "refs/heads/" + branch
 532                if len(parent) > 0:
 533                    parent = "refs/heads/" + parent
 534                commit(description, files, branch, branchPrefix, parent)
 535        else:
 536            commit(description, filesForCommit, branch, globalPrefix, initialParent)
 537            initialParent = ""
 538#        except:
 539#            print gitError.read()
 540#            sys.exit(1)
 541
if not silent:
    # terminate the "\rimporting revision ..." progress line
    print ""

# Point the tag p4/<last imported change> at the current value of `branch`.
gitStream.write("reset refs/tags/p4/%s\n" % lastChange)
gitStream.write("from %s\n\n" % branch);


# Closing stdin signals end of input to git-fast-import.
# NOTE(review): its stdout/stderr are closed unread and the process is not
# waited for -- confirm this cannot cut the import short.
gitStream.close()
gitOutput.close()
gitError.close()

# Remember the depot path for the next run.
os.popen("git-repo-config p4.depotpath %s" % globalPrefix).read()
# Remove the tag this run resumed from, if any.
if len(initialTag) > 0:
    os.popen("git tag -d %s" % initialTag).read()

sys.exit(0)
 557sys.exit(0)