d0832e8c3d618a4f8d73053adefd62af14b3dae1
   1#!/usr/bin/python
   2#
   3# p4-fast-export.py
   4#
   5# Author: Simon Hausmann <hausmann@kde.org>
   6# License: MIT <http://www.opensource.org/licenses/mit-license.php>
   7#
   8# TODO:
   9#       - support integrations (at least p4i)
  10#       - support p4 submit (hah!)
  11#       - emulate p4's delete behavior: if a directory becomes empty delete it. continue
  12#         with parent dir until non-empty dir is found.
  13#
  14import os, string, sys, time, os.path
  15import marshal, popen2, getopt, sha
  16from sets import Set;
  17
  18cacheDebug = False
  19
  20silent = False
  21knownBranches = Set()
  22createdBranches = Set()
  23committedChanges = Set()
  24branch = "refs/heads/master"
  25globalPrefix = previousDepotPath = os.popen("git-repo-config --get p4.depotpath").read()
  26detectBranches = False
  27changesFile = ""
  28if len(globalPrefix) != 0:
  29    globalPrefix = globalPrefix[:-1]
  30
  31try:
  32    opts, args = getopt.getopt(sys.argv[1:], "", [ "branch=", "detect-branches", "changesfile=", "silent", "known-branches=",
  33                                                   "cache-debug" ])
  34except getopt.GetoptError:
  35    print "fixme, syntax error"
  36    sys.exit(1)
  37
  38for o, a in opts:
  39    if o == "--branch":
  40        branch = "refs/heads/" + a
  41    elif o == "--detect-branches":
  42        detectBranches = True
  43    elif o == "--changesfile":
  44        changesFile = a
  45    elif o == "--silent":
  46        silent= True
  47    elif o == "--known-branches":
  48        for branch in open(a).readlines():
  49            knownBranches.add(branch[:-1])
  50    elif o == "--cache-debug":
  51        cacheDebug = True
  52
  53if len(args) == 0 and len(globalPrefix) != 0:
  54    if not silent:
  55        print "[using previously specified depot path %s]" % globalPrefix
  56elif len(args) != 1:
  57    print "usage: %s //depot/path[@revRange]" % sys.argv[0]
  58    print "\n    example:"
  59    print "    %s //depot/my/project/ -- to import the current head"
  60    print "    %s //depot/my/project/@all -- to import everything"
  61    print "    %s //depot/my/project/@1,6 -- to import only from revision 1 to 6"
  62    print ""
  63    print "    (a ... is not needed in the path p4 specification, it's added implicitly)"
  64    print ""
  65    sys.exit(1)
  66else:
  67    if len(globalPrefix) != 0 and globalPrefix != args[0]:
  68        print "previous import used depot path %s and now %s was specified. this doesn't work!" % (globalPrefix, args[0])
  69        sys.exit(1)
  70    globalPrefix = args[0]
  71
  72changeRange = ""
  73revision = ""
  74users = {}
  75initialParent = ""
  76lastChange = 0
  77initialTag = ""
  78
  79if globalPrefix.find("@") != -1:
  80    atIdx = globalPrefix.index("@")
  81    changeRange = globalPrefix[atIdx:]
  82    if changeRange == "@all":
  83        changeRange = ""
  84    elif changeRange.find(",") == -1:
  85        revision = changeRange
  86        changeRange = ""
  87    globalPrefix = globalPrefix[0:atIdx]
  88elif globalPrefix.find("#") != -1:
  89    hashIdx = globalPrefix.index("#")
  90    revision = globalPrefix[hashIdx:]
  91    globalPrefix = globalPrefix[0:hashIdx]
  92elif len(previousDepotPath) == 0:
  93    revision = "#head"
  94
  95if globalPrefix.endswith("..."):
  96    globalPrefix = globalPrefix[:-3]
  97
  98if not globalPrefix.endswith("/"):
  99    globalPrefix += "/"
 100
 101def p4File(depotPath):
 102    cacheKey = "/tmp/p4cache/data-" + sha.new(depotPath).hexdigest()
 103
 104    data = 0
 105    try:
 106        if not cacheDebug:
 107            raise
 108        data = open(cacheKey, "rb").read()
 109    except:
 110        data = os.popen("p4 print -q \"%s\"" % depotPath, "rb").read()
 111        if cacheDebug:
 112            open(cacheKey, "wb").write(data)
 113
 114    return data
 115
 116def p4CmdList(cmd):
 117    fullCmd = "p4 -G %s" % cmd;
 118
 119    cacheKey = sha.new(fullCmd).hexdigest()
 120    cacheKey = "/tmp/p4cache/cmd-" + cacheKey
 121
 122    cached = True
 123    pipe = 0
 124    try:
 125        if not cacheDebug:
 126            raise
 127        pipe = open(cacheKey, "rb")
 128    except:
 129        cached = False
 130        pipe = os.popen(fullCmd, "rb")
 131
 132    result = []
 133    try:
 134        while True:
 135            entry = marshal.load(pipe)
 136            result.append(entry)
 137    except EOFError:
 138        pass
 139    pipe.close()
 140
 141    if not cached and cacheDebug:
 142        pipe = open(cacheKey, "wb")
 143        for r in result:
 144            marshal.dump(r, pipe)
 145        pipe.close()
 146
 147    return result
 148
 149def p4Cmd(cmd):
 150    list = p4CmdList(cmd)
 151    result = {}
 152    for entry in list:
 153        result.update(entry)
 154    return result;
 155
 156def extractFilesFromCommit(commit):
 157    files = []
 158    fnum = 0
 159    while commit.has_key("depotFile%s" % fnum):
 160        path =  commit["depotFile%s" % fnum]
 161        if not path.startswith(globalPrefix):
 162#            if not silent:
 163#                print "\nchanged files: ignoring path %s outside of %s in change %s" % (path, globalPrefix, change)
 164            fnum = fnum + 1
 165            continue
 166
 167        file = {}
 168        file["path"] = path
 169        file["rev"] = commit["rev%s" % fnum]
 170        file["action"] = commit["action%s" % fnum]
 171        file["type"] = commit["type%s" % fnum]
 172        files.append(file)
 173        fnum = fnum + 1
 174    return files
 175
 176def isSubPathOf(first, second):
 177    if not first.startswith(second):
 178        return False
 179    if first == second:
 180        return True
 181    return first[len(second)] == "/"
 182
 183def branchesForCommit(files):
 184    global knownBranches
 185    branches = Set()
 186
 187    for file in files:
 188        relativePath = file["path"][len(globalPrefix):]
 189        # strip off the filename
 190        relativePath = relativePath[0:relativePath.rfind("/")]
 191
 192#        if len(branches) == 0:
 193#            branches.add(relativePath)
 194#            knownBranches.add(relativePath)
 195#            continue
 196
 197        ###### this needs more testing :)
 198        knownBranch = False
 199        for branch in branches:
 200            if relativePath == branch:
 201                knownBranch = True
 202                break
 203#            if relativePath.startswith(branch):
 204            if isSubPathOf(relativePath, branch):
 205                knownBranch = True
 206                break
 207#            if branch.startswith(relativePath):
 208            if isSubPathOf(branch, relativePath):
 209                branches.remove(branch)
 210                break
 211
 212        if knownBranch:
 213            continue
 214
 215        for branch in knownBranches:
 216            #if relativePath.startswith(branch):
 217            if isSubPathOf(relativePath, branch):
 218                if len(branches) == 0:
 219                    relativePath = branch
 220                else:
 221                    knownBranch = True
 222                break
 223
 224        if knownBranch:
 225            continue
 226
 227        branches.add(relativePath)
 228        knownBranches.add(relativePath)
 229
 230    return branches
 231
 232def findBranchParent(branchPrefix, files):
 233    for file in files:
 234        path = file["path"]
 235        if not path.startswith(branchPrefix):
 236            continue
 237        action = file["action"]
 238        if action != "integrate" and action != "branch":
 239            continue
 240        rev = file["rev"]
 241        depotPath = path + "#" + rev
 242
 243        log = p4CmdList("filelog \"%s\"" % depotPath)
 244        if len(log) != 1:
 245            print "eek! I got confused by the filelog of %s" % depotPath
 246            sys.exit(1);
 247
 248        log = log[0]
 249        if log["action0"] != action:
 250            print "eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, log["action0"], action)
 251            sys.exit(1);
 252
 253        branchAction = log["how0,0"]
 254#        if branchAction == "branch into" or branchAction == "ignored":
 255#            continue # ignore for branching
 256
 257        if not branchAction.endswith(" from"):
 258            continue # ignore for branching
 259#            print "eek! file %s was not branched from but instead: %s" % (depotPath, branchAction)
 260#            sys.exit(1);
 261
 262        source = log["file0,0"]
 263        if source.startswith(branchPrefix):
 264            continue
 265
 266        lastSourceRev = log["erev0,0"]
 267
 268        sourceLog = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
 269        if len(sourceLog) != 1:
 270            print "eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev)
 271            sys.exit(1);
 272        sourceLog = sourceLog[0]
 273
 274        relPath = source[len(globalPrefix):]
 275        # strip off the filename
 276        relPath = relPath[0:relPath.rfind("/")]
 277
 278        for branch in knownBranches:
 279            if isSubPathOf(relPath, branch):
 280#                print "determined parent branch branch %s due to change in file %s" % (branch, source)
 281                return branch
 282#            else:
 283#                print "%s is not a subpath of branch %s" % (relPath, branch)
 284
 285    return ""
 286
 287def commit(details, files, branch, branchPrefix, parent, merged = ""):
 288    global users
 289    global lastChange
 290    global committedChanges
 291
 292    epoch = details["time"]
 293    author = details["user"]
 294
 295    gitStream.write("commit %s\n" % branch)
 296#    gitStream.write("mark :%s\n" % details["change"])
 297    committedChanges.add(int(details["change"]))
 298    committer = ""
 299    if author in users:
 300        committer = "%s %s %s" % (users[author], epoch, tz)
 301    else:
 302        committer = "%s <a@b> %s %s" % (author, epoch, tz)
 303
 304    gitStream.write("committer %s\n" % committer)
 305
 306    gitStream.write("data <<EOT\n")
 307    gitStream.write(details["desc"])
 308    gitStream.write("\n[ imported from %s; change %s ]\n" % (branchPrefix, details["change"]))
 309    gitStream.write("EOT\n\n")
 310
 311    if len(parent) > 0:
 312        gitStream.write("from %s\n" % parent)
 313
 314    if len(merged) > 0:
 315        gitStream.write("merge %s\n" % merged)
 316
 317    for file in files:
 318        path = file["path"]
 319        if not path.startswith(branchPrefix):
 320#            if not silent:
 321#                print "\nchanged files: ignoring path %s outside of branch prefix %s in change %s" % (path, branchPrefix, details["change"])
 322            continue
 323        rev = file["rev"]
 324        depotPath = path + "#" + rev
 325        relPath = path[len(branchPrefix):]
 326        action = file["action"]
 327
 328        if action == "delete":
 329            gitStream.write("D %s\n" % relPath)
 330        else:
 331            mode = 644
 332            if file["type"].startswith("x"):
 333                mode = 755
 334
 335            data = p4File(depotPath)
 336
 337            gitStream.write("M %s inline %s\n" % (mode, relPath))
 338            gitStream.write("data %s\n" % len(data))
 339            gitStream.write(data)
 340            gitStream.write("\n")
 341
 342    gitStream.write("\n")
 343
 344    lastChange = int(details["change"])
 345
 346def extractFilesInCommitToBranch(files, branchPrefix):
 347    newFiles = []
 348
 349    for file in files:
 350        path = file["path"]
 351        if path.startswith(branchPrefix):
 352            newFiles.append(file)
 353
 354    return newFiles
 355
 356def findBranchSourceHeuristic(files, branch, branchPrefix):
 357    for file in files:
 358        action = file["action"]
 359        if action != "integrate" and action != "branch":
 360            continue
 361        path = file["path"]
 362        rev = file["rev"]
 363        depotPath = path + "#" + rev
 364
 365        log = p4CmdList("filelog \"%s\"" % depotPath)
 366        if len(log) != 1:
 367            print "eek! I got confused by the filelog of %s" % depotPath
 368            sys.exit(1);
 369
 370        log = log[0]
 371        if log["action0"] != action:
 372            print "eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, log["action0"], action)
 373            sys.exit(1);
 374
 375        branchAction = log["how0,0"]
 376
 377        if not branchAction.endswith(" from"):
 378            continue # ignore for branching
 379#            print "eek! file %s was not branched from but instead: %s" % (depotPath, branchAction)
 380#            sys.exit(1);
 381
 382        source = log["file0,0"]
 383        if source.startswith(branchPrefix):
 384            continue
 385
 386        lastSourceRev = log["erev0,0"]
 387
 388        sourceLog = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
 389        if len(sourceLog) != 1:
 390            print "eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev)
 391            sys.exit(1);
 392        sourceLog = sourceLog[0]
 393
 394        relPath = source[len(globalPrefix):]
 395        # strip off the filename
 396        relPath = relPath[0:relPath.rfind("/")]
 397
 398        for candidate in knownBranches:
 399            if isSubPathOf(relPath, candidate) and candidate != branch:
 400                return candidate
 401
 402    return ""
 403
 404def changeIsBranchMerge(sourceBranch, destinationBranch, change):
 405    sourceFiles = {}
 406    for file in p4CmdList("files %s...@%s" % (globalPrefix + sourceBranch + "/", change)):
 407        if file["action"] == "delete":
 408            continue
 409        sourceFiles[file["depotFile"]] = file
 410
 411    destinationFiles = {}
 412    for file in p4CmdList("files %s...@%s" % (globalPrefix + destinationBranch + "/", change)):
 413        destinationFiles[file["depotFile"]] = file
 414
 415    for fileName in sourceFiles.keys():
 416        integrations = []
 417        deleted = False
 418        for integration in p4CmdList("integrated \"%s\"" % fileName):
 419            toFile = integration["fromFile"] # yes, it's true, it's fromFile
 420            if not toFile in destinationFiles:
 421                continue
 422            destFile = destinationFiles[toFile]
 423            if destFile["action"] == "delete":
 424#                print "file %s has been deleted in %s" % (fileName, toFile)
 425                deleted = True
 426                break
 427
 428            if int(integration["change"]) == change:
 429                integrations.append(integration)
 430                continue
 431
 432            destRev = int(destFile["rev"])
 433
 434            startRev = integration["startFromRev"][1:]
 435            if startRev == "none":
 436                startRev = 0
 437            else:
 438                startRev = int(startRev)
 439
 440            endRev = integration["endFromRev"][1:]
 441            if endRev == "none":
 442                endRev = 0
 443            else:
 444                endRev = int(endRev)
 445
 446            initialBranch = (destRev == 1 and integration["how"] != "branch into")
 447            inRange = (destRev >= startRev and destRev <= endRev)
 448            newer = (destRev > startRev and destRev > endRev)
 449
 450            if initialBranch or inRange or newer:
 451                integrations.append(integration)
 452
 453        if deleted:
 454            continue
 455
 456        if len(integrations) == 0:
 457            print "file %s was not integrated from %s into %s" % (fileName, sourceBranch, destinationBranch)
 458            return False
 459
 460    return True
 461
 462def getUserMap():
 463    users = {}
 464
 465    for output in p4CmdList("users"):
 466        if not output.has_key("User"):
 467            continue
 468        users[output["User"]] = output["FullName"] + " <" + output["Email"] + ">"
 469    return users
 470
 471users = getUserMap()
 472
 473if len(changeRange) == 0:
 474    try:
 475        sout, sin, serr = popen2.popen3("git-name-rev --tags `git-rev-parse %s`" % branch)
 476        output = sout.read()
 477        if output.endswith("\n"):
 478            output = output[:-1]
 479        tagIdx = output.index(" tags/p4/")
 480        caretIdx = output.find("^")
 481        endPos = len(output)
 482        if caretIdx != -1:
 483            endPos = caretIdx
 484        rev = int(output[tagIdx + 9 : endPos]) + 1
 485        changeRange = "@%s,#head" % rev
 486        initialParent = os.popen("git-rev-parse %s" % branch).read()[:-1]
 487        initialTag = "p4/%s" % (int(rev) - 1)
 488    except:
 489        pass
 490
 491tz = - time.timezone / 36
 492tzsign = ("%s" % tz)[0]
 493if tzsign != '+' and tzsign != '-':
 494    tz = "+" + ("%s" % tz)
 495
 496gitOutput, gitStream, gitError = popen2.popen3("git-fast-import")
 497
 498if len(revision) > 0:
 499    print "Doing initial import of %s from revision %s" % (globalPrefix, revision)
 500
 501    details = { "user" : "git perforce import user", "time" : int(time.time()) }
 502    details["desc"] = "Initial import of %s from the state at revision %s" % (globalPrefix, revision)
 503    details["change"] = revision
 504    newestRevision = 0
 505
 506    fileCnt = 0
 507    for info in p4CmdList("files %s...%s" % (globalPrefix, revision)):
 508        change = int(info["change"])
 509        if change > newestRevision:
 510            newestRevision = change
 511
 512        if info["action"] == "delete":
 513            continue
 514
 515        for prop in [ "depotFile", "rev", "action", "type" ]:
 516            details["%s%s" % (prop, fileCnt)] = info[prop]
 517
 518        fileCnt = fileCnt + 1
 519
 520    details["change"] = newestRevision
 521
 522    try:
 523        commit(details, extractFilesFromCommit(details), branch, globalPrefix)
 524    except:
 525        print gitError.read()
 526
 527else:
 528    changes = []
 529
 530    if len(changesFile) > 0:
 531        output = open(changesFile).readlines()
 532        changeSet = Set()
 533        for line in output:
 534            changeSet.add(int(line))
 535
 536        for change in changeSet:
 537            changes.append(change)
 538
 539        changes.sort()
 540    else:
 541        output = os.popen("p4 changes %s...%s" % (globalPrefix, changeRange)).readlines()
 542
 543        for line in output:
 544            changeNum = line.split(" ")[1]
 545            changes.append(changeNum)
 546
 547        changes.reverse()
 548
 549    if len(changes) == 0:
 550        if not silent:
 551            print "no changes to import!"
 552        sys.exit(1)
 553
 554    cnt = 1
 555    for change in changes:
 556        description = p4Cmd("describe %s" % change)
 557
 558        if not silent:
 559            sys.stdout.write("\rimporting revision %s (%s%%)" % (change, cnt * 100 / len(changes)))
 560            sys.stdout.flush()
 561        cnt = cnt + 1
 562
 563        try:
 564            files = extractFilesFromCommit(description)
 565            if detectBranches:
 566                for branch in branchesForCommit(files):
 567                    knownBranches.add(branch)
 568                    branchPrefix = globalPrefix + branch + "/"
 569
 570                    filesForCommit = extractFilesInCommitToBranch(files, branchPrefix)
 571
 572                    merged = ""
 573                    parent = ""
 574                    ########### remove cnt!!!
 575                    if branch not in createdBranches and cnt > 2:
 576                        createdBranches.add(branch)
 577                        parent = findBranchParent(branchPrefix, files)
 578                        if parent == branch:
 579                            parent = ""
 580    #                    elif len(parent) > 0:
 581    #                        print "%s branched off of %s" % (branch, parent)
 582
 583                    if len(parent) == 0:
 584                        merged = findBranchSourceHeuristic(filesForCommit, branch, branchPrefix)
 585                        if len(merged) > 0:
 586                            print "change %s could be a merge from %s into %s" % (description["change"], merged, branch)
 587                            if not changeIsBranchMerge(merged, branch, int(description["change"])):
 588                                merged = ""
 589
 590                    branch = "refs/heads/" + branch
 591                    if len(parent) > 0:
 592                        parent = "refs/heads/" + parent
 593                    if len(merged) > 0:
 594                        merged = "refs/heads/" + merged
 595                    commit(description, files, branch, branchPrefix, parent, merged)
 596            else:
 597                commit(description, filesForCommit, branch, globalPrefix, initialParent)
 598                initialParent = ""
 599        except IOError:
 600            print gitError.read()
 601            sys.exit(1)
 602
 603if not silent:
 604    print ""
 605
 606gitStream.write("reset refs/tags/p4/%s\n" % lastChange)
 607gitStream.write("from %s\n\n" % branch);
 608
 609
 610gitStream.close()
 611gitOutput.close()
 612gitError.close()
 613
 614os.popen("git-repo-config p4.depotpath %s" % globalPrefix).read()
 615if len(initialTag) > 0:
 616    os.popen("git tag -d %s" % initialTag).read()
 617
 618sys.exit(0)