contrib / fast-import / p4-fast-export.py on commit First (untested) attempt at migrating p4-git-sync into the final git-p4 script (4f5cf76)
   1#!/usr/bin/env python
   2#
   3# p4-fast-export.py
   4#
   5# Author: Simon Hausmann <hausmann@kde.org>
   6# License: MIT <http://www.opensource.org/licenses/mit-license.php>
   7#
   8# TODO:
   9#       - support integrations (at least p4i)
  10#       - support p4 submit (hah!)
  11#       - emulate p4's delete behavior: if a directory becomes empty delete it. continue
  12#         with parent dir until non-empty dir is found.
  13#
  14import os, string, sys, time, os.path
  15import marshal, popen2, getopt, sha
  16from sets import Set;
  17
  18dataCache = False
  19commandCache = False
  20
  21silent = False
  22knownBranches = Set()
  23createdBranches = Set()
  24committedChanges = Set()
  25branch = "refs/heads/master"
  26globalPrefix = previousDepotPath = os.popen("git-repo-config --get p4.depotpath").read()
  27detectBranches = False
  28changesFile = ""
  29if len(globalPrefix) != 0:
  30    globalPrefix = globalPrefix[:-1]
  31
  32try:
  33    opts, args = getopt.getopt(sys.argv[1:], "", [ "branch=", "detect-branches", "changesfile=", "silent", "known-branches=",
  34                                                   "cache", "command-cache" ])
  35except getopt.GetoptError:
  36    print "fixme, syntax error"
  37    sys.exit(1)
  38
  39for o, a in opts:
  40    if o == "--branch":
  41        branch = "refs/heads/" + a
  42    elif o == "--detect-branches":
  43        detectBranches = True
  44    elif o == "--changesfile":
  45        changesFile = a
  46    elif o == "--silent":
  47        silent= True
  48    elif o == "--known-branches":
  49        for branch in open(a).readlines():
  50            knownBranches.add(branch[:-1])
  51    elif o == "--cache":
  52        dataCache = True
  53        commandCache = True
  54    elif o == "--command-cache":
  55        commandCache = True
  56
  57if len(args) == 0 and len(globalPrefix) != 0:
  58    if not silent:
  59        print "[using previously specified depot path %s]" % globalPrefix
  60elif len(args) != 1:
  61    print "usage: %s //depot/path[@revRange]" % sys.argv[0]
  62    print "\n    example:"
  63    print "    %s //depot/my/project/ -- to import the current head"
  64    print "    %s //depot/my/project/@all -- to import everything"
  65    print "    %s //depot/my/project/@1,6 -- to import only from revision 1 to 6"
  66    print ""
  67    print "    (a ... is not needed in the path p4 specification, it's added implicitly)"
  68    print ""
  69    sys.exit(1)
  70else:
  71    if len(globalPrefix) != 0 and globalPrefix != args[0]:
  72        print "previous import used depot path %s and now %s was specified. this doesn't work!" % (globalPrefix, args[0])
  73        sys.exit(1)
  74    globalPrefix = args[0]
  75
  76changeRange = ""
  77revision = ""
  78users = {}
  79initialParent = ""
  80lastChange = 0
  81initialTag = ""
  82
  83if globalPrefix.find("@") != -1:
  84    atIdx = globalPrefix.index("@")
  85    changeRange = globalPrefix[atIdx:]
  86    if changeRange == "@all":
  87        changeRange = ""
  88    elif changeRange.find(",") == -1:
  89        revision = changeRange
  90        changeRange = ""
  91    globalPrefix = globalPrefix[0:atIdx]
  92elif globalPrefix.find("#") != -1:
  93    hashIdx = globalPrefix.index("#")
  94    revision = globalPrefix[hashIdx:]
  95    globalPrefix = globalPrefix[0:hashIdx]
  96elif len(previousDepotPath) == 0:
  97    revision = "#head"
  98
  99if globalPrefix.endswith("..."):
 100    globalPrefix = globalPrefix[:-3]
 101
 102if not globalPrefix.endswith("/"):
 103    globalPrefix += "/"
 104
 105def p4File(depotPath):
 106    cacheKey = "/tmp/p4cache/data-" + sha.new(depotPath).hexdigest()
 107
 108    data = 0
 109    try:
 110        if not dataCache:
 111            raise
 112        data = open(cacheKey, "rb").read()
 113    except:
 114        data = os.popen("p4 print -q \"%s\"" % depotPath, "rb").read()
 115        if dataCache:
 116            open(cacheKey, "wb").write(data)
 117
 118    return data
 119
 120def p4CmdList(cmd):
 121    fullCmd = "p4 -G %s" % cmd;
 122
 123    cacheKey = sha.new(fullCmd).hexdigest()
 124    cacheKey = "/tmp/p4cache/cmd-" + cacheKey
 125
 126    cached = True
 127    pipe = 0
 128    try:
 129        if not commandCache:
 130            raise
 131        pipe = open(cacheKey, "rb")
 132    except:
 133        cached = False
 134        pipe = os.popen(fullCmd, "rb")
 135
 136    result = []
 137    try:
 138        while True:
 139            entry = marshal.load(pipe)
 140            result.append(entry)
 141    except EOFError:
 142        pass
 143    pipe.close()
 144
 145    if not cached and commandCache:
 146        pipe = open(cacheKey, "wb")
 147        for r in result:
 148            marshal.dump(r, pipe)
 149        pipe.close()
 150
 151    return result
 152
 153def p4Cmd(cmd):
 154    list = p4CmdList(cmd)
 155    result = {}
 156    for entry in list:
 157        result.update(entry)
 158    return result;
 159
 160def extractFilesFromCommit(commit):
 161    files = []
 162    fnum = 0
 163    while commit.has_key("depotFile%s" % fnum):
 164        path =  commit["depotFile%s" % fnum]
 165        if not path.startswith(globalPrefix):
 166#            if not silent:
 167#                print "\nchanged files: ignoring path %s outside of %s in change %s" % (path, globalPrefix, change)
 168            fnum = fnum + 1
 169            continue
 170
 171        file = {}
 172        file["path"] = path
 173        file["rev"] = commit["rev%s" % fnum]
 174        file["action"] = commit["action%s" % fnum]
 175        file["type"] = commit["type%s" % fnum]
 176        files.append(file)
 177        fnum = fnum + 1
 178    return files
 179
 180def isSubPathOf(first, second):
 181    if not first.startswith(second):
 182        return False
 183    if first == second:
 184        return True
 185    return first[len(second)] == "/"
 186
 187def branchesForCommit(files):
 188    global knownBranches
 189    branches = Set()
 190
 191    for file in files:
 192        relativePath = file["path"][len(globalPrefix):]
 193        # strip off the filename
 194        relativePath = relativePath[0:relativePath.rfind("/")]
 195
 196#        if len(branches) == 0:
 197#            branches.add(relativePath)
 198#            knownBranches.add(relativePath)
 199#            continue
 200
 201        ###### this needs more testing :)
 202        knownBranch = False
 203        for branch in branches:
 204            if relativePath == branch:
 205                knownBranch = True
 206                break
 207#            if relativePath.startswith(branch):
 208            if isSubPathOf(relativePath, branch):
 209                knownBranch = True
 210                break
 211#            if branch.startswith(relativePath):
 212            if isSubPathOf(branch, relativePath):
 213                branches.remove(branch)
 214                break
 215
 216        if knownBranch:
 217            continue
 218
 219        for branch in knownBranches:
 220            #if relativePath.startswith(branch):
 221            if isSubPathOf(relativePath, branch):
 222                if len(branches) == 0:
 223                    relativePath = branch
 224                else:
 225                    knownBranch = True
 226                break
 227
 228        if knownBranch:
 229            continue
 230
 231        branches.add(relativePath)
 232        knownBranches.add(relativePath)
 233
 234    return branches
 235
 236def findBranchParent(branchPrefix, files):
 237    for file in files:
 238        path = file["path"]
 239        if not path.startswith(branchPrefix):
 240            continue
 241        action = file["action"]
 242        if action != "integrate" and action != "branch":
 243            continue
 244        rev = file["rev"]
 245        depotPath = path + "#" + rev
 246
 247        log = p4CmdList("filelog \"%s\"" % depotPath)
 248        if len(log) != 1:
 249            print "eek! I got confused by the filelog of %s" % depotPath
 250            sys.exit(1);
 251
 252        log = log[0]
 253        if log["action0"] != action:
 254            print "eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, log["action0"], action)
 255            sys.exit(1);
 256
 257        branchAction = log["how0,0"]
 258#        if branchAction == "branch into" or branchAction == "ignored":
 259#            continue # ignore for branching
 260
 261        if not branchAction.endswith(" from"):
 262            continue # ignore for branching
 263#            print "eek! file %s was not branched from but instead: %s" % (depotPath, branchAction)
 264#            sys.exit(1);
 265
 266        source = log["file0,0"]
 267        if source.startswith(branchPrefix):
 268            continue
 269
 270        lastSourceRev = log["erev0,0"]
 271
 272        sourceLog = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
 273        if len(sourceLog) != 1:
 274            print "eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev)
 275            sys.exit(1);
 276        sourceLog = sourceLog[0]
 277
 278        relPath = source[len(globalPrefix):]
 279        # strip off the filename
 280        relPath = relPath[0:relPath.rfind("/")]
 281
 282        for branch in knownBranches:
 283            if isSubPathOf(relPath, branch):
 284#                print "determined parent branch branch %s due to change in file %s" % (branch, source)
 285                return branch
 286#            else:
 287#                print "%s is not a subpath of branch %s" % (relPath, branch)
 288
 289    return ""
 290
 291def commit(details, files, branch, branchPrefix, parent, merged = ""):
 292    global users
 293    global lastChange
 294    global committedChanges
 295
 296    epoch = details["time"]
 297    author = details["user"]
 298
 299    gitStream.write("commit %s\n" % branch)
 300#    gitStream.write("mark :%s\n" % details["change"])
 301    committedChanges.add(int(details["change"]))
 302    committer = ""
 303    if author in users:
 304        committer = "%s %s %s" % (users[author], epoch, tz)
 305    else:
 306        committer = "%s <a@b> %s %s" % (author, epoch, tz)
 307
 308    gitStream.write("committer %s\n" % committer)
 309
 310    gitStream.write("data <<EOT\n")
 311    gitStream.write(details["desc"])
 312    gitStream.write("\n[ imported from %s; change %s ]\n" % (branchPrefix, details["change"]))
 313    gitStream.write("EOT\n\n")
 314
 315    if len(parent) > 0:
 316        gitStream.write("from %s\n" % parent)
 317
 318    if len(merged) > 0:
 319        gitStream.write("merge %s\n" % merged)
 320
 321    for file in files:
 322        path = file["path"]
 323        if not path.startswith(branchPrefix):
 324#            if not silent:
 325#                print "\nchanged files: ignoring path %s outside of branch prefix %s in change %s" % (path, branchPrefix, details["change"])
 326            continue
 327        rev = file["rev"]
 328        depotPath = path + "#" + rev
 329        relPath = path[len(branchPrefix):]
 330        action = file["action"]
 331
 332        if file["type"] == "apple":
 333            print "\nfile %s is a strange apple file that forks. Ignoring!" % path
 334            continue
 335
 336        if action == "delete":
 337            gitStream.write("D %s\n" % relPath)
 338        else:
 339            mode = 644
 340            if file["type"].startswith("x"):
 341                mode = 755
 342
 343            data = p4File(depotPath)
 344
 345            gitStream.write("M %s inline %s\n" % (mode, relPath))
 346            gitStream.write("data %s\n" % len(data))
 347            gitStream.write(data)
 348            gitStream.write("\n")
 349
 350    gitStream.write("\n")
 351
 352    lastChange = int(details["change"])
 353
 354def extractFilesInCommitToBranch(files, branchPrefix):
 355    newFiles = []
 356
 357    for file in files:
 358        path = file["path"]
 359        if path.startswith(branchPrefix):
 360            newFiles.append(file)
 361
 362    return newFiles
 363
 364def findBranchSourceHeuristic(files, branch, branchPrefix):
 365    for file in files:
 366        action = file["action"]
 367        if action != "integrate" and action != "branch":
 368            continue
 369        path = file["path"]
 370        rev = file["rev"]
 371        depotPath = path + "#" + rev
 372
 373        log = p4CmdList("filelog \"%s\"" % depotPath)
 374        if len(log) != 1:
 375            print "eek! I got confused by the filelog of %s" % depotPath
 376            sys.exit(1);
 377
 378        log = log[0]
 379        if log["action0"] != action:
 380            print "eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, log["action0"], action)
 381            sys.exit(1);
 382
 383        branchAction = log["how0,0"]
 384
 385        if not branchAction.endswith(" from"):
 386            continue # ignore for branching
 387#            print "eek! file %s was not branched from but instead: %s" % (depotPath, branchAction)
 388#            sys.exit(1);
 389
 390        source = log["file0,0"]
 391        if source.startswith(branchPrefix):
 392            continue
 393
 394        lastSourceRev = log["erev0,0"]
 395
 396        sourceLog = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
 397        if len(sourceLog) != 1:
 398            print "eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev)
 399            sys.exit(1);
 400        sourceLog = sourceLog[0]
 401
 402        relPath = source[len(globalPrefix):]
 403        # strip off the filename
 404        relPath = relPath[0:relPath.rfind("/")]
 405
 406        for candidate in knownBranches:
 407            if isSubPathOf(relPath, candidate) and candidate != branch:
 408                return candidate
 409
 410    return ""
 411
 412def changeIsBranchMerge(sourceBranch, destinationBranch, change):
 413    sourceFiles = {}
 414    for file in p4CmdList("files %s...@%s" % (globalPrefix + sourceBranch + "/", change)):
 415        if file["action"] == "delete":
 416            continue
 417        sourceFiles[file["depotFile"]] = file
 418
 419    destinationFiles = {}
 420    for file in p4CmdList("files %s...@%s" % (globalPrefix + destinationBranch + "/", change)):
 421        destinationFiles[file["depotFile"]] = file
 422
 423    for fileName in sourceFiles.keys():
 424        integrations = []
 425        deleted = False
 426        integrationCount = 0
 427        for integration in p4CmdList("integrated \"%s\"" % fileName):
 428            toFile = integration["fromFile"] # yes, it's true, it's fromFile
 429            if not toFile in destinationFiles:
 430                continue
 431            destFile = destinationFiles[toFile]
 432            if destFile["action"] == "delete":
 433#                print "file %s has been deleted in %s" % (fileName, toFile)
 434                deleted = True
 435                break
 436            integrationCount += 1
 437            if integration["how"] == "branch from":
 438                continue
 439
 440            if int(integration["change"]) == change:
 441                integrations.append(integration)
 442                continue
 443            if int(integration["change"]) > change:
 444                continue
 445
 446            destRev = int(destFile["rev"])
 447
 448            startRev = integration["startFromRev"][1:]
 449            if startRev == "none":
 450                startRev = 0
 451            else:
 452                startRev = int(startRev)
 453
 454            endRev = integration["endFromRev"][1:]
 455            if endRev == "none":
 456                endRev = 0
 457            else:
 458                endRev = int(endRev)
 459
 460            initialBranch = (destRev == 1 and integration["how"] != "branch into")
 461            inRange = (destRev >= startRev and destRev <= endRev)
 462            newer = (destRev > startRev and destRev > endRev)
 463
 464            if initialBranch or inRange or newer:
 465                integrations.append(integration)
 466
 467        if deleted:
 468            continue
 469
 470        if len(integrations) == 0 and integrationCount > 1:
 471            print "file %s was not integrated from %s into %s" % (fileName, sourceBranch, destinationBranch)
 472            return False
 473
 474    return True
 475
 476def getUserMap():
 477    users = {}
 478
 479    for output in p4CmdList("users"):
 480        if not output.has_key("User"):
 481            continue
 482        users[output["User"]] = output["FullName"] + " <" + output["Email"] + ">"
 483    return users
 484
# Map of p4 user name -> "Full Name <email>", used by commit().
users = getUserMap()

# If no change range was given, try to continue a previous import: look for
# a "p4/<change>" tag describing the tip of the target branch and import
# everything after that change.
if len(changeRange) == 0:
    try:
        # popen2.popen3 returns (child stdout, child stdin, child stderr).
        sout, sin, serr = popen2.popen3("git-name-rev --tags `git-rev-parse %s`" % branch)
        output = sout.read()
        if output.endswith("\n"):
            output = output[:-1]
        # str.index raises ValueError when no " tags/p4/" is present, which
        # lands in the except clause below and leaves the state untouched.
        tagIdx = output.index(" tags/p4/")
        # Cut the number off at a "^" suffix if there is one.
        caretIdx = output.find("^")
        endPos = len(output)
        if caretIdx != -1:
            endPos = caretIdx
        # 9 == len(" tags/p4/"); resume with the change after the tagged one.
        rev = int(output[tagIdx + 9 : endPos]) + 1
        changeRange = "@%s,#head" % rev
        initialParent = os.popen("git-rev-parse %s" % branch).read()[:-1]
        # Name of the tag that was found; it is deleted at the end of the run.
        initialTag = "p4/%s" % (int(rev) - 1)
    except:
        # Best effort: any failure here just means no incremental import.
        # NOTE(review): the bare except also hides unrelated errors.
        pass
 504
 505tz = - time.timezone / 36
 506tzsign = ("%s" % tz)[0]
 507if tzsign != '+' and tzsign != '-':
 508    tz = "+" + ("%s" % tz)
 509
# Start git-fast-import; commit() writes the import stream to gitStream.
# popen2.popen3 returns (child stdout, child stdin, child stderr).
gitOutput, gitStream, gitError = popen2.popen3("git-fast-import")

if len(revision) > 0:
    # Snapshot import: a single commit with the state of the depot path at
    # the requested revision.
    print "Doing initial import of %s from revision %s" % (globalPrefix, revision)

    # Build a synthetic change description in the same numbered-key layout
    # that extractFilesFromCommit() reads.
    details = { "user" : "git perforce import user", "time" : int(time.time()) }
    details["desc"] = "Initial import of %s from the state at revision %s" % (globalPrefix, revision)
    details["change"] = revision
    newestRevision = 0

    fileCnt = 0
    for info in p4CmdList("files %s...%s" % (globalPrefix, revision)):
        # Track the highest change number seen (deleted files included).
        change = int(info["change"])
        if change > newestRevision:
            newestRevision = change

        if info["action"] == "delete":
            continue

        for prop in [ "depotFile", "rev", "action", "type" ]:
            details["%s%s" % (prop, fileCnt)] = info[prop]

        fileCnt = fileCnt + 1

    # The commit is attributed to the newest change among the listed files.
    details["change"] = newestRevision

    try:
        commit(details, extractFilesFromCommit(details), branch, globalPrefix)
    except:
        # NOTE(review): any error is reduced to printing git-fast-import's
        # stderr; the script then carries on to the finalisation code.
        print gitError.read()

else:
    # History import: one commit per Perforce change.
    changes = []

    if len(changesFile) > 0:
        # Change numbers come from a file, one integer per line; duplicates
        # are removed and the result is sorted ascending.
        output = open(changesFile).readlines()
        changeSet = Set()
        for line in output:
            changeSet.add(int(line))

        for change in changeSet:
            changes.append(change)

        changes.sort()
    else:
        output = os.popen("p4 changes %s...%s" % (globalPrefix, changeRange)).readlines()

        # The change number is the second space-separated field of each
        # line; it is kept as a string here.
        for line in output:
            changeNum = line.split(" ")[1]
            changes.append(changeNum)

        # presumably "p4 changes" lists newest first, so reversing yields
        # oldest first — TODO confirm
        changes.reverse()

    if len(changes) == 0:
        if not silent:
            print "no changes to import!"
        sys.exit(1)

    cnt = 1
    for change in changes:
        description = p4Cmd("describe %s" % change)

        if not silent:
            sys.stdout.write("\rimporting revision %s (%s%%)" % (change, cnt * 100 / len(changes)))
            sys.stdout.flush()
        cnt = cnt + 1

        try:
            files = extractFilesFromCommit(description)
            if detectBranches:
                # NOTE(review): this loop rebinds the module-level "branch",
                # which the finalisation code later uses for the tag.
                for branch in branchesForCommit(files):
                    knownBranches.add(branch)
                    branchPrefix = globalPrefix + branch + "/"

                    filesForCommit = extractFilesInCommitToBranch(files, branchPrefix)

                    merged = ""
                    parent = ""
                    ########### remove cnt!!!
                    # cnt has already been incremented, so "cnt > 2" is
                    # false for the first imported change: no parent lookup
                    # is done there and the branch is not marked as created.
                    if branch not in createdBranches and cnt > 2:
                        createdBranches.add(branch)
                        parent = findBranchParent(branchPrefix, files)
                        if parent == branch:
                            parent = ""
    #                    elif len(parent) > 0:
    #                        print "%s branched off of %s" % (branch, parent)

                    # Without a parent, check whether the change could be a
                    # merge from another known branch.
                    if len(parent) == 0:
                        merged = findBranchSourceHeuristic(filesForCommit, branch, branchPrefix)
                        if len(merged) > 0:
                            print "change %s could be a merge from %s into %s" % (description["change"], merged, branch)
                            if not changeIsBranchMerge(merged, branch, int(description["change"])):
                                merged = ""

                    # Turn branch names into full ref names for fast-import.
                    branch = "refs/heads/" + branch
                    if len(parent) > 0:
                        parent = "refs/heads/" + parent
                    if len(merged) > 0:
                        merged = "refs/heads/" + merged
                    # The full file list is passed; commit() itself skips
                    # paths outside branchPrefix.
                    commit(description, files, branch, branchPrefix, parent, merged)
            else:
                # Only the first commit gets the parent found for an
                # incremental import.
                commit(description, files, branch, globalPrefix, initialParent)
                initialParent = ""
        except IOError:
            # Writing to git-fast-import failed: show its stderr and stop.
            print gitError.read()
            sys.exit(1)
 616
 617if not silent:
 618    print ""
 619
 620gitStream.write("reset refs/tags/p4/%s\n" % lastChange)
 621gitStream.write("from %s\n\n" % branch);
 622
 623
 624gitStream.close()
 625gitOutput.close()
 626gitError.close()
 627
 628os.popen("git-repo-config p4.depotpath %s" % globalPrefix).read()
 629if len(initialTag) > 0:
 630    os.popen("git tag -d %s" % initialTag).read()
 631
 632sys.exit(0)