contrib / fast-import / p4-fast-export.py on commit Reduce the number of false "merges" by skipping "branch from" entries in the integrated output as well as by ignoring integrations of future (newer) changes. (dd87020)
   1#!/usr/bin/python
   2#
   3# p4-fast-export.py
   4#
   5# Author: Simon Hausmann <hausmann@kde.org>
   6# License: MIT <http://www.opensource.org/licenses/mit-license.php>
   7#
   8# TODO:
   9#       - support integrations (at least p4i)
  10#       - support p4 submit (hah!)
  11#       - emulate p4's delete behavior: if a directory becomes empty delete it. continue
  12#         with parent dir until non-empty dir is found.
  13#
  14import os, string, sys, time, os.path
  15import marshal, popen2, getopt, sha
  16from sets import Set;
  17
  18cacheDebug = False
  19
  20silent = False
  21knownBranches = Set()
  22createdBranches = Set()
  23committedChanges = Set()
  24branch = "refs/heads/master"
  25globalPrefix = previousDepotPath = os.popen("git-repo-config --get p4.depotpath").read()
  26detectBranches = False
  27changesFile = ""
  28if len(globalPrefix) != 0:
  29    globalPrefix = globalPrefix[:-1]
  30
  31try:
  32    opts, args = getopt.getopt(sys.argv[1:], "", [ "branch=", "detect-branches", "changesfile=", "silent", "known-branches=",
  33                                                   "cache-debug" ])
  34except getopt.GetoptError:
  35    print "fixme, syntax error"
  36    sys.exit(1)
  37
  38for o, a in opts:
  39    if o == "--branch":
  40        branch = "refs/heads/" + a
  41    elif o == "--detect-branches":
  42        detectBranches = True
  43    elif o == "--changesfile":
  44        changesFile = a
  45    elif o == "--silent":
  46        silent= True
  47    elif o == "--known-branches":
  48        for branch in open(a).readlines():
  49            knownBranches.add(branch[:-1])
  50    elif o == "--cache-debug":
  51        cacheDebug = True
  52
  53if len(args) == 0 and len(globalPrefix) != 0:
  54    if not silent:
  55        print "[using previously specified depot path %s]" % globalPrefix
  56elif len(args) != 1:
  57    print "usage: %s //depot/path[@revRange]" % sys.argv[0]
  58    print "\n    example:"
  59    print "    %s //depot/my/project/ -- to import the current head"
  60    print "    %s //depot/my/project/@all -- to import everything"
  61    print "    %s //depot/my/project/@1,6 -- to import only from revision 1 to 6"
  62    print ""
  63    print "    (a ... is not needed in the path p4 specification, it's added implicitly)"
  64    print ""
  65    sys.exit(1)
  66else:
  67    if len(globalPrefix) != 0 and globalPrefix != args[0]:
  68        print "previous import used depot path %s and now %s was specified. this doesn't work!" % (globalPrefix, args[0])
  69        sys.exit(1)
  70    globalPrefix = args[0]
  71
  72changeRange = ""
  73revision = ""
  74users = {}
  75initialParent = ""
  76lastChange = 0
  77initialTag = ""
  78
  79if globalPrefix.find("@") != -1:
  80    atIdx = globalPrefix.index("@")
  81    changeRange = globalPrefix[atIdx:]
  82    if changeRange == "@all":
  83        changeRange = ""
  84    elif changeRange.find(",") == -1:
  85        revision = changeRange
  86        changeRange = ""
  87    globalPrefix = globalPrefix[0:atIdx]
  88elif globalPrefix.find("#") != -1:
  89    hashIdx = globalPrefix.index("#")
  90    revision = globalPrefix[hashIdx:]
  91    globalPrefix = globalPrefix[0:hashIdx]
  92elif len(previousDepotPath) == 0:
  93    revision = "#head"
  94
  95if globalPrefix.endswith("..."):
  96    globalPrefix = globalPrefix[:-3]
  97
  98if not globalPrefix.endswith("/"):
  99    globalPrefix += "/"
 100
 101def p4File(depotPath):
 102    cacheKey = "/tmp/p4cache/data-" + sha.new(depotPath).hexdigest()
 103
 104    data = 0
 105    try:
 106        if not cacheDebug:
 107            raise
 108        data = open(cacheKey, "rb").read()
 109    except:
 110        data = os.popen("p4 print -q \"%s\"" % depotPath, "rb").read()
 111        if cacheDebug:
 112            open(cacheKey, "wb").write(data)
 113
 114    return data
 115
 116def p4CmdList(cmd):
 117    fullCmd = "p4 -G %s" % cmd;
 118
 119    cacheKey = sha.new(fullCmd).hexdigest()
 120    cacheKey = "/tmp/p4cache/cmd-" + cacheKey
 121
 122    cached = True
 123    pipe = 0
 124    try:
 125        if not cacheDebug:
 126            raise
 127        pipe = open(cacheKey, "rb")
 128    except:
 129        cached = False
 130        pipe = os.popen(fullCmd, "rb")
 131
 132    result = []
 133    try:
 134        while True:
 135            entry = marshal.load(pipe)
 136            result.append(entry)
 137    except EOFError:
 138        pass
 139    pipe.close()
 140
 141    if not cached and cacheDebug:
 142        pipe = open(cacheKey, "wb")
 143        for r in result:
 144            marshal.dump(r, pipe)
 145        pipe.close()
 146
 147    return result
 148
 149def p4Cmd(cmd):
 150    list = p4CmdList(cmd)
 151    result = {}
 152    for entry in list:
 153        result.update(entry)
 154    return result;
 155
 156def extractFilesFromCommit(commit):
 157    files = []
 158    fnum = 0
 159    while commit.has_key("depotFile%s" % fnum):
 160        path =  commit["depotFile%s" % fnum]
 161        if not path.startswith(globalPrefix):
 162#            if not silent:
 163#                print "\nchanged files: ignoring path %s outside of %s in change %s" % (path, globalPrefix, change)
 164            fnum = fnum + 1
 165            continue
 166
 167        file = {}
 168        file["path"] = path
 169        file["rev"] = commit["rev%s" % fnum]
 170        file["action"] = commit["action%s" % fnum]
 171        file["type"] = commit["type%s" % fnum]
 172        files.append(file)
 173        fnum = fnum + 1
 174    return files
 175
 176def isSubPathOf(first, second):
 177    if not first.startswith(second):
 178        return False
 179    if first == second:
 180        return True
 181    return first[len(second)] == "/"
 182
def branchesForCommit(files):
    """Return the set of branch directories touched by the given files.

    files is a list of dicts with a "path" key (a depot path). Branch names
    are directory paths relative to globalPrefix. Directories that end up in
    the result are also added to the global knownBranches set.
    """
    global knownBranches
    branches = Set()

    for file in files:
        relativePath = file["path"][len(globalPrefix):]
        # strip off the filename
        # NOTE(review): when relativePath contains no "/", rfind returns -1 and
        # this slice drops the last character instead of the whole filename.
        relativePath = relativePath[0:relativePath.rfind("/")]

#        if len(branches) == 0:
#            branches.add(relativePath)
#            knownBranches.add(relativePath)
#            continue

        ###### this needs more testing :)
        # First compare against the branches already collected for this commit.
        knownBranch = False
        for branch in branches:
            if relativePath == branch:
                knownBranch = True
                break
#            if relativePath.startswith(branch):
            # the file lives below an already collected branch
            if isSubPathOf(relativePath, branch):
                knownBranch = True
                break
#            if branch.startswith(relativePath):
            # a collected branch lives below this directory: drop it; the
            # directory itself may be added further down. The break right
            # after the removal ends the iteration over the modified set.
            if isSubPathOf(branch, relativePath):
                branches.remove(branch)
                break

        if knownBranch:
            continue

        # Then compare against the branches known from earlier commits
        # (or from --known-branches).
        for branch in knownBranches:
            #if relativePath.startswith(branch):
            if isSubPathOf(relativePath, branch):
                if len(branches) == 0:
                    # nothing collected yet: use the known branch name
                    relativePath = branch
                else:
                    # something was already collected: skip this file's directory
                    knownBranch = True
                break

        if knownBranch:
            continue

        branches.add(relativePath)
        knownBranches.add(relativePath)

    return branches
 231
 232def findBranchParent(branchPrefix, files):
 233    for file in files:
 234        path = file["path"]
 235        if not path.startswith(branchPrefix):
 236            continue
 237        action = file["action"]
 238        if action != "integrate" and action != "branch":
 239            continue
 240        rev = file["rev"]
 241        depotPath = path + "#" + rev
 242
 243        log = p4CmdList("filelog \"%s\"" % depotPath)
 244        if len(log) != 1:
 245            print "eek! I got confused by the filelog of %s" % depotPath
 246            sys.exit(1);
 247
 248        log = log[0]
 249        if log["action0"] != action:
 250            print "eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, log["action0"], action)
 251            sys.exit(1);
 252
 253        branchAction = log["how0,0"]
 254#        if branchAction == "branch into" or branchAction == "ignored":
 255#            continue # ignore for branching
 256
 257        if not branchAction.endswith(" from"):
 258            continue # ignore for branching
 259#            print "eek! file %s was not branched from but instead: %s" % (depotPath, branchAction)
 260#            sys.exit(1);
 261
 262        source = log["file0,0"]
 263        if source.startswith(branchPrefix):
 264            continue
 265
 266        lastSourceRev = log["erev0,0"]
 267
 268        sourceLog = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
 269        if len(sourceLog) != 1:
 270            print "eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev)
 271            sys.exit(1);
 272        sourceLog = sourceLog[0]
 273
 274        relPath = source[len(globalPrefix):]
 275        # strip off the filename
 276        relPath = relPath[0:relPath.rfind("/")]
 277
 278        for branch in knownBranches:
 279            if isSubPathOf(relPath, branch):
 280#                print "determined parent branch branch %s due to change in file %s" % (branch, source)
 281                return branch
 282#            else:
 283#                print "%s is not a subpath of branch %s" % (relPath, branch)
 284
 285    return ""
 286
 287def commit(details, files, branch, branchPrefix, parent, merged = ""):
 288    global users
 289    global lastChange
 290    global committedChanges
 291
 292    epoch = details["time"]
 293    author = details["user"]
 294
 295    gitStream.write("commit %s\n" % branch)
 296#    gitStream.write("mark :%s\n" % details["change"])
 297    committedChanges.add(int(details["change"]))
 298    committer = ""
 299    if author in users:
 300        committer = "%s %s %s" % (users[author], epoch, tz)
 301    else:
 302        committer = "%s <a@b> %s %s" % (author, epoch, tz)
 303
 304    gitStream.write("committer %s\n" % committer)
 305
 306    gitStream.write("data <<EOT\n")
 307    gitStream.write(details["desc"])
 308    gitStream.write("\n[ imported from %s; change %s ]\n" % (branchPrefix, details["change"]))
 309    gitStream.write("EOT\n\n")
 310
 311    if len(parent) > 0:
 312        gitStream.write("from %s\n" % parent)
 313
 314    if len(merged) > 0:
 315        gitStream.write("merge %s\n" % merged)
 316
 317    for file in files:
 318        path = file["path"]
 319        if not path.startswith(branchPrefix):
 320#            if not silent:
 321#                print "\nchanged files: ignoring path %s outside of branch prefix %s in change %s" % (path, branchPrefix, details["change"])
 322            continue
 323        rev = file["rev"]
 324        depotPath = path + "#" + rev
 325        relPath = path[len(branchPrefix):]
 326        action = file["action"]
 327
 328        if action == "delete":
 329            gitStream.write("D %s\n" % relPath)
 330        else:
 331            mode = 644
 332            if file["type"].startswith("x"):
 333                mode = 755
 334
 335            data = p4File(depotPath)
 336
 337            gitStream.write("M %s inline %s\n" % (mode, relPath))
 338            gitStream.write("data %s\n" % len(data))
 339            gitStream.write(data)
 340            gitStream.write("\n")
 341
 342    gitStream.write("\n")
 343
 344    lastChange = int(details["change"])
 345
 346def extractFilesInCommitToBranch(files, branchPrefix):
 347    newFiles = []
 348
 349    for file in files:
 350        path = file["path"]
 351        if path.startswith(branchPrefix):
 352            newFiles.append(file)
 353
 354    return newFiles
 355
 356def findBranchSourceHeuristic(files, branch, branchPrefix):
 357    for file in files:
 358        action = file["action"]
 359        if action != "integrate" and action != "branch":
 360            continue
 361        path = file["path"]
 362        rev = file["rev"]
 363        depotPath = path + "#" + rev
 364
 365        log = p4CmdList("filelog \"%s\"" % depotPath)
 366        if len(log) != 1:
 367            print "eek! I got confused by the filelog of %s" % depotPath
 368            sys.exit(1);
 369
 370        log = log[0]
 371        if log["action0"] != action:
 372            print "eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, log["action0"], action)
 373            sys.exit(1);
 374
 375        branchAction = log["how0,0"]
 376
 377        if not branchAction.endswith(" from"):
 378            continue # ignore for branching
 379#            print "eek! file %s was not branched from but instead: %s" % (depotPath, branchAction)
 380#            sys.exit(1);
 381
 382        source = log["file0,0"]
 383        if source.startswith(branchPrefix):
 384            continue
 385
 386        lastSourceRev = log["erev0,0"]
 387
 388        sourceLog = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
 389        if len(sourceLog) != 1:
 390            print "eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev)
 391            sys.exit(1);
 392        sourceLog = sourceLog[0]
 393
 394        relPath = source[len(globalPrefix):]
 395        # strip off the filename
 396        relPath = relPath[0:relPath.rfind("/")]
 397
 398        for candidate in knownBranches:
 399            if isSubPathOf(relPath, candidate) and candidate != branch:
 400                return candidate
 401
 402    return ""
 403
def changeIsBranchMerge(sourceBranch, destinationBranch, change):
    """Check whether change looks like a merge of sourceBranch into destinationBranch.

    sourceBranch and destinationBranch are branch directories relative to
    globalPrefix; change is compared against int() of p4's change numbers.
    Returns False (after printing a message) as soon as a source file is found
    that has more than one integration record into an existing destination
    file but none that was accepted below; returns True otherwise.
    """
    # Non-deleted files of the source branch as of the change, keyed by depot path.
    sourceFiles = {}
    for file in p4CmdList("files %s...@%s" % (globalPrefix + sourceBranch + "/", change)):
        if file["action"] == "delete":
            continue
        sourceFiles[file["depotFile"]] = file

    # All files (including deleted ones) of the destination branch as of the change.
    destinationFiles = {}
    for file in p4CmdList("files %s...@%s" % (globalPrefix + destinationBranch + "/", change)):
        destinationFiles[file["depotFile"]] = file

    for fileName in sourceFiles.keys():
        integrations = []       # integration records accepted for this file
        deleted = False
        integrationCount = 0    # records that point at an existing destination file
        for integration in p4CmdList("integrated \"%s\"" % fileName):
            toFile = integration["fromFile"] # yes, it's true, it's fromFile
            if not toFile in destinationFiles:
                continue
            destFile = destinationFiles[toFile]
            if destFile["action"] == "delete":
#                print "file %s has been deleted in %s" % (fileName, toFile)
                deleted = True
                break
            integrationCount += 1
            # "branch from" records are counted but never accepted
            if integration["how"] == "branch from":
                continue

            # a record made by this very change is accepted outright
            if int(integration["change"]) == change:
                integrations.append(integration)
                continue
            # records of newer changes are ignored
            if int(integration["change"]) > change:
                continue

            destRev = int(destFile["rev"])

            # startFromRev / endFromRev: the first character is skipped and
            # "none" is treated as revision 0
            startRev = integration["startFromRev"][1:]
            if startRev == "none":
                startRev = 0
            else:
                startRev = int(startRev)

            endRev = integration["endFromRev"][1:]
            if endRev == "none":
                endRev = 0
            else:
                endRev = int(endRev)

            # The record is accepted when the destination revision is 1 (and
            # the record is not "branch into"), lies within [startRev, endRev],
            # or is greater than both bounds.
            initialBranch = (destRev == 1 and integration["how"] != "branch into")
            inRange = (destRev >= startRev and destRev <= endRev)
            newer = (destRev > startRev and destRev > endRev)

            if initialBranch or inRange or newer:
                integrations.append(integration)

        # files whose destination counterpart is deleted do not count against a merge
        if deleted:
            continue

        if len(integrations) == 0 and integrationCount > 1:
            print "file %s was not integrated from %s into %s" % (fileName, sourceBranch, destinationBranch)
            return False

    return True
 467
 468def getUserMap():
 469    users = {}
 470
 471    for output in p4CmdList("users"):
 472        if not output.has_key("User"):
 473            continue
 474        users[output["User"]] = output["FullName"] + " <" + output["Email"] + ">"
 475    return users
 476
 477users = getUserMap()
 478
 479if len(changeRange) == 0:
 480    try:
 481        sout, sin, serr = popen2.popen3("git-name-rev --tags `git-rev-parse %s`" % branch)
 482        output = sout.read()
 483        if output.endswith("\n"):
 484            output = output[:-1]
 485        tagIdx = output.index(" tags/p4/")
 486        caretIdx = output.find("^")
 487        endPos = len(output)
 488        if caretIdx != -1:
 489            endPos = caretIdx
 490        rev = int(output[tagIdx + 9 : endPos]) + 1
 491        changeRange = "@%s,#head" % rev
 492        initialParent = os.popen("git-rev-parse %s" % branch).read()[:-1]
 493        initialTag = "p4/%s" % (int(rev) - 1)
 494    except:
 495        pass
 496
 497tz = - time.timezone / 36
 498tzsign = ("%s" % tz)[0]
 499if tzsign != '+' and tzsign != '-':
 500    tz = "+" + ("%s" % tz)
 501
 502gitOutput, gitStream, gitError = popen2.popen3("git-fast-import")
 503
 504if len(revision) > 0:
 505    print "Doing initial import of %s from revision %s" % (globalPrefix, revision)
 506
 507    details = { "user" : "git perforce import user", "time" : int(time.time()) }
 508    details["desc"] = "Initial import of %s from the state at revision %s" % (globalPrefix, revision)
 509    details["change"] = revision
 510    newestRevision = 0
 511
 512    fileCnt = 0
 513    for info in p4CmdList("files %s...%s" % (globalPrefix, revision)):
 514        change = int(info["change"])
 515        if change > newestRevision:
 516            newestRevision = change
 517
 518        if info["action"] == "delete":
 519            continue
 520
 521        for prop in [ "depotFile", "rev", "action", "type" ]:
 522            details["%s%s" % (prop, fileCnt)] = info[prop]
 523
 524        fileCnt = fileCnt + 1
 525
 526    details["change"] = newestRevision
 527
 528    try:
 529        commit(details, extractFilesFromCommit(details), branch, globalPrefix)
 530    except:
 531        print gitError.read()
 532
 533else:
 534    changes = []
 535
 536    if len(changesFile) > 0:
 537        output = open(changesFile).readlines()
 538        changeSet = Set()
 539        for line in output:
 540            changeSet.add(int(line))
 541
 542        for change in changeSet:
 543            changes.append(change)
 544
 545        changes.sort()
 546    else:
 547        output = os.popen("p4 changes %s...%s" % (globalPrefix, changeRange)).readlines()
 548
 549        for line in output:
 550            changeNum = line.split(" ")[1]
 551            changes.append(changeNum)
 552
 553        changes.reverse()
 554
 555    if len(changes) == 0:
 556        if not silent:
 557            print "no changes to import!"
 558        sys.exit(1)
 559
 560    cnt = 1
 561    for change in changes:
 562        description = p4Cmd("describe %s" % change)
 563
 564        if not silent:
 565            sys.stdout.write("\rimporting revision %s (%s%%)" % (change, cnt * 100 / len(changes)))
 566            sys.stdout.flush()
 567        cnt = cnt + 1
 568
 569        try:
 570            files = extractFilesFromCommit(description)
 571            if detectBranches:
 572                for branch in branchesForCommit(files):
 573                    knownBranches.add(branch)
 574                    branchPrefix = globalPrefix + branch + "/"
 575
 576                    filesForCommit = extractFilesInCommitToBranch(files, branchPrefix)
 577
 578                    merged = ""
 579                    parent = ""
 580                    ########### remove cnt!!!
 581                    if branch not in createdBranches and cnt > 2:
 582                        createdBranches.add(branch)
 583                        parent = findBranchParent(branchPrefix, files)
 584                        if parent == branch:
 585                            parent = ""
 586    #                    elif len(parent) > 0:
 587    #                        print "%s branched off of %s" % (branch, parent)
 588
 589                    if len(parent) == 0:
 590                        merged = findBranchSourceHeuristic(filesForCommit, branch, branchPrefix)
 591                        if len(merged) > 0:
 592                            print "change %s could be a merge from %s into %s" % (description["change"], merged, branch)
 593                            if not changeIsBranchMerge(merged, branch, int(description["change"])):
 594                                merged = ""
 595
 596                    branch = "refs/heads/" + branch
 597                    if len(parent) > 0:
 598                        parent = "refs/heads/" + parent
 599                    if len(merged) > 0:
 600                        merged = "refs/heads/" + merged
 601                    commit(description, files, branch, branchPrefix, parent, merged)
 602            else:
 603                commit(description, filesForCommit, branch, globalPrefix, initialParent)
 604                initialParent = ""
 605        except IOError:
 606            print gitError.read()
 607            sys.exit(1)
 608
 609if not silent:
 610    print ""
 611
 612gitStream.write("reset refs/tags/p4/%s\n" % lastChange)
 613gitStream.write("from %s\n\n" % branch);
 614
 615
 616gitStream.close()
 617gitOutput.close()
 618gitError.close()
 619
 620os.popen("git-repo-config p4.depotpath %s" % globalPrefix).read()
 621if len(initialTag) > 0:
 622    os.popen("git tag -d %s" % initialTag).read()
 623
 624sys.exit(0)