Merge branch 'jk/repack-no-explode-objects-from-old-pack' into maint
[gitweb.git] / contrib / fast-import / git-p4
index 3e1aa276cff93172cb476c54369c35fd255647b8..c5362c4c11d00e169781ff7da89c88a3051f6227 100755 (executable)
@@ -10,7 +10,7 @@
 
 import optparse, sys, os, marshal, subprocess, shelve
 import tempfile, getopt, os.path, time, platform
-import re
+import re, shutil
 
 verbose = False
 
@@ -38,7 +38,7 @@ def p4_build_cmd(cmd):
 
     host = gitConfig("git-p4.host")
     if len(host) > 0:
-        real_cmd += ["-h", host]
+        real_cmd += ["-H", host]
 
     client = gitConfig("git-p4.client")
     if len(client) > 0:
@@ -186,6 +186,47 @@ def split_p4_type(p4type):
         mods = s[1]
     return (base, mods)
 
+#
+# return the raw p4 type of a file (text, text+ko, etc)
+#
+def p4_type(file):
+    results = p4CmdList(["fstat", "-T", "headType", file])
+    return results[0]['headType']
+
+#
+# Given a type base and modifier, return a regexp matching
+# the keywords that can be expanded in the file
+#
+def p4_keywords_regexp_for_type(base, type_mods):
+    if base in ("text", "unicode", "binary"):
+        kwords = None
+        if "ko" in type_mods:
+            kwords = 'Id|Header'
+        elif "k" in type_mods:
+            kwords = 'Id|Header|Author|Date|DateTime|Change|File|Revision'
+        else:
+            return None
+        pattern = r"""
+            \$              # Starts with a dollar, followed by...
+            (%s)            # one of the keywords, followed by...
+            (:[^$]+)?       # possibly an old expansion, followed by...
+            \$              # another dollar
+            """ % kwords
+        return pattern
+    else:
+        return None
+
+#
+# Given a file, return a regexp matching the possible
+# RCS keywords that will be expanded, or None for files
+# with kw expansion turned off.
+#
+def p4_keywords_regexp_for_file(file):
+    if not os.path.exists(file):
+        return None
+    else:
+        (type_base, type_mods) = split_p4_type(p4_type(file))
+        return p4_keywords_regexp_for_type(type_base, type_mods)
 
 def setP4ExecBit(file, mode):
     # Reopens an already open file and changes the execute bit to match
@@ -555,6 +596,46 @@ def p4PathStartsWith(path, prefix):
         return path.lower().startswith(prefix.lower())
     return path.startswith(prefix)
 
+def getClientSpec():
+    """Look at the p4 client spec, create a View() object that contains
+       all the mappings, and return it."""
+
+    specList = p4CmdList("client -o")
+    if len(specList) != 1:
+        die('Output from "client -o" is %d lines, expecting 1' %
+            len(specList))
+
+    # dictionary of all client parameters
+    entry = specList[0]
+
+    # just the keys that start with "View"
+    view_keys = [ k for k in entry.keys() if k.startswith("View") ]
+
+    # hold this new View
+    view = View()
+
+    # append the lines, in order, to the view
+    for view_num in range(len(view_keys)):
+        k = "View%d" % view_num
+        if k not in view_keys:
+            die("Expected view key %s missing" % k)
+        view.append(entry[k])
+
+    return view
+
+def getClientRoot():
+    """Grab the client directory."""
+
+    output = p4CmdList("client -o")
+    if len(output) != 1:
+        die('Output from "client -o" is %d lines, expecting 1' % len(output))
+
+    entry = output[0]
+    if "Root" not in entry:
+        die('Client has no "Root"')
+
+    return entry["Root"]
+
 class Command:
     def __init__(self):
         self.usage = "usage: %prog [options]"
@@ -563,6 +644,26 @@ class Command:
 class P4UserMap:
     def __init__(self):
         self.userMapFromPerforceServer = False
+        self.myP4UserId = None
+
+    def p4UserId(self):
+        if self.myP4UserId:
+            return self.myP4UserId
+
+        results = p4CmdList("user -o")
+        for r in results:
+            if r.has_key('User'):
+                self.myP4UserId = r['User']
+                return r['User']
+        die("Could not find your p4 user id")
+
+    def p4UserIsMe(self, p4User):
+        # return True if the given p4 user is actually me
+        me = self.p4UserId()
+        if not p4User or p4User != me:
+            return False
+        else:
+            return True
 
     def getUserCacheFilename(self):
         home = os.environ.get("HOME", os.environ.get("USERPROFILE"))
@@ -700,7 +801,6 @@ class P4Submit(Command, P4UserMap):
         self.verbose = False
         self.preserveUser = gitConfig("git-p4.preserveUser").lower() == "true"
         self.isWindows = (platform.system() == "Windows")
-        self.myP4UserId = None
 
     def check(self):
         if len(p4CmdList("opened ...")) > 0:
@@ -734,6 +834,29 @@ class P4Submit(Command, P4UserMap):
 
         return result
 
+    def patchRCSKeywords(self, file, pattern):
+        # Attempt to zap the RCS keywords in a p4 controlled file matching the given pattern
+        (handle, outFileName) = tempfile.mkstemp(dir='.')
+        try:
+            outFile = os.fdopen(handle, "w+")
+            inFile = open(file, "r")
+            regexp = re.compile(pattern, re.VERBOSE)
+            for line in inFile.readlines():
+                line = regexp.sub(r'$\1$', line)
+                outFile.write(line)
+            inFile.close()
+            outFile.close()
+            # Forcibly overwrite the original file
+            os.unlink(file)
+            shutil.move(outFileName, file)
+        except:
+            # cleanup our temporary file
+            os.unlink(outFileName)
+            print "Failed to strip RCS keywords in %s" % file
+            raise
+
+        print "Patched up RCS keywords in %s" % file
+
     def p4UserForCommit(self,id):
         # Return the tuple (perforce user,git email) for a given git commit id
         self.getUserMapFromPerforceServer()
@@ -799,7 +922,7 @@ class P4Submit(Command, P4UserMap):
     def canChangeChangelists(self):
         # check to see if we have p4 admin or super-user permissions, either of
         # which are required to modify changelists.
-        results = p4CmdList("protects %s" % self.depotPath)
+        results = p4CmdList(["protects", self.depotPath])
         for r in results:
             if r.has_key('perm'):
                 if r['perm'] == 'admin':
@@ -808,25 +931,6 @@ class P4Submit(Command, P4UserMap):
                     return 1
         return 0
 
-    def p4UserId(self):
-        if self.myP4UserId:
-            return self.myP4UserId
-
-        results = p4CmdList("user -o")
-        for r in results:
-            if r.has_key('User'):
-                self.myP4UserId = r['User']
-                return r['User']
-        die("Could not find your p4 user id")
-
-    def p4UserIsMe(self, p4User):
-        # return True if the given p4 user is actually me
-        me = self.p4UserId()
-        if not p4User or p4User != me:
-            return False
-        else:
-            return True
-
     def prepareSubmitTemplate(self):
         # remove lines in the Files section that show changes to files outside the depot path we're committing into
         template = ""
@@ -918,6 +1022,7 @@ class P4Submit(Command, P4UserMap):
         filesToDelete = set()
         editedFiles = set()
         filesToChangeExecBit = {}
+
         for line in diff:
             diff = parseDiffTreeEntry(line)
             modifier = diff['status']
@@ -964,9 +1069,45 @@ class P4Submit(Command, P4UserMap):
         patchcmd = diffcmd + " | git apply "
         tryPatchCmd = patchcmd + "--check -"
         applyPatchCmd = patchcmd + "--check --apply -"
+        patch_succeeded = True
 
         if os.system(tryPatchCmd) != 0:
+            fixed_rcs_keywords = False
+            patch_succeeded = False
             print "Unfortunately applying the change failed!"
+
+            # Patch failed, maybe it's just RCS keyword woes. Look through
+            # the patch to see if that's possible.
+            if gitConfig("git-p4.attemptRCSCleanup","--bool") == "true":
+                file = None
+                pattern = None
+                kwfiles = {}
+                for file in editedFiles | filesToDelete:
+                    # did this file's delta contain RCS keywords?
+                    pattern = p4_keywords_regexp_for_file(file)
+
+                    if pattern:
+                        # this file is a possibility...look for RCS keywords.
+                        regexp = re.compile(pattern, re.VERBOSE)
+                        for line in read_pipe_lines(["git", "diff", "%s^..%s" % (id, id), file]):
+                            if regexp.search(line):
+                                if verbose:
+                                    print "got keyword match on %s in %s in %s" % (pattern, line, file)
+                                kwfiles[file] = pattern
+                                break
+
+                for file in kwfiles:
+                    if verbose:
+                        print "zapping %s with %s" % (line,pattern)
+                    self.patchRCSKeywords(file, kwfiles[file])
+                    fixed_rcs_keywords = True
+
+            if fixed_rcs_keywords:
+                print "Retrying the patch with RCS keywords cleaned up"
+                if os.system(tryPatchCmd) == 0:
+                    patch_succeeded = True
+
+        if not patch_succeeded:
             print "What do you want to do?"
             response = "x"
             while response != "s" and response != "a" and response != "w":
@@ -1119,11 +1260,20 @@ class P4Submit(Command, P4UserMap):
             print "Internal error: cannot locate perforce depot path from existing branches"
             sys.exit(128)
 
-        self.clientPath = p4Where(self.depotPath)
+        self.useClientSpec = False
+        if gitConfig("git-p4.useclientspec", "--bool") == "true":
+            self.useClientSpec = True
+        if self.useClientSpec:
+            self.clientSpecDirs = getClientSpec()
 
-        if len(self.clientPath) == 0:
-            print "Error: Cannot locate perforce checkout of %s in client view" % self.depotPath
-            sys.exit(128)
+        if self.useClientSpec:
+            # all files are relative to the client spec
+            self.clientPath = getClientRoot()
+        else:
+            self.clientPath = p4Where(self.depotPath)
+
+        if self.clientPath == "":
+            die("Error: Cannot locate perforce checkout of %s in client view" % self.depotPath)
 
         print "Perforce checkout for depot path %s located at %s" % (self.depotPath, self.clientPath)
         self.oldWorkingDirectory = os.getcwd()
@@ -1207,8 +1357,8 @@ class View(object):
                 die("Can't handle * wildcards in view: %s" % self.path)
             triple_dot_index = self.path.find("...")
             if triple_dot_index >= 0:
-                if not self.path.endswith("..."):
-                    die("Can handle ... wildcard only at end of path: %s" %
+                if triple_dot_index != len(self.path) - 3:
+                    die("Can handle only single ... wildcard, at end: %s" %
                         self.path)
                 self.ends_triple_dot = True
 
@@ -1263,7 +1413,7 @@ class View(object):
             if self.exclude:
                 c = "-"
             return "View.Mapping: %s%s -> %s" % \
-                   (c, self.depot_side, self.client_side)
+                   (c, self.depot_side.path, self.client_side.path)
 
         def map_depot_to_client(self, depot_path):
             """Calculate the client path if using this mapping on the
@@ -1363,7 +1513,8 @@ class View(object):
             else:
                 # This mapping matched; no need to search any further.
                 # But, the mapping could be rejected if the client path
-                # has already been claimed by an earlier mapping.
+                # has already been claimed by an earlier mapping (i.e.
+                # one later in the list, which we are walking backwards).
                 already_mapped_in_client = False
                 for f in paths_filled:
                     # this is View.Path.match
@@ -1428,7 +1579,10 @@ class P4Sync(Command, P4UserMap):
         self.p4BranchesInGit = []
         self.cloneExclude = []
         self.useClientSpec = False
+        self.useClientSpec_from_options = False
         self.clientSpecDirs = None
+        self.tempBranches = []
+        self.tempBranchLocation = "git-p4-tmp"
 
         if gitConfig("git-p4.syncFromOrigin") == "false":
             self.syncWithOrigin = False
@@ -1450,6 +1604,14 @@ class P4Sync(Command, P4UserMap):
                    .replace("%25", "%")
         return path
 
+    # Force a checkpoint in fast-import and wait for it to finish
+    def checkpoint(self):
+        self.gitStream.write("checkpoint\n\n")
+        self.gitStream.write("progress checkpoint\n\n")
+        out = self.gitOutput.readline()
+        if self.verbose:
+            print "checkpoint finished: " + out
+
     def extractFilesFromCommit(self, commit):
         self.cloneExclude = [re.sub(r"\.\.\.$", "", path)
                              for path in self.cloneExclude]
@@ -1574,15 +1736,12 @@ class P4Sync(Command, P4UserMap):
 
         # Note that we do not try to de-mangle keywords on utf16 files,
         # even though in theory somebody may want that.
-        if type_base in ("text", "unicode", "binary"):
-            if "ko" in type_mods:
-                text = ''.join(contents)
-                text = re.sub(r'\$(Id|Header):[^$]*\$', r'$\1$', text)
-                contents = [ text ]
-            elif "k" in type_mods:
-                text = ''.join(contents)
-                text = re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$]*\$', r'$\1$', text)
-                contents = [ text ]
+        pattern = p4_keywords_regexp_for_type(type_base, type_mods)
+        if pattern:
+            regexp = re.compile(pattern, re.VERBOSE)
+            text = ''.join(contents)
+            text = regexp.sub(r'$\1$', text)
+            contents = [ text ]
 
         self.gitStream.write("M %s inline %s\n" % (git_mode, relPath))
 
@@ -1664,6 +1823,12 @@ class P4Sync(Command, P4UserMap):
             if self.stream_file.has_key('depotFile'):
                 self.streamOneP4File(self.stream_file, self.stream_contents)
 
+    def make_email(self, userid):
+        if userid in self.users:
+            return self.users[userid]
+        else:
+            return "%s <a@b>" % userid
+
     def commit(self, details, files, branch, branchPrefixes, parent = ""):
         epoch = details["time"]
         author = details["user"]
@@ -1687,10 +1852,7 @@ class P4Sync(Command, P4UserMap):
         committer = ""
         if author not in self.users:
             self.getUserMapFromPerforceServer()
-        if author in self.users:
-            committer = "%s %s %s" % (self.users[author], epoch, self.tz)
-        else:
-            committer = "%s <a@b> %s %s" % (author, epoch, self.tz)
+        committer = "%s %s %s" % (self.make_email(author), epoch, self.tz)
 
         self.gitStream.write("committer %s\n" % committer)
 
@@ -1735,15 +1897,21 @@ class P4Sync(Command, P4UserMap):
                     self.gitStream.write("from %s\n" % branch)
 
                     owner = labelDetails["Owner"]
-                    tagger = ""
-                    if author in self.users:
-                        tagger = "%s %s %s" % (self.users[owner], epoch, self.tz)
+
+                    # Try to use the owner of the p4 label, or failing that,
+                    # the current p4 user id.
+                    if owner:
+                        email = self.make_email(owner)
                     else:
-                        tagger = "%s <a@b> %s %s" % (owner, epoch, self.tz)
+                        email = self.make_email(self.p4UserId())
+                    tagger = "%s %s %s" % (email, epoch, self.tz)
+
                     self.gitStream.write("tagger %s\n" % tagger)
-                    self.gitStream.write("data <<EOT\n")
-                    self.gitStream.write(labelDetails["Description"])
-                    self.gitStream.write("EOT\n\n")
+
+                    description = labelDetails["Description"]
+                    self.gitStream.write("data %d\n" % len(description))
+                    self.gitStream.write(description)
+                    self.gitStream.write("\n")
 
                 else:
                     if not self.silent:
@@ -1758,7 +1926,7 @@ class P4Sync(Command, P4UserMap):
     def getLabels(self):
         self.labels = {}
 
-        l = p4CmdList("labels %s..." % ' '.join (self.depotPaths))
+        l = p4CmdList(["labels"] + ["%s..." % p for p in self.depotPaths])
         if len(l) > 0 and not self.silent:
             print "Finding files belonging to labels in %s" % `self.depotPaths`
 
@@ -1800,7 +1968,7 @@ class P4Sync(Command, P4UserMap):
             command = "branches"
 
         for info in p4CmdList(command):
-            details = p4Cmd("branch -o %s" % info["branch"])
+            details = p4Cmd(["branch", "-o", info["branch"]])
             viewIdx = 0
             while details.has_key("View%s" % viewIdx):
                 paths = details["View%s" % viewIdx].split(" ")
@@ -1938,7 +2106,7 @@ class P4Sync(Command, P4UserMap):
         sourceRef = self.gitRefForBranch(sourceBranch)
         #print "source " + sourceBranch
 
-        branchParentChange = int(p4Cmd("changes -m 1 %s...@1,%s" % (sourceDepotPath, firstChange))["change"])
+        branchParentChange = int(p4Cmd(["changes", "-m", "1", "%s...@1,%s" % (sourceDepotPath, firstChange)])["change"])
         #print "branch parent: %s" % branchParentChange
         gitParent = self.gitCommitByP4Change(sourceRef, branchParentChange)
         if len(gitParent) > 0:
@@ -1948,10 +2116,24 @@ class P4Sync(Command, P4UserMap):
         self.importChanges(changes)
         return True
 
+    def searchParent(self, parent, branch, target):
+        parentFound = False
+        for blob in read_pipe_lines(["git", "rev-list", "--reverse", "--no-merges", parent]):
+            blob = blob.strip()
+            if len(read_pipe(["git", "diff-tree", blob, target])) == 0:
+                parentFound = True
+                if self.verbose:
+                    print "Found parent of %s in commit %s" % (branch, blob)
+                break
+        if parentFound:
+            return blob
+        else:
+            return None
+
     def importChanges(self, changes):
         cnt = 1
         for change in changes:
-            description = p4Cmd("describe %s" % change)
+            description = p4Cmd(["describe", str(change)])
             self.updateOptionDict(description)
 
             if not self.silent:
@@ -2004,7 +2186,21 @@ class P4Sync(Command, P4UserMap):
                             parent = self.initialParents[branch]
                             del self.initialParents[branch]
 
-                        self.commit(description, filesForCommit, branch, [branchPrefix], parent)
+                        blob = None
+                        if len(parent) > 0:
+                            tempBranch = os.path.join(self.tempBranchLocation, "%d" % (change))
+                            if self.verbose:
+                                print "Creating temporary branch: " + tempBranch
+                            self.commit(description, filesForCommit, tempBranch, [branchPrefix])
+                            self.tempBranches.append(tempBranch)
+                            self.checkpoint()
+                            blob = self.searchParent(parent, branch, tempBranch)
+                        if blob:
+                            self.commit(description, filesForCommit, branch, [branchPrefix], blob)
+                        else:
+                            if self.verbose:
+                                print "Parent of %s not found. Committing into head of %s" % (branch, parent)
+                            self.commit(description, filesForCommit, branch, [branchPrefix], parent)
                 else:
                     files = self.extractFilesFromCommit(description)
                     self.commit(description, files, self.branch, self.depotPaths,
@@ -2077,33 +2273,6 @@ class P4Sync(Command, P4UserMap):
             print self.gitError.read()
 
 
-    def getClientSpec(self):
-        specList = p4CmdList("client -o")
-        if len(specList) != 1:
-            die('Output from "client -o" is %d lines, expecting 1' %
-                len(specList))
-
-        # dictionary of all client parameters
-        entry = specList[0]
-
-        # just the keys that start with "View"
-        view_keys = [ k for k in entry.keys() if k.startswith("View") ]
-
-        # hold this new View
-        view = View()
-
-        # append the lines, in order, to the view
-        for view_num in range(len(view_keys)):
-            k = "View%d" % view_num
-            if k not in view_keys:
-                die("Expected view key %s missing" % k)
-            view.append(entry[k])
-
-        self.clientSpecDirs = view
-        if self.verbose:
-            for i, m in enumerate(self.clientSpecDirs.mappings):
-                    print "clientSpecDirs %d: %s" % (i, str(m))
-
     def run(self, args):
         self.depotPaths = []
         self.changeRange = ""
@@ -2136,11 +2305,15 @@ class P4Sync(Command, P4UserMap):
             if not gitBranchExists(self.refPrefix + "HEAD") and self.importIntoRemotes and gitBranchExists(self.branch):
                 system("git symbolic-ref %sHEAD %s" % (self.refPrefix, self.branch))
 
-        if not self.useClientSpec:
+        # accept either the command-line option, or the configuration variable
+        if self.useClientSpec:
+            # will use this after clone to set the variable
+            self.useClientSpec_from_options = True
+        else:
             if gitConfig("git-p4.useclientspec", "--bool") == "true":
                 self.useClientSpec = True
         if self.useClientSpec:
-            self.getClientSpec()
+            self.clientSpecDirs = getClientSpec()
 
         # TODO: should always look at previous commits,
         # merge with previous imports, if possible.
@@ -2339,6 +2512,12 @@ class P4Sync(Command, P4UserMap):
         self.gitOutput.close()
         self.gitError.close()
 
+        # Cleanup temporary branches created during import
+        if self.tempBranches != []:
+            for branch in self.tempBranches:
+                read_pipe("git update-ref -d %s" % branch)
+            os.rmdir(os.path.join(os.environ.get("GIT_DIR", ".git"), self.tempBranchLocation))
+
         return True
 
 class P4Rebase(Command):
@@ -2455,6 +2634,10 @@ class P4Clone(P4Sync):
             else:
                 print "Could not detect main branch. No checkout/master branch created."
 
+        # auto-set this variable if invoked with --use-client-spec
+        if self.useClientSpec_from_options:
+            system("git config --bool git-p4.useclientspec true")
+
         return True
 
 class P4Branches(Command):