git-p4: add initial support for RCS keywords
[gitweb.git] / contrib / fast-import / git-p4
index 20208bfbc0c38e10f3265cbd80592628a76f3ff1..c8b6c8abe47ec1c1b8a71ebc78db2d76aa43c444 100755 (executable)
@@ -10,7 +10,7 @@
 
 import optparse, sys, os, marshal, subprocess, shelve
 import tempfile, getopt, os.path, time, platform
-import re
+import re, shutil
 
 verbose = False
 
@@ -186,6 +186,47 @@ def split_p4_type(p4type):
         mods = s[1]
     return (base, mods)
 
+#
+# return the raw p4 type of a file (text, text+ko, etc)
+#
+def p4_type(file):
+    results = p4CmdList(["fstat", "-T", "headType", file])
+    return results[0]['headType']
+
+#
+# Given a type base and modifier, return a regexp matching
+# the keywords that can be expanded in the file
+#
+def p4_keywords_regexp_for_type(base, type_mods):
+    if base in ("text", "unicode", "binary"):
+        kwords = None
+        if "ko" in type_mods:
+            kwords = 'Id|Header'
+        elif "k" in type_mods:
+            kwords = 'Id|Header|Author|Date|DateTime|Change|File|Revision'
+        else:
+            return None
+        pattern = r"""
+            \$              # Starts with a dollar, followed by...
+            (%s)            # one of the keywords, followed by...
+            (:[^$]+)?       # possibly an old expansion, followed by...
+            \$              # another dollar
+            """ % kwords
+        return pattern
+    else:
+        return None
+
+#
+# Given a file, return a regexp matching the possible
+# RCS keywords that will be expanded, or None for files
+# with kw expansion turned off.
+#
+def p4_keywords_regexp_for_file(file):
+    if not os.path.exists(file):
+        return None
+    else:
+        (type_base, type_mods) = split_p4_type(p4_type(file))
+        return p4_keywords_regexp_for_type(type_base, type_mods)
 
 def setP4ExecBit(file, mode):
     # Reopens an already open file and changes the execute bit to match
@@ -563,6 +604,26 @@ class Command:
 class P4UserMap:
     def __init__(self):
         self.userMapFromPerforceServer = False
+        self.myP4UserId = None
+
+    def p4UserId(self):
+        if self.myP4UserId:
+            return self.myP4UserId
+
+        results = p4CmdList("user -o")
+        for r in results:
+            if r.has_key('User'):
+                self.myP4UserId = r['User']
+                return r['User']
+        die("Could not find your p4 user id")
+
+    def p4UserIsMe(self, p4User):
+        # return True if the given p4 user is actually me
+        me = self.p4UserId()
+        if not p4User or p4User != me:
+            return False
+        else:
+            return True
 
     def getUserCacheFilename(self):
         home = os.environ.get("HOME", os.environ.get("USERPROFILE"))
@@ -700,7 +761,6 @@ class P4Submit(Command, P4UserMap):
         self.verbose = False
         self.preserveUser = gitConfig("git-p4.preserveUser").lower() == "true"
         self.isWindows = (platform.system() == "Windows")
-        self.myP4UserId = None
 
     def check(self):
         if len(p4CmdList("opened ...")) > 0:
@@ -734,6 +794,29 @@ class P4Submit(Command, P4UserMap):
 
         return result
 
+    def patchRCSKeywords(self, file, pattern):
+        # Attempt to zap the RCS keywords in a p4 controlled file matching the given pattern
+        (handle, outFileName) = tempfile.mkstemp(dir='.')
+        try:
+            outFile = os.fdopen(handle, "w+")
+            inFile = open(file, "r")
+            regexp = re.compile(pattern, re.VERBOSE)
+            for line in inFile.readlines():
+                line = regexp.sub(r'$\1$', line)
+                outFile.write(line)
+            inFile.close()
+            outFile.close()
+            # Forcibly overwrite the original file
+            os.unlink(file)
+            shutil.move(outFileName, file)
+        except:
+            # cleanup our temporary file
+            os.unlink(outFileName)
+            print "Failed to strip RCS keywords in %s" % file
+            raise
+
+        print "Patched up RCS keywords in %s" % file
+
     def p4UserForCommit(self,id):
         # Return the tuple (perforce user,git email) for a given git commit id
         self.getUserMapFromPerforceServer()
@@ -799,7 +882,7 @@ class P4Submit(Command, P4UserMap):
     def canChangeChangelists(self):
         # check to see if we have p4 admin or super-user permissions, either of
         # which are required to modify changelists.
-        results = p4CmdList("protects %s" % self.depotPath)
+        results = p4CmdList(["protects", self.depotPath])
         for r in results:
             if r.has_key('perm'):
                 if r['perm'] == 'admin':
@@ -808,25 +891,6 @@ class P4Submit(Command, P4UserMap):
                     return 1
         return 0
 
-    def p4UserId(self):
-        if self.myP4UserId:
-            return self.myP4UserId
-
-        results = p4CmdList("user -o")
-        for r in results:
-            if r.has_key('User'):
-                self.myP4UserId = r['User']
-                return r['User']
-        die("Could not find your p4 user id")
-
-    def p4UserIsMe(self, p4User):
-        # return True if the given p4 user is actually me
-        me = self.p4UserId()
-        if not p4User or p4User != me:
-            return False
-        else:
-            return True
-
     def prepareSubmitTemplate(self):
         # remove lines in the Files section that show changes to files outside the depot path we're committing into
         template = ""
@@ -918,6 +982,7 @@ class P4Submit(Command, P4UserMap):
         filesToDelete = set()
         editedFiles = set()
         filesToChangeExecBit = {}
+
         for line in diff:
             diff = parseDiffTreeEntry(line)
             modifier = diff['status']
@@ -964,9 +1029,45 @@ class P4Submit(Command, P4UserMap):
         patchcmd = diffcmd + " | git apply "
         tryPatchCmd = patchcmd + "--check -"
         applyPatchCmd = patchcmd + "--check --apply -"
+        patch_succeeded = True
 
         if os.system(tryPatchCmd) != 0:
+            fixed_rcs_keywords = False
+            patch_succeeded = False
             print "Unfortunately applying the change failed!"
+
+            # Patch failed, maybe it's just RCS keyword woes. Look through
+            # the patch to see if that's possible.
+            if gitConfig("git-p4.attemptRCSCleanup","--bool") == "true":
+                file = None
+                pattern = None
+                kwfiles = {}
+                for file in editedFiles | filesToDelete:
+                    # did this file's delta contain RCS keywords?
+                    pattern = p4_keywords_regexp_for_file(file)
+
+                    if pattern:
+                        # this file is a possibility...look for RCS keywords.
+                        regexp = re.compile(pattern, re.VERBOSE)
+                        for line in read_pipe_lines(["git", "diff", "%s^..%s" % (id, id), file]):
+                            if regexp.search(line):
+                                if verbose:
+                                    print "got keyword match on %s in %s in %s" % (pattern, line, file)
+                                kwfiles[file] = pattern
+                                break
+
+                for file in kwfiles:
+                    if verbose:
+                        print "zapping %s with %s" % (line,pattern)
+                    self.patchRCSKeywords(file, kwfiles[file])
+                    fixed_rcs_keywords = True
+
+            if fixed_rcs_keywords:
+                print "Retrying the patch with RCS keywords cleaned up"
+                if os.system(tryPatchCmd) == 0:
+                    patch_succeeded = True
+
+        if not patch_succeeded:
             print "What do you want to do?"
             response = "x"
             while response != "s" and response != "a" and response != "w":
@@ -1263,7 +1364,7 @@ class View(object):
             if self.exclude:
                 c = "-"
             return "View.Mapping: %s%s -> %s" % \
-                   (c, self.depot_side, self.client_side)
+                   (c, self.depot_side.path, self.client_side.path)
 
         def map_depot_to_client(self, depot_path):
             """Calculate the client path if using this mapping on the
@@ -1363,7 +1464,8 @@ class View(object):
             else:
                 # This mapping matched; no need to search any further.
                 # But, the mapping could be rejected if the client path
-                # has already been claimed by an earlier mapping.
+                # has already been claimed by an earlier mapping (i.e.
+                # one later in the list, which we are walking backwards).
                 already_mapped_in_client = False
                 for f in paths_filled:
                     # this is View.Path.match
@@ -1429,6 +1531,8 @@ class P4Sync(Command, P4UserMap):
         self.cloneExclude = []
         self.useClientSpec = False
         self.clientSpecDirs = None
+        self.tempBranches = []
+        self.tempBranchLocation = "git-p4-tmp"
 
         if gitConfig("git-p4.syncFromOrigin") == "false":
             self.syncWithOrigin = False
@@ -1450,6 +1554,14 @@ class P4Sync(Command, P4UserMap):
                    .replace("%25", "%")
         return path
 
+    # Force a checkpoint in fast-import and wait for it to finish
+    def checkpoint(self):
+        self.gitStream.write("checkpoint\n\n")
+        self.gitStream.write("progress checkpoint\n\n")
+        out = self.gitOutput.readline()
+        if self.verbose:
+            print "checkpoint finished: " + out
+
     def extractFilesFromCommit(self, commit):
         self.cloneExclude = [re.sub(r"\.\.\.$", "", path)
                              for path in self.cloneExclude]
@@ -1574,15 +1686,12 @@ class P4Sync(Command, P4UserMap):
 
         # Note that we do not try to de-mangle keywords on utf16 files,
         # even though in theory somebody may want that.
-        if type_base in ("text", "unicode", "binary"):
-            if "ko" in type_mods:
-                text = ''.join(contents)
-                text = re.sub(r'\$(Id|Header):[^$]*\$', r'$\1$', text)
-                contents = [ text ]
-            elif "k" in type_mods:
-                text = ''.join(contents)
-                text = re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$]*\$', r'$\1$', text)
-                contents = [ text ]
+        pattern = p4_keywords_regexp_for_type(type_base, type_mods)
+        if pattern:
+            regexp = re.compile(pattern, re.VERBOSE)
+            text = ''.join(contents)
+            text = regexp.sub(r'$\1$', text)
+            contents = [ text ]
 
         self.gitStream.write("M %s inline %s\n" % (git_mode, relPath))
 
@@ -1664,6 +1773,12 @@ class P4Sync(Command, P4UserMap):
             if self.stream_file.has_key('depotFile'):
                 self.streamOneP4File(self.stream_file, self.stream_contents)
 
+    def make_email(self, userid):
+        if userid in self.users:
+            return self.users[userid]
+        else:
+            return "%s <a@b>" % userid
+
     def commit(self, details, files, branch, branchPrefixes, parent = ""):
         epoch = details["time"]
         author = details["user"]
@@ -1687,10 +1802,7 @@ class P4Sync(Command, P4UserMap):
         committer = ""
         if author not in self.users:
             self.getUserMapFromPerforceServer()
-        if author in self.users:
-            committer = "%s %s %s" % (self.users[author], epoch, self.tz)
-        else:
-            committer = "%s <a@b> %s %s" % (author, epoch, self.tz)
+        committer = "%s %s %s" % (self.make_email(author), epoch, self.tz)
 
         self.gitStream.write("committer %s\n" % committer)
 
@@ -1735,15 +1847,21 @@ class P4Sync(Command, P4UserMap):
                     self.gitStream.write("from %s\n" % branch)
 
                     owner = labelDetails["Owner"]
-                    tagger = ""
-                    if author in self.users:
-                        tagger = "%s %s %s" % (self.users[owner], epoch, self.tz)
+
+                    # Try to use the owner of the p4 label, or failing that,
+                    # the current p4 user id.
+                    if owner:
+                        email = self.make_email(owner)
                     else:
-                        tagger = "%s <a@b> %s %s" % (owner, epoch, self.tz)
+                        email = self.make_email(self.p4UserId())
+                    tagger = "%s %s %s" % (email, epoch, self.tz)
+
                     self.gitStream.write("tagger %s\n" % tagger)
-                    self.gitStream.write("data <<EOT\n")
-                    self.gitStream.write(labelDetails["Description"])
-                    self.gitStream.write("EOT\n\n")
+
+                    description = labelDetails["Description"]
+                    self.gitStream.write("data %d\n" % len(description))
+                    self.gitStream.write(description)
+                    self.gitStream.write("\n")
 
                 else:
                     if not self.silent:
@@ -1758,7 +1876,7 @@ class P4Sync(Command, P4UserMap):
     def getLabels(self):
         self.labels = {}
 
-        l = p4CmdList("labels %s..." % ' '.join (self.depotPaths))
+        l = p4CmdList(["labels"] + ["%s..." % p for p in self.depotPaths])
         if len(l) > 0 and not self.silent:
             print "Finding files belonging to labels in %s" % `self.depotPaths`
 
@@ -1800,7 +1918,7 @@ class P4Sync(Command, P4UserMap):
             command = "branches"
 
         for info in p4CmdList(command):
-            details = p4Cmd("branch -o %s" % info["branch"])
+            details = p4Cmd(["branch", "-o", info["branch"]])
             viewIdx = 0
             while details.has_key("View%s" % viewIdx):
                 paths = details["View%s" % viewIdx].split(" ")
@@ -1938,7 +2056,7 @@ class P4Sync(Command, P4UserMap):
         sourceRef = self.gitRefForBranch(sourceBranch)
         #print "source " + sourceBranch
 
-        branchParentChange = int(p4Cmd("changes -m 1 %s...@1,%s" % (sourceDepotPath, firstChange))["change"])
+        branchParentChange = int(p4Cmd(["changes", "-m", "1", "%s...@1,%s" % (sourceDepotPath, firstChange)])["change"])
         #print "branch parent: %s" % branchParentChange
         gitParent = self.gitCommitByP4Change(sourceRef, branchParentChange)
         if len(gitParent) > 0:
@@ -1948,10 +2066,24 @@ class P4Sync(Command, P4UserMap):
         self.importChanges(changes)
         return True
 
+    def searchParent(self, parent, branch, target):
+        parentFound = False
+        for blob in read_pipe_lines(["git", "rev-list", "--reverse", "--no-merges", parent]):
+            blob = blob.strip()
+            if len(read_pipe(["git", "diff-tree", blob, target])) == 0:
+                parentFound = True
+                if self.verbose:
+                    print "Found parent of %s in commit %s" % (branch, blob)
+                break
+        if parentFound:
+            return blob
+        else:
+            return None
+
     def importChanges(self, changes):
         cnt = 1
         for change in changes:
-            description = p4Cmd("describe %s" % change)
+            description = p4Cmd(["describe", str(change)])
             self.updateOptionDict(description)
 
             if not self.silent:
@@ -2004,7 +2136,21 @@ class P4Sync(Command, P4UserMap):
                             parent = self.initialParents[branch]
                             del self.initialParents[branch]
 
-                        self.commit(description, filesForCommit, branch, [branchPrefix], parent)
+                        blob = None
+                        if len(parent) > 0:
+                            tempBranch = os.path.join(self.tempBranchLocation, "%d" % (change))
+                            if self.verbose:
+                                print "Creating temporary branch: " + tempBranch
+                            self.commit(description, filesForCommit, tempBranch, [branchPrefix])
+                            self.tempBranches.append(tempBranch)
+                            self.checkpoint()
+                            blob = self.searchParent(parent, branch, tempBranch)
+                        if blob:
+                            self.commit(description, filesForCommit, branch, [branchPrefix], blob)
+                        else:
+                            if self.verbose:
+                                print "Parent of %s not found. Committing into head of %s" % (branch, parent)
+                            self.commit(description, filesForCommit, branch, [branchPrefix], parent)
                 else:
                     files = self.extractFilesFromCommit(description)
                     self.commit(description, files, self.branch, self.depotPaths,
@@ -2339,6 +2485,12 @@ class P4Sync(Command, P4UserMap):
         self.gitOutput.close()
         self.gitError.close()
 
+        # Cleanup temporary branches created during import
+        if self.tempBranches != []:
+            for branch in self.tempBranches:
+                read_pipe("git update-ref -d %s" % branch)
+            os.rmdir(os.path.join(os.environ.get("GIT_DIR", ".git"), self.tempBranchLocation))
+
         return True
 
 class P4Rebase(Command):