Merge branch 'ls/p4-path-encoding'
author Junio C Hamano <gitster@pobox.com>
Thu, 16 Feb 2017 22:45:12 +0000 (14:45 -0800)
committer Junio C Hamano <gitster@pobox.com>
Thu, 16 Feb 2017 22:45:12 +0000 (14:45 -0800)
When "git p4" imported a changelist that removes paths, it failed to
convert the pathnames if p4 used an encoding different from the one
used on the Git side. This has been corrected.

* ls/p4-path-encoding:
git-p4: fix git-p4.pathEncoding for removed files

1  2 
git-p4.py
diff --combined git-p4.py
index 9695d2ed3ec2071ed7a279eac785b7e280b718e4,5c6d8c20dac3fafc96d9f82ca1c486c4bfe4cd3e..eab319d76e4597ebbb27019695fd9ce14d145285
+++ b/git-p4.py
@@@ -25,7 -25,6 +25,7 @@@ import sta
  import zipfile
  import zlib
  import ctypes
 +import errno
  
  try:
      from subprocess import CalledProcessError
@@@ -79,13 -78,6 +79,13 @@@ def p4_build_cmd(cmd)
      if len(client) > 0:
          real_cmd += ["-c", client]
  
 +    retries = gitConfigInt("git-p4.retries")
 +    if retries is None:
 +        # Perform 3 retries by default
 +        retries = 3
 +    if retries > 0:
 +        # Provide a way to not pass this option by setting git-p4.retries to 0
 +        real_cmd += ["-r", str(retries)]
  
      if isinstance(cmd,basestring):
          real_cmd = ' '.join(real_cmd) + ' ' + cmd
          real_cmd += cmd
      return real_cmd
  
 +def git_dir(path):
 +    """ Return TRUE if the given path is a git directory (/path/to/dir/.git).
 +        This won't automatically add ".git" to a directory.
 +    """
 +    d = read_pipe(["git", "--git-dir", path, "rev-parse", "--git-dir"], True).strip()
 +    if not d or len(d) == 0:
 +        return None
 +    else:
 +        return d
 +
  def chdir(path, is_client_path=False):
      """Do chdir to the given path, and set the PWD environment
         variable for use by P4.  It does not look at getcwd() output.
@@@ -271,8 -253,8 +271,8 @@@ def p4_add(f)
  def p4_delete(f):
      p4_system(["delete", wildcard_encode(f)])
  
 -def p4_edit(f):
 -    p4_system(["edit", wildcard_encode(f)])
 +def p4_edit(f, *options):
 +    p4_system(["edit"] + list(options) + [wildcard_encode(f)])
  
  def p4_revert(f):
      p4_system(["revert", wildcard_encode(f)])
  def p4_reopen(type, f):
      p4_system(["reopen", "-t", type, wildcard_encode(f)])
  
 +def p4_reopen_in_change(changelist, files):
 +    cmd = ["reopen", "-c", str(changelist)] + files
 +    p4_system(cmd)
 +
  def p4_move(src, dest):
      p4_system(["move", "-k", wildcard_encode(src), wildcard_encode(dest)])
  
@@@ -585,7 -563,10 +585,7 @@@ def currentGitBranch()
          return read_pipe(["git", "name-rev", "HEAD"]).split(" ")[1].strip()
  
  def isValidGitDir(path):
 -    if (os.path.exists(path + "/HEAD")
 -        and os.path.exists(path + "/refs") and os.path.exists(path + "/objects")):
 -        return True;
 -    return False
 +    return git_dir(path) != None
  
  def parseRevision(ref):
      return read_pipe("git rev-parse %s" % ref).strip()
@@@ -674,7 -655,7 +674,7 @@@ def gitConfigInt(key)
  def gitConfigList(key):
      if not _gitConfig.has_key(key):
          s = read_pipe(["git", "config", "--get-all", key], ignore_error=True)
 -        _gitConfig[key] = s.strip().split(os.linesep)
 +        _gitConfig[key] = s.strip().splitlines()
          if _gitConfig[key] == ['']:
              _gitConfig[key] = []
      return _gitConfig[key]
@@@ -841,7 -822,7 +841,7 @@@ def p4ChangesForPaths(depotPaths, chang
                  die("cannot use --changes-block-size with non-numeric revisions")
              block_size = None
  
 -    changes = []
 +    changes = set()
  
      # Retrieve changes a block at a time, to prevent running
      # into a MaxResults/MaxScanRows error from the server.
  
          # Insert changes in chronological order
          for line in reversed(p4_read_pipe_lines(cmd)):
 -            changes.append(int(line.split(" ")[1]))
 +            changes.add(int(line.split(" ")[1]))
  
          if not block_size:
              break
@@@ -1024,20 -1005,18 +1024,20 @@@ class LargeFileSystem(object)
             steps."""
          if self.exceedsLargeFileThreshold(relPath, contents) or self.hasLargeFileExtension(relPath):
              contentTempFile = self.generateTempFile(contents)
 -            (git_mode, contents, localLargeFile) = self.generatePointer(contentTempFile)
 -
 -            # Move temp file to final location in large file system
 -            largeFileDir = os.path.dirname(localLargeFile)
 -            if not os.path.isdir(largeFileDir):
 -                os.makedirs(largeFileDir)
 -            shutil.move(contentTempFile, localLargeFile)
 -            self.addLargeFile(relPath)
 -            if gitConfigBool('git-p4.largeFilePush'):
 -                self.pushFile(localLargeFile)
 -            if verbose:
 -                sys.stderr.write("%s moved to large file system (%s)\n" % (relPath, localLargeFile))
 +            (pointer_git_mode, contents, localLargeFile) = self.generatePointer(contentTempFile)
 +            if pointer_git_mode:
 +                git_mode = pointer_git_mode
 +            if localLargeFile:
 +                # Move temp file to final location in large file system
 +                largeFileDir = os.path.dirname(localLargeFile)
 +                if not os.path.isdir(largeFileDir):
 +                    os.makedirs(largeFileDir)
 +                shutil.move(contentTempFile, localLargeFile)
 +                self.addLargeFile(relPath)
 +                if gitConfigBool('git-p4.largeFilePush'):
 +                    self.pushFile(localLargeFile)
 +                if verbose:
 +                    sys.stderr.write("%s moved to large file system (%s)\n" % (relPath, localLargeFile))
          return (git_mode, contents)
  
  class MockLFS(LargeFileSystem):
@@@ -1077,9 -1056,6 +1077,9 @@@ class GitLFS(LargeFileSystem)
             the actual content. Return also the new location of the actual
             content.
             """
 +        if os.path.getsize(contentFile) == 0:
 +            return (None, '', None)
 +
          pointerProcess = subprocess.Popen(
              ['git', 'lfs', 'pointer', '--file=' + contentFile],
              stdout=subprocess.PIPE
          if pointerProcess.wait():
              os.remove(contentFile)
              die('git-lfs pointer command failed. Did you install the extension?')
 -        pointerContents = [i+'\n' for i in pointerFile.split('\n')[2:][:-1]]
 -        oid = pointerContents[1].split(' ')[1].split(':')[1][:-1]
 +
 +        # Git LFS removed the preamble in the output of the 'pointer' command
 +        # starting from version 1.2.0. Check for the preamble here to support
 +        # earlier versions.
 +        # c.f. https://github.com/github/git-lfs/commit/da2935d9a739592bc775c98d8ef4df9c72ea3b43
 +        if pointerFile.startswith('Git LFS pointer for'):
 +            pointerFile = re.sub(r'Git LFS pointer for.*\n\n', '', pointerFile)
 +
 +        oid = re.search(r'^oid \w+:(\w+)', pointerFile, re.MULTILINE).group(1)
          localLargeFile = os.path.join(
              os.getcwd(),
              '.git', 'lfs', 'objects', oid[:2], oid[2:4],
          )
          # LFS Spec states that pointer files should not have the executable bit set.
          gitMode = '100644'
 -        return (gitMode, pointerContents, localLargeFile)
 +        return (gitMode, pointerFile, localLargeFile)
  
      def pushFile(self, localLargeFile):
          uploadProcess = subprocess.Popen(
                  '# Git LFS (see https://git-lfs.github.com/)\n',
                  '#\n',
              ] +
 -            ['*.' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n'
 +            ['*.' + f.replace(' ', '[[:space:]]') + ' filter=lfs diff=lfs merge=lfs -text\n'
                  for f in sorted(gitConfigList('git-p4.largeFileExtensions'))
              ] +
 -            ['/' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n'
 +            ['/' + f.replace(' ', '[[:space:]]') + ' filter=lfs diff=lfs merge=lfs -text\n'
                  for f in sorted(self.largeFiles) if not self.hasLargeFileExtension(f)
              ]
          )
@@@ -1191,15 -1160,6 +1191,15 @@@ class P4UserMap
              self.users[output["User"]] = output["FullName"] + " <" + output["Email"] + ">"
              self.emails[output["Email"]] = output["User"]
  
 +        mapUserConfigRegex = re.compile(r"^\s*(\S+)\s*=\s*(.+)\s*<(\S+)>\s*$", re.VERBOSE)
 +        for mapUserConfig in gitConfigList("git-p4.mapUser"):
 +            mapUser = mapUserConfigRegex.findall(mapUserConfig)
 +            if mapUser and len(mapUser[0]) == 3:
 +                user = mapUser[0][0]
 +                fullname = mapUser[0][1]
 +                email = mapUser[0][2]
 +                self.users[user] = fullname + " <" + email + ">"
 +                self.emails[email] = user
  
          s = ''
          for (key, val) in self.users.items():
@@@ -1313,12 -1273,6 +1313,12 @@@ class P4Submit(Command, P4UserMap)
                  optparse.make_option("--conflict", dest="conflict_behavior",
                                       choices=self.conflict_behavior_choices),
                  optparse.make_option("--branch", dest="branch"),
 +                optparse.make_option("--shelve", dest="shelve", action="store_true",
 +                                     help="Shelve instead of submit. Shelved files are reverted, "
 +                                     "restoring the workspace to the state before the shelve"),
 +                optparse.make_option("--update-shelve", dest="update_shelve", action="store", type="int",
 +                                     metavar="CHANGELIST",
 +                                     help="update an existing shelved changelist, implies --shelve")
          ]
          self.description = "Submit changes from git to the perforce depot."
          self.usage += " [name of git branch to submit into perforce depot]"
          self.detectRenames = False
          self.preserveUser = gitConfigBool("git-p4.preserveUser")
          self.dry_run = False
 +        self.shelve = False
 +        self.update_shelve = None
          self.prepare_p4_only = False
          self.conflict_behavior = None
          self.isWindows = (platform.system() == "Windows")
                      return 1
          return 0
  
 -    def prepareSubmitTemplate(self):
 +    def prepareSubmitTemplate(self, changelist=None):
          """Run "p4 change -o" to grab a change specification template.
             This does not use "p4 -G", as it is nice to keep the submission
             template in original order, since a human might edit it.
  
          template = ""
          inFilesSection = False
 -        for line in p4_read_pipe_lines(['change', '-o']):
 +        args = ['change', '-o']
 +        if changelist:
 +            args.append(str(changelist))
 +
 +        for line in p4_read_pipe_lines(args):
              if line.endswith("\r\n"):
                  line = line[:-2] + "\n"
              if inFilesSection:
              if response == 'n':
                  return False
  
 -    def get_diff_description(self, editedFiles, filesToAdd):
 +    def get_diff_description(self, editedFiles, filesToAdd, symlinks):
          # diff
          if os.environ.has_key("P4DIFF"):
              del(os.environ["P4DIFF"])
              newdiff += "==== new file ====\n"
              newdiff += "--- /dev/null\n"
              newdiff += "+++ %s\n" % newFile
 -            f = open(newFile, "r")
 -            for line in f.readlines():
 -                newdiff += "+" + line
 -            f.close()
 +
 +            is_link = os.path.islink(newFile)
 +            expect_link = newFile in symlinks
 +
 +            if is_link and expect_link:
 +                newdiff += "+%s\n" % os.readlink(newFile)
 +            else:
 +                f = open(newFile, "r")
 +                for line in f.readlines():
 +                    newdiff += "+" + line
 +                f.close()
  
          return (diff + newdiff).replace('\r\n', '\n')
  
  
          diff = read_pipe_lines("git diff-tree -r %s \"%s^\" \"%s\"" % (self.diffOpts, id, id))
          filesToAdd = set()
 +        filesToChangeType = set()
          filesToDelete = set()
          editedFiles = set()
          pureRenameCopy = set()
 +        symlinks = set()
          filesToChangeExecBit = {}
 +        all_files = list()
  
          for line in diff:
              diff = parseDiffTreeEntry(line)
              modifier = diff['status']
              path = diff['src']
 +            all_files.append(path)
 +
              if modifier == "M":
                  p4_edit(path)
                  if isModeExecChanged(diff['src_mode'], diff['dst_mode']):
                  filesToChangeExecBit[path] = diff['dst_mode']
                  if path in filesToDelete:
                      filesToDelete.remove(path)
 +
 +                dst_mode = int(diff['dst_mode'], 8)
 +                if dst_mode == 0120000:
 +                    symlinks.add(path)
 +
              elif modifier == "D":
                  filesToDelete.add(path)
                  if path in filesToAdd:
                      os.unlink(dest)
                      filesToDelete.add(src)
                  editedFiles.add(dest)
 +            elif modifier == "T":
 +                filesToChangeType.add(path)
              else:
                  die("unknown modifier %s for %s" % (modifier, path))
  
          #
          system(applyPatchCmd)
  
 +        for f in filesToChangeType:
 +            p4_edit(f, "-t", "auto")
          for f in filesToAdd:
              p4_add(f)
          for f in filesToDelete:
              mode = filesToChangeExecBit[f]
              setP4ExecBit(f, mode)
  
 +        if self.update_shelve:
 +            print("all_files = %s" % str(all_files))
 +            p4_reopen_in_change(self.update_shelve, all_files)
 +
          #
          # Build p4 change description, starting with the contents
          # of the git commit message.
          logMessage = logMessage.strip()
          (logMessage, jobs) = self.separate_jobs_from_description(logMessage)
  
 -        template = self.prepareSubmitTemplate()
 +        template = self.prepareSubmitTemplate(self.update_shelve)
          submitTemplate = self.prepareLogMessage(template, logMessage, jobs)
  
          if self.preserveUser:
          separatorLine = "######## everything below this line is just the diff #######\n"
          if not self.prepare_p4_only:
              submitTemplate += separatorLine
 -            submitTemplate += self.get_diff_description(editedFiles, filesToAdd)
 +            submitTemplate += self.get_diff_description(editedFiles, filesToAdd, symlinks)
  
          (handle, fileName) = tempfile.mkstemp()
          tmpFile = os.fdopen(handle, "w+b")
                  if self.isWindows:
                      message = message.replace("\r\n", "\n")
                  submitTemplate = message[:message.index(separatorLine)]
 -                p4_write_pipe(['submit', '-i'], submitTemplate)
 +
 +                if self.update_shelve:
 +                    p4_write_pipe(['shelve', '-r', '-i'], submitTemplate)
 +                elif self.shelve:
 +                    p4_write_pipe(['shelve', '-i'], submitTemplate)
 +                else:
 +                    p4_write_pipe(['submit', '-i'], submitTemplate)
 +                    # The rename/copy happened by applying a patch that created a
 +                    # new file.  This leaves it writable, which confuses p4.
 +                    for f in pureRenameCopy:
 +                        p4_sync(f, "-f")
  
                  if self.preserveUser:
                      if p4User:
                          changelist = self.lastP4Changelist()
                          self.modifyChangelistUser(changelist, p4User)
  
 -                # The rename/copy happened by applying a patch that created a
 -                # new file.  This leaves it writable, which confuses p4.
 -                for f in pureRenameCopy:
 -                    p4_sync(f, "-f")
                  submitted = True
  
          finally:
              # skip this patch
 -            if not submitted:
 -                print "Submission cancelled, undoing p4 changes."
 -                for f in editedFiles:
 +            if not submitted or self.shelve:
 +                if self.shelve:
 +                    print ("Reverting shelved files.")
 +                else:
 +                    print ("Submission cancelled, undoing p4 changes.")
 +                for f in editedFiles | filesToDelete:
                      p4_revert(f)
                  for f in filesToAdd:
                      p4_revert(f)
                      os.remove(f)
 -                for f in filesToDelete:
 -                    p4_revert(f)
  
          os.remove(fileName)
          return submitted
          if len(self.origin) == 0:
              self.origin = upstream
  
 +        if self.update_shelve:
 +            self.shelve = True
 +
          if self.preserveUser:
              if not self.canChangeChangelists():
                  die("Cannot preserve user names without p4 super-user or admin permissions")
          if self.useClientSpec:
              self.clientSpecDirs = getClientSpec()
  
 -        # Check for the existance of P4 branches
 +        # Check for the existence of P4 branches
          branchesDetected = (len(p4BranchesInGit().keys()) > 1)
  
          if self.useClientSpec and not branchesDetected:
                          break
  
          chdir(self.oldWorkingDirectory)
 -
 +        shelved_applied = "shelved" if self.shelve else "applied"
          if self.dry_run:
              pass
          elif self.prepare_p4_only:
              pass
          elif len(commits) == len(applied):
 -            print "All commits applied!"
 +            print ("All commits {0}!".format(shelved_applied))
  
              sync = P4Sync()
              if self.branch:
  
          else:
              if len(applied) == 0:
 -                print "No commits applied."
 +                print ("No commits {0}.".format(shelved_applied))
              else:
 -                print "Applied only the commits marked with '*':"
 +                print ("{0} only the commits marked with '*':".format(shelved_applied.capitalize()))
                  for c in commits:
                      if c in applied:
                          star = "*"
@@@ -2340,7 -2253,7 +2340,7 @@@ class P4Sync(Command, P4UserMap)
          self.useClientSpec_from_options = False
          self.clientSpecDirs = None
          self.tempBranches = []
 -        self.tempBranchLocation = "git-p4-tmp"
 +        self.tempBranchLocation = "refs/git-p4-tmp"
          self.largeFileSystem = None
  
          if gitConfig('git-p4.largeFileSystem'):
              fnum = fnum + 1
          return files
  
 +    def extractJobsFromCommit(self, commit):
 +        jobs = []
 +        jnum = 0
 +        while commit.has_key("job%s" % jnum):
 +            job = commit["job%s" % jnum]
 +            jobs.append(job)
 +            jnum = jnum + 1
 +        return jobs
 +
      def stripRepoPath(self, path, prefixes):
          """When streaming files, this is called to map a p4 depot path
             to where it should go in git.  The prefixes are either
              self.gitStream.write(d)
          self.gitStream.write('\n')
  
+     def encodeWithUTF8(self, path):
+         try:
+             path.decode('ascii')
+         except:
+             encoding = 'utf8'
+             if gitConfig('git-p4.pathEncoding'):
+                 encoding = gitConfig('git-p4.pathEncoding')
+             path = path.decode(encoding, 'replace').encode('utf8', 'replace')
+             if self.verbose:
+                 print 'Path with non-ASCII characters detected. Used %s to encode: %s ' % (encoding, path)
+         return path
      # output one file from the P4 stream
      # - helper for streamP4Files
  
      def streamOneP4File(self, file, contents):
          relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
+         relPath = self.encodeWithUTF8(relPath)
          if verbose:
              size = int(self.stream_file['fileSize'])
              sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], relPath, size/1024/1024))
              text = regexp.sub(r'$\1$', text)
              contents = [ text ]
  
-         try:
-             relPath.decode('ascii')
-         except:
-             encoding = 'utf8'
-             if gitConfig('git-p4.pathEncoding'):
-                 encoding = gitConfig('git-p4.pathEncoding')
-             relPath = relPath.decode(encoding, 'replace').encode('utf8', 'replace')
-             if self.verbose:
-                 print 'Path with non-ASCII characters detected. Used %s to encode: %s ' % (encoding, relPath)
          if self.largeFileSystem:
              (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents)
  
  
      def streamOneP4Deletion(self, file):
          relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
+         relPath = self.encodeWithUTF8(relPath)
          if verbose:
              sys.stdout.write("delete %s\n" % relPath)
              sys.stdout.flush()
              return True
          hasPrefix = [p for p in self.branchPrefixes
                          if p4PathStartsWith(path, p)]
 -        if hasPrefix and self.verbose:
 +        if not hasPrefix and self.verbose:
              print('Ignoring file outside of prefix: {0}'.format(path))
          return hasPrefix
  
      def commit(self, details, files, branch, parent = ""):
          epoch = details["time"]
          author = details["user"]
 +        jobs = self.extractJobsFromCommit(details)
  
          if self.verbose:
              print('commit into {0}'.format(branch))
  
          self.gitStream.write("data <<EOT\n")
          self.gitStream.write(details["desc"])
 +        if len(jobs) > 0:
 +            self.gitStream.write("\nJobs: %s" % (' '.join(jobs)))
          self.gitStream.write("\n[git-p4: depot-paths = \"%s\": change = %s" %
                               (','.join(self.branchPrefixes), details["change"]))
          if len(details['options']) > 0:
@@@ -3748,7 -3653,6 +3752,7 @@@ def main()
          if cmd.gitdir == None:
              cmd.gitdir = os.path.abspath(".git")
              if not isValidGitDir(cmd.gitdir):
 +                # "rev-parse --git-dir" without arguments will try $PWD/.git
                  cmd.gitdir = read_pipe("git rev-parse --git-dir").strip()
                  if os.path.exists(cmd.gitdir):
                      cdup = read_pipe("git rev-parse --show-cdup").strip()
              else:
                  die("fatal: cannot locate git repository at %s" % cmd.gitdir)
  
 +        # so git commands invoked from the P4 workspace will succeed
          os.environ["GIT_DIR"] = cmd.gitdir
  
      if not cmd.run(args):