From: Junio C Hamano Date: Thu, 15 Oct 2015 22:43:52 +0000 (-0700) Subject: Merge branch 'ls/p4-lfs' X-Git-Tag: v2.7.0-rc0~96 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/6ff518f593729a39f1314c9c05e3758f5ec19162?hp=-c Merge branch 'ls/p4-lfs' Teach "git p4" to send large blobs outside the repository by talking to Git LFS. * ls/p4-lfs: git-p4: add Git LFS backend for large file system git-p4: add support for large file systems git-p4: check free space during streaming git-p4: add file streaming progress in verbose mode git-p4: return an empty list if a list config has no values git-p4: add gitConfigInt reader git-p4: add optional type specifier to gitConfig reader --- 6ff518f593729a39f1314c9c05e3758f5ec19162 diff --combined Documentation/git-p4.txt index 12a57d49f4,f34d16d71c..c3ff7d0d9b --- a/Documentation/git-p4.txt +++ b/Documentation/git-p4.txt @@@ -510,13 -510,38 +510,45 @@@ git-p4.useClientSpec: option '--use-client-spec'. See the "CLIENT SPEC" section above. This variable is a boolean, not the name of a p4 client. +git-p4.pathEncoding:: + Perforce keeps the encoding of a path as given by the originating OS. + Git expects paths encoded as UTF-8. Use this config to tell git-p4 + what encoding Perforce had used for the paths. This encoding is used + to transcode the paths to UTF-8. As an example, Perforce on Windows + often uses “cp1252” to encode path names. + + git-p4.largeFileSystem:: + Specify the system that is used for large (binary) files. Please note + that large file systems do not support the 'git p4 submit' command. + Only Git LFS [1] is implemented right now. Download + and install the Git LFS command line extension to use this option + and configure it like this: + + + ------------- + git config git-p4.largeFileSystem GitLFS + ------------- + + + [1] https://git-lfs.github.com/ + + git-p4.largeFileExtensions:: + All files matching a file extension in the list will be processed + by the large file system. Do not prefix the extensions with '.'. + + git-p4.largeFileThreshold:: + All files with an uncompressed size exceeding the threshold will be + processed by the large file system. By default the threshold is + defined in bytes. Add the suffix k, m, or g to change the unit. + + git-p4.largeFileCompressedThreshold:: + All files with a compressed size exceeding the threshold will be + processed by the large file system. This option might slow down + your clone/sync process. By default the threshold is defined in + bytes. Add the suffix k, m, or g to change the unit. + + git-p4.largeFilePush:: + Boolean variable which defines if large files are automatically + pushed to a server. + Submit variables ~~~~~~~~~~~~~~~~ git-p4.detectRenames:: diff --combined git-p4.py index 4915e83d76,765ea5f064..daa60c60d6 --- a/git-p4.py +++ b/git-p4.py @@@ -22,6 -22,8 +22,8 @@@ import platfor import re import shutil import stat + import zipfile + import zlib try: from subprocess import CalledProcessError @@@ -104,6 -106,16 +106,16 @@@ def chdir(path, is_client_path=False) path = os.getcwd() os.environ['PWD'] = path + def calcDiskFree(): + """Return free space in bytes on the disk of the given dirname.""" + if platform.system() == 'Windows': + free_bytes = ctypes.c_ulonglong(0) + ctypes.windll.kernel32.GetDiskFreeSpaceExW(ctypes.c_wchar_p(os.getcwd()), None, None, ctypes.pointer(free_bytes)) + return free_bytes.value + else: + st = os.statvfs(os.getcwd()) + return st.f_bavail * st.f_frsize + def die(msg): if verbose: raise Exception(msg) @@@ -134,11 -146,13 +146,11 @@@ def read_pipe(c, ignore_error=False) sys.stderr.write('Reading pipe: %s\n' % str(c)) expand = isinstance(c,basestring) - p = subprocess.Popen(c, stdout=subprocess.PIPE, shell=expand) - pipe = p.stdout - val = pipe.read() - if p.wait() and not ignore_error: - die('Command failed: %s' % str(c)) - - return val + p = subprocess.Popen(c, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=expand) + (out, err) = p.communicate() + if p.returncode != 0 and not ignore_error: + die('Command failed: %s\nError: %s' % (str(c), err)) + return out def p4_read_pipe(c, ignore_error=False): real_cmd = p4_build_cmd(c) @@@ -602,9 -616,12 +614,12 @@@ def gitBranchExists(branch) _gitConfig = {} - def gitConfig(key): + def gitConfig(key, typeSpecifier=None): if not _gitConfig.has_key(key): - cmd = [ "git", "config", key ] + cmd = [ "git", "config" ] + if typeSpecifier: + cmd += [ typeSpecifier ] + cmd += [ key ] s = read_pipe(cmd, ignore_error=True) _gitConfig[key] = s.strip() return _gitConfig[key] @@@ -615,16 -632,26 +630,26 @@@ def gitConfigBool(key) in the config.""" if not _gitConfig.has_key(key): - cmd = [ "git", "config", "--bool", key ] + _gitConfig[key] = gitConfig(key, '--bool') == "true" + return _gitConfig[key] + + def gitConfigInt(key): + if not _gitConfig.has_key(key): + cmd = [ "git", "config", "--int", key ] s = read_pipe(cmd, ignore_error=True) v = s.strip() - _gitConfig[key] = v == "true" + try: + _gitConfig[key] = int(gitConfig(key, '--int')) + except ValueError: + _gitConfig[key] = None return _gitConfig[key] def gitConfigList(key): if not _gitConfig.has_key(key): s = read_pipe(["git", "config", "--get-all", key], ignore_error=True) _gitConfig[key] = s.strip().split(os.linesep) + if _gitConfig[key] == ['']: + _gitConfig[key] = [] return _gitConfig[key] def p4BranchesInGit(branchesAreInRemotes=True): @@@ -907,6 -934,182 +932,182 @@@ def wildcard_present(path) m = re.search("[*#@%]", path) return m is not None + class LargeFileSystem(object): + """Base class for large file system support.""" + + def __init__(self, writeToGitStream): + self.largeFiles = set() + self.writeToGitStream = writeToGitStream + + def generatePointer(self, cloneDestination, contentFile): + """Return the content of a pointer file that is stored in Git instead of + the actual content.""" + assert False, "Method 'generatePointer' required in " + self.__class__.__name__ + + def pushFile(self, localLargeFile): + """Push the actual content which is not stored in the Git repository to + a server.""" + assert False, "Method 'pushFile' required in " + self.__class__.__name__ + + def hasLargeFileExtension(self, relPath): + return reduce( + lambda a, b: a or b, + [relPath.endswith('.' + e) for e in gitConfigList('git-p4.largeFileExtensions')], + False + ) + + def generateTempFile(self, contents): + contentFile = tempfile.NamedTemporaryFile(prefix='git-p4-large-file', delete=False) + for d in contents: + contentFile.write(d) + contentFile.close() + return contentFile.name + + def exceedsLargeFileThreshold(self, relPath, contents): + if gitConfigInt('git-p4.largeFileThreshold'): + contentsSize = sum(len(d) for d in contents) + if contentsSize > gitConfigInt('git-p4.largeFileThreshold'): + return True + if gitConfigInt('git-p4.largeFileCompressedThreshold'): + contentsSize = sum(len(d) for d in contents) + if contentsSize <= gitConfigInt('git-p4.largeFileCompressedThreshold'): + return False + contentTempFile = self.generateTempFile(contents) + compressedContentFile = tempfile.NamedTemporaryFile(prefix='git-p4-large-file', delete=False) + zf = zipfile.ZipFile(compressedContentFile.name, mode='w') + zf.write(contentTempFile, compress_type=zipfile.ZIP_DEFLATED) + zf.close() + compressedContentsSize = zf.infolist()[0].compress_size + os.remove(contentTempFile) + os.remove(compressedContentFile.name) + if compressedContentsSize > gitConfigInt('git-p4.largeFileCompressedThreshold'): + return True + return False + + def addLargeFile(self, relPath): + self.largeFiles.add(relPath) + + def removeLargeFile(self, relPath): + self.largeFiles.remove(relPath) + + def isLargeFile(self, relPath): + return relPath in self.largeFiles + + def processContent(self, git_mode, relPath, contents): + """Processes the content of git fast import. This method decides if a + file is stored in the large file system and handles all necessary + steps.""" + if self.exceedsLargeFileThreshold(relPath, contents) or self.hasLargeFileExtension(relPath): + contentTempFile = self.generateTempFile(contents) + (git_mode, contents, localLargeFile) = self.generatePointer(contentTempFile) + + # Move temp file to final location in large file system + largeFileDir = os.path.dirname(localLargeFile) + if not os.path.isdir(largeFileDir): + os.makedirs(largeFileDir) + shutil.move(contentTempFile, localLargeFile) + self.addLargeFile(relPath) + if gitConfigBool('git-p4.largeFilePush'): + self.pushFile(localLargeFile) + if verbose: + sys.stderr.write("%s moved to large file system (%s)\n" % (relPath, localLargeFile)) + return (git_mode, contents) + + class MockLFS(LargeFileSystem): + """Mock large file system for testing.""" + + def generatePointer(self, contentFile): + """The pointer content is the original content prefixed with "pointer-". + The local filename of the large file storage is derived from the file content. + """ + with open(contentFile, 'r') as f: + content = next(f) + gitMode = '100644' + pointerContents = 'pointer-' + content + localLargeFile = os.path.join(os.getcwd(), '.git', 'mock-storage', 'local', content[:-1]) + return (gitMode, pointerContents, localLargeFile) + + def pushFile(self, localLargeFile): + """The remote filename of the large file storage is the same as the local + one but in a different directory. + """ + remotePath = os.path.join(os.path.dirname(localLargeFile), '..', 'remote') + if not os.path.exists(remotePath): + os.makedirs(remotePath) + shutil.copyfile(localLargeFile, os.path.join(remotePath, os.path.basename(localLargeFile))) + + class GitLFS(LargeFileSystem): + """Git LFS as backend for the git-p4 large file system. + See https://git-lfs.github.com/ for details.""" + + def __init__(self, *args): + LargeFileSystem.__init__(self, *args) + self.baseGitAttributes = [] + + def generatePointer(self, contentFile): + """Generate a Git LFS pointer for the content. Return LFS Pointer file + mode and content which is stored in the Git repository instead of + the actual content. Return also the new location of the actual + content. + """ + pointerProcess = subprocess.Popen( + ['git', 'lfs', 'pointer', '--file=' + contentFile], + stdout=subprocess.PIPE + ) + pointerFile = pointerProcess.stdout.read() + if pointerProcess.wait(): + os.remove(contentFile) + die('git-lfs pointer command failed. Did you install the extension?') + pointerContents = [i+'\n' for i in pointerFile.split('\n')[2:][:-1]] + oid = pointerContents[1].split(' ')[1].split(':')[1][:-1] + localLargeFile = os.path.join( + os.getcwd(), + '.git', 'lfs', 'objects', oid[:2], oid[2:4], + oid, + ) + # LFS Spec states that pointer files should not have the executable bit set. + gitMode = '100644' + return (gitMode, pointerContents, localLargeFile) + + def pushFile(self, localLargeFile): + uploadProcess = subprocess.Popen( + ['git', 'lfs', 'push', '--object-id', 'origin', os.path.basename(localLargeFile)] + ) + if uploadProcess.wait(): + die('git-lfs push command failed. Did you define a remote?') + + def generateGitAttributes(self): + return ( + self.baseGitAttributes + + [ + '\n', + '#\n', + '# Git LFS (see https://git-lfs.github.com/)\n', + '#\n', + ] + + ['*.' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n' + for f in sorted(gitConfigList('git-p4.largeFileExtensions')) + ] + + ['/' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n' + for f in sorted(self.largeFiles) if not self.hasLargeFileExtension(f) + ] + ) + + def addLargeFile(self, relPath): + LargeFileSystem.addLargeFile(self, relPath) + self.writeToGitStream('100644', '.gitattributes', self.generateGitAttributes()) + + def removeLargeFile(self, relPath): + LargeFileSystem.removeLargeFile(self, relPath) + self.writeToGitStream('100644', '.gitattributes', self.generateGitAttributes()) + + def processContent(self, git_mode, relPath, contents): + if relPath == '.gitattributes': + self.baseGitAttributes = contents + return (git_mode, self.generateGitAttributes()) + else: + return LargeFileSystem.processContent(self, git_mode, relPath, contents) + class Command: def __init__(self): self.usage = "usage: %prog [options]" @@@ -1080,6 -1283,9 +1281,9 @@@ class P4Submit(Command, P4UserMap) self.p4HasMoveCommand = p4_has_move_command() self.branch = None + if gitConfig('git-p4.largeFileSystem'): + die("Large file system not supported for git-p4 submit command. Please remove it from config.") + def check(self): if len(p4CmdList("opened ...")) > 0: die("You have files opened with perforce! Close them before starting the sync.") @@@ -2030,6 -2236,13 +2234,13 @@@ class P4Sync(Command, P4UserMap) self.clientSpecDirs = None self.tempBranches = [] self.tempBranchLocation = "git-p4-tmp" + self.largeFileSystem = None + + if gitConfig('git-p4.largeFileSystem'): + largeFileSystemConstructor = globals()[gitConfig('git-p4.largeFileSystem')] + self.largeFileSystem = largeFileSystemConstructor( + lambda git_mode, relPath, contents: self.writeToGitStream(git_mode, relPath, contents) + ) if gitConfig("git-p4.syncFromOrigin") == "false": self.syncWithOrigin = False @@@ -2150,13 -2363,22 +2361,22 @@@ return branches + def writeToGitStream(self, gitMode, relPath, contents): + self.gitStream.write('M %s inline %s\n' % (gitMode, relPath)) + self.gitStream.write('data %d\n' % sum(len(d) for d in contents)) + for d in contents: + self.gitStream.write(d) + self.gitStream.write('\n') + # output one file from the P4 stream # - helper for streamP4Files def streamOneP4File(self, file, contents): relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes) if verbose: - sys.stderr.write("%s\n" % relPath) + size = int(self.stream_file['fileSize']) + sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], relPath, size/1024/1024)) + sys.stdout.flush() (type_base, type_mods) = split_p4_type(file["type"]) @@@ -2191,17 -2413,10 +2411,17 @@@ # them back too. This is not needed to the cygwin windows version, # just the native "NT" type. # - text = p4_read_pipe(['print', '-q', '-o', '-', "%s@%s" % (file['depotFile'], file['change']) ]) - if p4_version_string().find("/NT") >= 0: - text = text.replace("\r\n", "\n") - contents = [ text ] + try: + text = p4_read_pipe(['print', '-q', '-o', '-', '%s@%s' % (file['depotFile'], file['change'])]) + except Exception as e: + if 'Translation of file content failed' in str(e): + type_base = 'binary' + else: + raise e + else: + if p4_version_string().find('/NT') >= 0: + text = text.replace('\r\n', '\n') + contents = [ text ] if type_base == "apple": # Apple filetype files will be streamed as a concatenation of @@@ -2225,34 -2440,21 +2445,31 @@@ text = regexp.sub(r'$\1$', text) contents = [ text ] + try: + relPath.decode('ascii') + except: + encoding = 'utf8' + if gitConfig('git-p4.pathEncoding'): + encoding = gitConfig('git-p4.pathEncoding') + relPath = relPath.decode(encoding, 'replace').encode('utf8', 'replace') + if self.verbose: + print 'Path with non-ASCII characters detected. Used %s to encode: %s ' % (encoding, relPath) + - self.gitStream.write("M %s inline %s\n" % (git_mode, relPath)) + if self.largeFileSystem: + (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents) - # total length... - length = 0 - for d in contents: - length = length + len(d) - - self.gitStream.write("data %d\n" % length) - for d in contents: - self.gitStream.write(d) - self.gitStream.write("\n") + self.writeToGitStream(git_mode, relPath, contents) def streamOneP4Deletion(self, file): relPath = self.stripRepoPath(file['path'], self.branchPrefixes) if verbose: - sys.stderr.write("delete %s\n" % relPath) + sys.stdout.write("delete %s\n" % relPath) + sys.stdout.flush() self.gitStream.write("D %s\n" % relPath) + if self.largeFileSystem and self.largeFileSystem.isLargeFile(relPath): + self.largeFileSystem.removeLargeFile(relPath) + # handle another chunk of streaming data def streamP4FilesCb(self, marshalled): @@@ -2262,6 -2464,14 +2479,14 @@@ if marshalled["code"] == "error": if "data" in marshalled: err = marshalled["data"].rstrip() + + if not err and 'fileSize' in self.stream_file: + required_bytes = int((4 * int(self.stream_file["fileSize"])) - calcDiskFree()) + if required_bytes > 0: + err = 'Not enough space left on %s! Free at least %i MB.' % ( + os.getcwd(), required_bytes/1024/1024 + ) + if err: f = None if self.stream_have_file_info: @@@ -2290,10 -2500,23 +2515,23 @@@ # 'data' field we need to append to our array for k in marshalled.keys(): if k == 'data': + if 'streamContentSize' not in self.stream_file: + self.stream_file['streamContentSize'] = 0 + self.stream_file['streamContentSize'] += len(marshalled['data']) self.stream_contents.append(marshalled['data']) else: self.stream_file[k] = marshalled[k] + if (verbose and + 'streamContentSize' in self.stream_file and + 'fileSize' in self.stream_file and + 'depotFile' in self.stream_file): + size = int(self.stream_file["fileSize"]) + if size > 0: + progress = 100*self.stream_file['streamContentSize']/size + sys.stdout.write('\r%s %d%% (%i MB)' % (self.stream_file['depotFile'], progress, int(size/1024/1024))) + sys.stdout.flush() + self.stream_have_file_info = True # Stream directly from "p4 files" into "git fast-import" @@@ -2344,11 -2567,8 +2582,11 @@@ else: return "%s " % userid - # Stream a p4 tag def streamTag(self, gitStream, labelName, labelDetails, commit, epoch): + """ Stream a p4 tag. + commit is either a git commit, or a fast-import mark, ":" + """ + if verbose: print "writing tag %s for commit %s" % (labelName, commit) gitStream.write("tag %s\n" % labelName) @@@ -2399,7 -2619,7 +2637,7 @@@ self.clientSpecDirs.update_client_spec_path_cache(files) self.gitStream.write("commit %s\n" % branch) -# gitStream.write("mark :%s\n" % details["change"]) + self.gitStream.write("mark :%s\n" % details["change"]) self.committedChanges.add(int(details["change"])) committer = "" if author not in self.users: @@@ -2518,19 -2738,13 +2756,19 @@@ if change.has_key('change'): # find the corresponding git commit; take the oldest commit changelist = int(change['change']) - gitCommit = read_pipe(["git", "rev-list", "--max-count=1", - "--reverse", ":/\[git-p4:.*change = %d\]" % changelist]) - if len(gitCommit) == 0: - print "could not find git commit for changelist %d" % changelist - else: - gitCommit = gitCommit.strip() + if changelist in self.committedChanges: + gitCommit = ":%d" % changelist # use a fast-import mark commitFound = True + else: + gitCommit = read_pipe(["git", "rev-list", "--max-count=1", + "--reverse", ":/\[git-p4:.*change = %d\]" % changelist], ignore_error=True) + if len(gitCommit) == 0: + print "importing label %s: could not find git commit for changelist %d" % (name, changelist) + else: + commitFound = True + gitCommit = gitCommit.strip() + + if commitFound: # Convert from p4 time format try: tmwhen = time.strptime(labelDetails['Update'], "%Y/%m/%d %H:%M:%S")