#!/usr/bin/env python
#
# Copyright (c) 2012 Felipe Contreras
#

# Inspired by Rocco Rutte's hg-fast-export

# Just copy to your ~/bin, or anywhere in your $PATH.
# Then you can clone with:
# git clone hg::/path/to/mercurial/repo/
#
# For remote repositories a local clone is stored in
# "$GIT_DIR/hg/origin/clone/.hg/".

from mercurial import hg, ui, bookmarks, context, encoding, node, error, extensions

import re
import sys
import os
import json
import shutil
import subprocess
import urllib
import atexit
import urlparse, hashlib

#
# If you want to switch to hg-git compatibility mode:
# git config --global remote-hg.hg-git-compat true
#
# If you are not in hg-git-compat mode and want to disable the tracking of
# named branches:
# git config --global remote-hg.track-branches false
#
# If you don't want to force pushes (and thus risk creating new remote heads):
# git config --global remote-hg.force-push false
#
# If you want the equivalent of hg's clone/pull --insecure option:
# git config remote-hg.insecure true
#
# git:
# Sensible defaults for git.
# hg bookmarks are exported as git branches, hg branches are prefixed
# with 'branches/', HEAD is a special case.
#
# hg:
# Emulate hg-git.
# Only hg bookmarks are exported as git branches.
# Commits are modified to preserve hg information and allow bidirectionality.
#

# Patterns used to split a user string into name / e-mail parts.
NAME_RE = re.compile('^([^<>]+)')
AUTHOR_RE = re.compile('^([^<>]+?)? ?<([^<>]*)>$')
EMAIL_RE = re.compile('^([^<>]+[^ \\\t<>])?\\b(?:[ \\t<>]*?)\\b([^ \\t<>]+@[^ \\t<>]+)')
AUTHOR_HG_RE = re.compile('^(.*?) ?<(.*?)(?:>(.+)?)?$')
# "<command> [<name> ]<<email>> <timestamp> <+-HHMM>" as found in a
# fast-export stream.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')

def die(msg, *args):
    """Write 'ERROR: <msg % args>' to stderr and exit with status 1."""
    sys.stderr.write('ERROR: %s\n' % (msg % args))
    sys.exit(1)

def warn(msg, *args):
    """Write 'WARNING: <msg % args>' to stderr."""
    sys.stderr.write('WARNING: %s\n' % (msg % args))

def gitmode(flags):
    """Map hg file flags to a git mode string; the symlink flag wins."""
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'

def gittz(tz):
    """Format a timezone offset in seconds as a git '+HHMM' string.

    The sign is inverted (a positive `tz` yields a '-' offset). The
    magnitude is split on its absolute value: floor-dividing a negative
    number rounds away from zero and used to turn e.g. 12600 into
    '-0430' instead of '-0330'.
    """
    sign = '-' if tz > 0 else '+'
    hours, seconds = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, seconds // 60)

def hgmode(mode):
    """Map a git mode string to the hg flag ('x', 'l' or '')."""
    m = { '100755': 'x', '120000': 'l' }
    return m.get(mode, '')

def hghex(node):
    """Return the hexadecimal form of a binary hg node id."""
    return hg.node.hex(node)

def hgref(ref):
    """Undo gitref(): turn '___' back into spaces."""
    return ref.replace('___', ' ')

def gitref(ref):
    """Escape spaces as '___' so the name can be used in a git ref."""
    return ref.replace(' ', '___')

def get_config(config):
    """Return the raw stdout of `git config --get <config>`.

    The trailing newline is kept; callers compare against 'true\\n' etc.
    """
    cmd = ['git', 'config', '--get', config]
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    output, _ = process.communicate()
    return output

class Marks:
    """Persistent rev <-> mark mapping plus per-ref tips, stored as JSON."""

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}      # str(rev) -> mark
        self.rev_marks = {}  # mark -> rev
        self.last_mark = 0

        self.load()

    def load(self):
        """Read the state from self.path, if that file exists."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as fp:
            tmp = json.load(fp)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        for rev, mark in self.marks.iteritems():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serialisable state."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON."""
        with open(self.path, 'w') as fp:
            json.dump(self.dict(), fp)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for `rev` (KeyError if unknown)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the rev recorded for `mark` (KeyError if unknown)."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate a fresh mark that is not tied to a revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a fresh mark for `rev` and record it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        # Keep the reverse map in sync, as new_mark() does, so to_rev()
        # works without a store()/load() round trip.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen `mark` for `rev`."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the stored tip for `branch`, or 0 if none."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        self.tips[branch] = tip

class Parser:
    """Line-oriented reader for the commands / fast-export stream on stdin."""

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals `separator`."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line becomes None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer after the first ':' of the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload of a 'data <size>' line; None for other lines."""
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse an author/committer/tagger line.

        Returns (user, timestamp, tz) with tz in seconds and its sign
        inverted relative to the '+HHMM' text, or None if the line does
        not match RAW_AUTHOR_RE.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            user = name

        if ex:
            user += ex

        # Split HHMM on the absolute value: floor division / modulo of a
        # negative int used to turn -0330 into -2h50m.
        tz = int(tz)
        sign = -1 if tz < 0 else 1
        hours, minutes = divmod(abs(tz), 100)
        tz = sign * (hours * 3600 + minutes * 60)
        return (user, int(date), -tz)

def fix_file_path(path):
    """Return `path` unchanged if relative, else relative to '/'."""
    if not os.path.isabs(path):
        return path
    return os.path.relpath(path, '/')

def export_files(files):
    """Emit a blob for every file context not emitted before.

    Returns a list of (git mode, mark, path) tuples, one per file.
    """
    global marks, filenodes

    final = []
    for f in files:
        fid = node.hex(f.filenode())

        if fid in filenodes:
            # Same file node already sent: reuse its mark.
            mark = filenodes[fid]
        else:
            mark = marks.next_mark()
            filenodes[fid] = mark
            d = f.data()

            print("blob")
            print("mark :%u" % mark)
            print("data %d" % len(d))
            print(d)

        path = fix_file_path(f.path())
        final.append((gitmode(f.flags()), mark, path))

    return final

def get_filechanges(repo, ctx, parent):
    """Compare the manifests of `ctx` and `parent`.

    Returns (added-or-modified file names, removed file names) as sets.
    """
    modified = set()
    added = set()
    removed = set()

    # load earliest manifest first for caching reasons
    prev = repo[parent].manifest().copy()
    cur = ctx.manifest()

    for fn in cur:
        if fn in prev:
            if (cur.flags(fn) != prev.flags(fn) or cur[fn] != prev[fn]):
                modified.add(fn)
            del prev[fn]
        else:
            added.add(fn)
    # Whatever is left in the parent copy no longer exists in ctx.
    removed |= set(prev.keys())

    return added | modified, removed

def fixup_user_git(user):
    """Split an hg user string into (name, mail); either may be None."""
    name = mail = None
    user = user.replace('"', '')
    m = AUTHOR_RE.match(user)
    if m:
        name = m.group(1)
        mail = m.group(2).strip()
    else:
        m = EMAIL_RE.match(user)
        if m:
            name = m.group(1)
            mail = m.group(2)
        else:
            m = NAME_RE.match(user)
            if m:
                name = m.group(1).strip()
    return (name, mail)

def fixup_user_hg(user):
    """Split an hg user string into (name, mail) the hg-git way.

    Text after the closing '>' is appended to the name as a quoted
    ' ext:(...)' suffix.
    """
    def sanitize(name):
        # stole this from hg-git
        return re.sub('[<>\n]', '?', name.lstrip('< ').rstrip('> '))

    m = AUTHOR_HG_RE.match(user)
    if m:
        name = sanitize(m.group(1))
        mail = sanitize(m.group(2))
        ex = m.group(3)
        if ex:
            name += ' ext:(' + urllib.quote(ex) + ')'
    else:
        name = sanitize(user)
        if '@' in user:
            mail = name
        else:
            mail = None

    return (name, mail)

def fixup_user(user):
    """Return 'name <mail>' for `user`, using the bad_* fallbacks."""
    global mode, bad_mail, bad_name

    if mode == 'git':
        name, mail = fixup_user_git(user)
    else:
        name, mail = fixup_user_hg(user)

    if not name:
        name = bad_name
    if not mail:
        mail = bad_mail

    return '%s <%s>' % (name, mail)

def get_repo(url, alias):
    """Open the repository for `url`.

    Local URLs are opened directly. Otherwise a clone under
    <dirname>/clone is created, or opened and pulled into, and the
    global `peer` is set to the remote peer.
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')
    myui.fout = sys.stderr

    try:
        if get_config('remote-hg.insecure') == 'true\n':
            myui.setconfig('web', 'cacerts', '')
    except subprocess.CalledProcessError:
        pass

    try:
        mod = extensions.load(myui, 'hgext.schemes', None)
        mod.extsetup(myui)
    except ImportError:
        pass

    if hg.islocal(url):
        repo = hg.repository(myui, url)
    else:
        local_path = os.path.join(dirname, 'clone')
        if not os.path.exists(local_path):
            try:
                peer, dstpeer = hg.clone(myui, {}, url, local_path, update=True, pull=True)
            except Exception:
                # Not a bare except: Ctrl-C / SystemExit must propagate.
                die('Repository error')
            repo = dstpeer.local()
        else:
            repo = hg.repository(myui, local_path)
            try:
                peer = hg.peer(myui, {}, url)
            except Exception:
                die('Repository error')
            repo.pull(peer, heads=None, force=True)

    return repo

def rev_to_mark(rev):
    global marks
    return marks.from_rev(rev)

def mark_to_rev(mark):
    global marks
    return marks.to_rev(mark)

def export_ref(repo, name, kind, head):
    """Emit fast-import commands for the unmarked revisions of a ref.

    Walks revision numbers from the stored tip of '<kind>/<name>' up to
    `head`, emits a commit for each one that has no mark yet, then
    resets the ref to `head` and records the new tip.
    """
    global prefix, marks, mode

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)
    head_rev = head.rev()

    revs = [r for r in xrange(tip, head_rev + 1) if not marks.is_marked(r)]
    count = 0

    for rev in revs:

        c = repo[rev]
        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
        rev_branch = extra['branch']

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            user, time, tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (user, time, gittz(int(tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if len(parents) == 0:
            # Root commit: every file in the manifest is new.
            modified = c.manifest().keys()
            removed = []
        else:
            modified, removed = get_filechanges(repo, c, parents[0])

        desc += '\n'

        if mode == 'hg':
            # hg-git compatibility: append hg metadata after '--HG--'.
            extra_msg = ''

            if rev_branch != 'default':
                extra_msg += 'branch : %s\n' % rev_branch

            renames = []
            for f in c.files():
                if f not in c.manifest():
                    continue
                rename = c.filectx(f).renamed()
                if rename:
                    renames.append((rename[0], f))

            for e in renames:
                extra_msg += "rename : %s => %s\n" % e

            for key, value in extra.iteritems():
                if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
                    continue
                else:
                    extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))

            if extra_msg:
                desc += '\n--HG--\n' + extra_msg

        if len(parents) == 0 and rev:
            print('reset %s/%s' % (prefix, ename))

        modified_final = export_files(c.filectx(f) for f in modified)

        print("commit %s/%s" % (prefix, ename))
        print("mark :%d" % (marks.get_mark(rev)))
        print("author %s" % (author))
        print("committer %s" % (committer))
        print("data %d" % (len(desc)))
        print(desc)

        if len(parents) > 0:
            print("from :%s" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                print("merge :%s" % (rev_to_mark(parents[1])))

        for f in modified_final:
            print("M %s :%u %s" % f)
        for f in removed:
            print("D %s" % (fix_file_path(f)))
        print("")

        count += 1
        if (count % 100 == 0):
            print("progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs)))

    # make sure the ref is updated
    # Use the head explicitly. This used to rely on `rev` leaking out of
    # the list comprehension / loop, which pointed at the wrong revision
    # when the head was already marked but earlier revisions were not.
    print("reset %s/%s" % (prefix, ename))
    print("from :%u" % rev_to_mark(head_rev))
    print("")

    marks.set_tip(ename, head_rev)

def export_tag(repo, tag):
    """Export the revision the hg tag `tag` (git-escaped name) points to."""
    export_ref(repo, tag, 'tags', repo[hgref(tag)])
def export_bookmark(repo, bmark):
    """Export the bookmark `bmark` (git-escaped name)."""
    head = bmarks[hgref(bmark)]
    export_ref(repo, bmark, 'bookmarks', head)

def export_branch(repo, branch):
    """Export the named branch `branch` (git-escaped name)."""
    tip = get_branch_tip(repo, branch)
    head = repo[tip]
    export_ref(repo, branch, 'branches', head)

def export_head(repo):
    """Export the ref recorded by list_head() as HEAD."""
    global g_head
    export_ref(repo, g_head[0], 'bookmarks', g_head[1])

def do_capabilities(parser):
    """Answer the 'capabilities' command."""
    global prefix, dirname

    print("import")
    print("export")
    print("refspec refs/heads/branches/*:%s/branches/*" % prefix)
    print("refspec refs/heads/*:%s/bookmarks/*" % prefix)
    print("refspec refs/tags/*:%s/tags/*" % prefix)

    path = os.path.join(dirname, 'marks-git')

    # Only advertise import-marks once a marks file exists.
    if os.path.exists(path):
        print("*import-marks %s" % path)
    print("*export-marks %s" % path)

    print("")

def branch_tip(repo, branch):
    """Return the tip node of the hg branch `branch`."""
    # older versions of mercurial don't have this
    if hasattr(repo, 'branchtip'):
        return repo.branchtip(branch)
    else:
        return repo.branchtags()[branch]

def get_branch_tip(repo, branch):
    """Return the head to export for `branch`, or None if it is unknown."""
    global branches

    heads = branches.get(hgref(branch), None)
    if not heads:
        return None

    # verify there's only one head
    if (len(heads) > 1):
        # Pass the name as an argument: warn() applies '%' itself, so a
        # pre-formatted message with a '%' in the name would raise.
        warn("Branch '%s' has more than one head, consider merging", branch)
        return branch_tip(repo, hgref(branch))

    return heads[0]

def list_head(repo, cur):
    """Print the HEAD symref line and remember it in g_head.

    Uses the current bookmark if there is one, otherwise a fake
    bookmark named after the current branch `cur` ('default' is
    reported as 'master').
    """
    global g_head, bmarks

    head = bookmarks.readcurrent(repo)
    if head:
        node = repo[head]
    else:
        # fake bookmark from current branch
        head = cur
        node = repo['.']
        if not node:
            node = repo['tip']
        if not node:
            return
        if head == 'default':
            head = 'master'
        bmarks[head] = node

    head = gitref(head)
    print("@refs/heads/%s HEAD" % head)
    g_head = (head, node)

def do_list(parser):
    """Answer the 'list' command: HEAD, branches, bookmarks and tags."""
    global branches, bmarks, track_branches

    repo = parser.repo
    for bmark, node in bookmarks.listbookmarks(repo).iteritems():
        bmarks[bmark] = repo[node]

    cur = repo.dirstate.branch()

    list_head(repo, cur)

    if track_branches:
        for branch in repo.branchmap():
            heads = repo.branchheads(branch)
            if len(heads):
                branches[branch] = heads

        for branch in branches:
            print("? refs/heads/branches/%s" % gitref(branch))

    for bmark in bmarks:
        print("? refs/heads/%s" % gitref(bmark))

    for tag, node in repo.tagslist():
        if tag == 'tip':
            continue
        print("? refs/tags/%s" % gitref(tag))

    print("")

def do_import(parser):
    """Answer a batch of 'import <ref>' commands with a fast-import stream."""
    repo = parser.repo

    path = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(path):
        print("feature import-marks=%s" % path)
    print("feature export-marks=%s" % path)
    sys.stdout.flush()

    # Force utf-8 in hg while exporting; restored below.
    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    # lets get all the import lines
    while parser.check('import'):
        ref = parser[1]

        if (ref == 'HEAD'):
            export_head(repo)
        elif ref.startswith('refs/heads/branches/'):
            branch = ref[len('refs/heads/branches/'):]
            export_branch(repo, branch)
        elif ref.startswith('refs/heads/'):
            bmark = ref[len('refs/heads/'):]
            export_bookmark(repo, bmark)
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            export_tag(repo, tag)

        parser.next()

    encoding.encoding = tmp

    print('done')

def parse_blob(parser):
    """Read a 'blob' command and keep its data in blob_marks[mark]."""
    global blob_marks

    parser.next()
    mark = parser.get_mark()
    parser.next()
    data = parser.get_data()
    blob_marks[mark] = data
    parser.next()

def get_merge_files(repo, p1, p2, files):
    """Add to `files` the files touched by p1 that are not listed yet."""
    for e in repo[p1].files():
        if e not in files:
            if e not in repo[p1].manifest():
                continue
            f = { 'ctx' : repo[p1][e] }
            files[e] = f

def parse_commit(parser):
    """Read a 'commit' command and create the matching hg changeset.

    Records the new node in parsed_refs and its mark in marks.
    """
    global marks, blob_marks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    # fast-export adds an extra newline
    # endswith() rather than data[-1]: an empty message must not raise.
    if data.endswith('\n'):
        data = data[:-1]

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # die() formats the message itself; do not pre-format.
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback for memctx: provide the content of file `f`.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    # Check if the ref is supposed to be a named branch
    if ref.startswith('refs/heads/branches/'):
        branch = ref[len('refs/heads/branches/'):]
        extra['branch'] = hgref(branch)

    if mode == 'hg':
        # Recover hg metadata stored after '--HG--' in the message.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node
    marks.new_mark(rev, commit_mark)

def parse_reset(parser):
    """Read a 'reset' command and record the node the ref points to."""
    global parsed_refs

    ref = parser[1]
    parser.next()
    # ugh
    if parser.check('commit'):
        parse_commit(parser)
        return
    if not parser.check('from'):
        return
    from_mark = parser.get_mark()
    parser.next()

    node = parser.repo.changelog.node(mark_to_rev(from_mark))
    parsed_refs[ref] = node

def parse_tag(parser):
    """Read a 'tag' command and remember its (tagger, message)."""
    name = parser[1]
    parser.next()
    # The 'from' mark is parsed but not used here.
    parser.get_mark()
    parser.next()
    tagger = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()

    parsed_tags[name] = (tagger, data)

def write_tag(repo, tag, node, msg, author):
    """Commit a '.hgtags' entry for `tag` on top of the branch tip.

    Returns the node of the new changeset.
    """
    branch = repo[node].branch()
    tip = branch_tip(repo, branch)
    tip = repo[tip]

    def getfilectx(repo, memctx, f):
        # Existing file content (if any) plus the new tag line.
        try:
            fctx = tip.filectx(f)
            data = fctx.data()
        except error.ManifestLookupError:
            data = ""
        content = data + "%s %s\n" % (hghex(node), tag)
        return context.memfilectx(f, content, False, False, None)

    p1 = tip.hex()
    p2 = '\0' * 20
    if not author:
        author = (None, 0, 0)
    user, date, tz = author

    ctx = context.memctx(repo, (p1, p2), msg,
            ['.hgtags'], getfilectx,
            user, (date, tz), {'branch' : branch})

    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    tagnode = repo.commitctx(ctx)

    encoding.encoding = tmp

    return tagnode

def do_export(parser):
    """Answer the 'export' command.

    Consumes the fast-export stream, pushes to the peer if there is
    one, updates bookmarks and prints an 'ok'/'error' line per ref.
    """
    global parsed_refs, bmarks, peer

    p_bmarks = []

    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            parse_tag(parser)
        elif parser.check('feature'):
            pass
        else:
            die('unhandled export command: %s', line)

    for ref, node in parsed_refs.iteritems():
        # Trailing slash matters: 'refs/heads/branchesfoo' is a bookmark.
        if ref.startswith('refs/heads/branches/'):
            # `branches` is keyed by hg names, so undo the git escaping.
            branch = hgref(ref[len('refs/heads/branches/'):])
            if branch in branches and node in branches[branch]:
                # up to date
                continue
            print("ok %s" % ref)
        elif ref.startswith('refs/heads/'):
            # Bookmarks are handled after the push below.
            bmark = ref[len('refs/heads/'):]
            p_bmarks.append((bmark, node))
            continue
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            tag = hgref(tag)
            author, msg = parsed_tags.get(tag, (None, None))
            if mode == 'git':
                if not msg:
                    msg = 'Added tag %s for changeset %s' % (tag, hghex(node[:6]))
                write_tag(parser.repo, tag, node, msg, author)
            else:
                fp = parser.repo.opener('localtags', 'a')
                fp.write('%s %s\n' % (hghex(node), tag))
                fp.close()
            print("ok %s" % ref)
        else:
            # transport-helper/fast-export bugs
            continue

    if peer:
        parser.repo.push(peer, force=force_push)

    # handle bookmarks
    for bmark, node in p_bmarks:
        ref = 'refs/heads/' + bmark
        new = hghex(node)

        if bmark in bmarks:
            old = bmarks[bmark].hex()
        else:
            old = ''

        if old == new:
            continue

        if bmark == 'master' and 'master' not in parser.repo._bookmarks:
            # fake bookmark
            pass
        elif bookmarks.pushbookmark(parser.repo, bmark, old, new):
            # updated locally
            pass
        else:
            print("error %s" % ref)
            continue

        if peer:
            rb = peer.listkeys('bookmarks')
            old = rb.get(bmark, '')
            if not peer.pushkey('bookmarks', bmark, old, new):
                print("error %s" % ref)
                continue

        print("ok %s" % ref)

    print("")

def fix_path(alias, repo, orig_url):
    """Rewrite remote.<alias>.url to an absolute path for relative file URLs."""
    url = urlparse.urlparse(orig_url, 'file')
    if url.scheme != 'file' or os.path.isabs(url.path):
        return
    abs_url = urlparse.urljoin("%s/" % os.getcwd(), orig_url)
    cmd = ['git', 'config', 'remote.%s.url' % alias, "hg::%s" % abs_url]
    subprocess.call(cmd)

def main(args):
    """Entry point: args are (program, alias, url).

    Sets up the global state, opens the repository and serves the
    commands read from stdin until an empty line.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches, force_push, is_tmp
    global parsed_tags
    global filenodes

    alias = args[1]
    url = args[2]
    peer = None

    hg_git_compat = False
    track_branches = True
    force_push = True

    try:
        if get_config('remote-hg.hg-git-compat') == 'true\n':
            hg_git_compat = True
            track_branches = False
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
        if get_config('remote-hg.force-push') == 'false\n':
            force_push = False
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode = 'hg'
        bad_mail = 'none@none'
        bad_name = ''
    else:
        mode = 'git'
        bad_mail = 'unknown'
        bad_name = 'Unknown'

    # When the alias is just 'hg::<url>' the state directory is keyed by
    # a hash of the alias and removed again at exit (see bye()).
    if alias[4:] == url:
        is_tmp = True
        alias = hashlib.sha1(alias).hexdigest()
    else:
        is_tmp = False

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}
    marks = None
    parsed_tags = {}
    filenodes = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not is_tmp:
        fix_path(alias, peer or repo, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            die('unhandled command: %s', line)
        sys.stdout.flush()

# Defaults so bye() is safe even if main() fails before assigning them.
marks = None
is_tmp = False

def bye():
    """atexit hook: store the marks, or remove the temporary state dir."""
    if not marks:
        return
    if not is_tmp:
        marks.store()
    else:
        shutil.rmtree(dirname)

atexit.register(bye)
sys.exit(main(sys.argv))