e37e278c26d6f9f3cd0bd897212deb61152d9df7
   1#!/usr/bin/env python
   2#
   3# Copyright (c) 2012 Felipe Contreras
   4#
   5
   6# Inspired by Rocco Rutte's hg-fast-export
   7
   8# Just copy to your ~/bin, or anywhere in your $PATH.
   9# Then you can clone with:
  10# git clone hg::/path/to/mercurial/repo/
  11
  12from mercurial import hg, ui, bookmarks
  13
  14import re
  15import sys
  16import os
  17import json
  18
# Captures the leading run of characters that contains neither '<' nor '>';
# used as a fallback to pull a bare name out of a user string.
NAME_RE = re.compile('^([^<>]+)')
# Matches a whole "name <mail>" string: group 1 is the (optional) name,
# group 2 is the text between the angle brackets.
AUTHOR_RE = re.compile('^([^<>]+?)? ?<([^<>]+)>$')
  21
  22def die(msg, *args):
  23    sys.stderr.write('ERROR: %s\n' % (msg % args))
  24    sys.exit(1)
  25
  26def warn(msg, *args):
  27    sys.stderr.write('WARNING: %s\n' % (msg % args))
  28
  29def gitmode(flags):
  30    return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644'
  31
  32def gittz(tz):
  33    return '%+03d%02d' % (-tz / 3600, -tz % 3600 / 60)
  34
  35class Marks:
  36
  37    def __init__(self, path):
  38        self.path = path
  39        self.tips = {}
  40        self.marks = {}
  41        self.last_mark = 0
  42
  43        self.load()
  44
  45    def load(self):
  46        if not os.path.exists(self.path):
  47            return
  48
  49        tmp = json.load(open(self.path))
  50
  51        self.tips = tmp['tips']
  52        self.marks = tmp['marks']
  53        self.last_mark = tmp['last-mark']
  54
  55    def dict(self):
  56        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }
  57
  58    def store(self):
  59        json.dump(self.dict(), open(self.path, 'w'))
  60
  61    def __str__(self):
  62        return str(self.dict())
  63
  64    def from_rev(self, rev):
  65        return self.marks[str(rev)]
  66
  67    def get_mark(self, rev):
  68        self.last_mark += 1
  69        self.marks[str(rev)] = self.last_mark
  70        return self.last_mark
  71
  72    def is_marked(self, rev):
  73        return self.marks.has_key(str(rev))
  74
  75    def get_tip(self, branch):
  76        return self.tips.get(branch, 0)
  77
  78    def set_tip(self, branch, tip):
  79        self.tips[branch] = tip
  80
  81class Parser:
  82
  83    def __init__(self, repo):
  84        self.repo = repo
  85        self.line = self.get_line()
  86
  87    def get_line(self):
  88        return sys.stdin.readline().strip()
  89
  90    def __getitem__(self, i):
  91        return self.line.split()[i]
  92
  93    def check(self, word):
  94        return self.line.startswith(word)
  95
  96    def each_block(self, separator):
  97        while self.line != separator:
  98            yield self.line
  99            self.line = self.get_line()
 100
 101    def __iter__(self):
 102        return self.each_block('')
 103
 104    def next(self):
 105        self.line = self.get_line()
 106        if self.line == 'done':
 107            self.line = None
 108
 109def export_file(fc):
 110    d = fc.data()
 111    print "M %s inline %s" % (gitmode(fc.flags()), fc.path())
 112    print "data %d" % len(d)
 113    print d
 114
 115def get_filechanges(repo, ctx, parent):
 116    modified = set()
 117    added = set()
 118    removed = set()
 119
 120    cur = ctx.manifest()
 121    prev = repo[parent].manifest().copy()
 122
 123    for fn in cur:
 124        if fn in prev:
 125            if (cur.flags(fn) != prev.flags(fn) or cur[fn] != prev[fn]):
 126                modified.add(fn)
 127            del prev[fn]
 128        else:
 129            added.add(fn)
 130    removed |= set(prev.keys())
 131
 132    return added | modified, removed
 133
 134def fixup_user(user):
 135    user = user.replace('"', '')
 136    name = mail = None
 137    m = AUTHOR_RE.match(user)
 138    if m:
 139        name = m.group(1)
 140        mail = m.group(2).strip()
 141    else:
 142        m = NAME_RE.match(user)
 143        if m:
 144            name = m.group(1).strip()
 145
 146    if not name:
 147        name = 'Unknown'
 148    if not mail:
 149        mail = 'unknown'
 150
 151    return '%s <%s>' % (name, mail)
 152
 153def get_repo(url, alias):
 154    global dirname
 155
 156    myui = ui.ui()
 157    myui.setconfig('ui', 'interactive', 'off')
 158
 159    if hg.islocal(url):
 160        repo = hg.repository(myui, url)
 161    else:
 162        local_path = os.path.join(dirname, 'clone')
 163        if not os.path.exists(local_path):
 164            peer, dstpeer = hg.clone(myui, {}, url, local_path, update=False, pull=True)
 165            repo = dstpeer.local()
 166        else:
 167            repo = hg.repository(myui, local_path)
 168            peer = hg.peer(myui, {}, url)
 169            repo.pull(peer, heads=None, force=True)
 170
 171    return repo
 172
 173def rev_to_mark(rev):
 174    global marks
 175    return marks.from_rev(rev)
 176
 177def export_ref(repo, name, kind, head):
 178    global prefix, marks
 179
 180    ename = '%s/%s' % (kind, name)
 181    tip = marks.get_tip(ename)
 182
 183    # mercurial takes too much time checking this
 184    if tip and tip == head.rev():
 185        # nothing to do
 186        return
 187    revs = repo.revs('%u:%u' % (tip, head))
 188    count = 0
 189
 190    revs = [rev for rev in revs if not marks.is_marked(rev)]
 191
 192    for rev in revs:
 193
 194        c = repo[rev]
 195        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
 196        rev_branch = extra['branch']
 197
 198        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
 199        if 'committer' in extra:
 200            user, time, tz = extra['committer'].rsplit(' ', 2)
 201            committer = "%s %s %s" % (user, time, gittz(int(tz)))
 202        else:
 203            committer = author
 204
 205        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]
 206
 207        if len(parents) == 0:
 208            modified = c.manifest().keys()
 209            removed = []
 210        else:
 211            modified, removed = get_filechanges(repo, c, parents[0])
 212
 213        if len(parents) == 0 and rev:
 214            print 'reset %s/%s' % (prefix, ename)
 215
 216        print "commit %s/%s" % (prefix, ename)
 217        print "mark :%d" % (marks.get_mark(rev))
 218        print "author %s" % (author)
 219        print "committer %s" % (committer)
 220        print "data %d" % (len(desc))
 221        print desc
 222
 223        if len(parents) > 0:
 224            print "from :%s" % (rev_to_mark(parents[0]))
 225            if len(parents) > 1:
 226                print "merge :%s" % (rev_to_mark(parents[1]))
 227
 228        for f in modified:
 229            export_file(c.filectx(f))
 230        for f in removed:
 231            print "D %s" % (f)
 232        print
 233
 234        count += 1
 235        if (count % 100 == 0):
 236            print "progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs))
 237            print "#############################################################"
 238
 239    # make sure the ref is updated
 240    print "reset %s/%s" % (prefix, ename)
 241    print "from :%u" % rev_to_mark(rev)
 242    print
 243
 244    marks.set_tip(ename, rev)
 245
 246def export_tag(repo, tag):
 247    export_ref(repo, tag, 'tags', repo[tag])
 248
 249def export_bookmark(repo, bmark):
 250    head = bmarks[bmark]
 251    export_ref(repo, bmark, 'bookmarks', head)
 252
 253def export_branch(repo, branch):
 254    tip = get_branch_tip(repo, branch)
 255    head = repo[tip]
 256    export_ref(repo, branch, 'branches', head)
 257
 258def export_head(repo):
 259    global g_head
 260    export_ref(repo, g_head[0], 'bookmarks', g_head[1])
 261
 262def do_capabilities(parser):
 263    global prefix, dirname
 264
 265    print "import"
 266    print "refspec refs/heads/branches/*:%s/branches/*" % prefix
 267    print "refspec refs/heads/*:%s/bookmarks/*" % prefix
 268    print "refspec refs/tags/*:%s/tags/*" % prefix
 269    print
 270
 271def get_branch_tip(repo, branch):
 272    global branches
 273
 274    heads = branches.get(branch, None)
 275    if not heads:
 276        return None
 277
 278    # verify there's only one head
 279    if (len(heads) > 1):
 280        warn("Branch '%s' has more than one head, consider merging" % branch)
 281        # older versions of mercurial don't have this
 282        if hasattr(repo, "branchtip"):
 283            return repo.branchtip(branch)
 284
 285    return heads[0]
 286
 287def list_head(repo, cur):
 288    global g_head
 289
 290    head = bookmarks.readcurrent(repo)
 291    if not head:
 292        return
 293    node = repo[head]
 294    print "@refs/heads/%s HEAD" % head
 295    g_head = (head, node)
 296
 297def do_list(parser):
 298    global branches, bmarks
 299
 300    repo = parser.repo
 301    for branch in repo.branchmap():
 302        heads = repo.branchheads(branch)
 303        if len(heads):
 304            branches[branch] = heads
 305
 306    for bmark, node in bookmarks.listbookmarks(repo).iteritems():
 307        bmarks[bmark] = repo[node]
 308
 309    cur = repo.dirstate.branch()
 310
 311    list_head(repo, cur)
 312    for branch in branches:
 313        print "? refs/heads/branches/%s" % branch
 314    for bmark in bmarks:
 315        print "? refs/heads/%s" % bmark
 316
 317    for tag, node in repo.tagslist():
 318        if tag == 'tip':
 319            continue
 320        print "? refs/tags/%s" % tag
 321
 322    print
 323
 324def do_import(parser):
 325    repo = parser.repo
 326
 327    path = os.path.join(dirname, 'marks-git')
 328
 329    print "feature done"
 330    if os.path.exists(path):
 331        print "feature import-marks=%s" % path
 332    print "feature export-marks=%s" % path
 333    sys.stdout.flush()
 334
 335    # lets get all the import lines
 336    while parser.check('import'):
 337        ref = parser[1]
 338
 339        if (ref == 'HEAD'):
 340            export_head(repo)
 341        elif ref.startswith('refs/heads/branches/'):
 342            branch = ref[len('refs/heads/branches/'):]
 343            export_branch(repo, branch)
 344        elif ref.startswith('refs/heads/'):
 345            bmark = ref[len('refs/heads/'):]
 346            export_bookmark(repo, bmark)
 347        elif ref.startswith('refs/tags/'):
 348            tag = ref[len('refs/tags/'):]
 349            export_tag(repo, tag)
 350
 351        parser.next()
 352
 353    print 'done'
 354
 355def main(args):
 356    global prefix, dirname, marks, branches, bmarks
 357
 358    alias = args[1]
 359    url = args[2]
 360
 361    gitdir = os.environ['GIT_DIR']
 362    dirname = os.path.join(gitdir, 'hg', alias)
 363    branches = {}
 364    bmarks = {}
 365
 366    repo = get_repo(url, alias)
 367    prefix = 'refs/hg/%s' % alias
 368
 369    if not os.path.exists(dirname):
 370        os.makedirs(dirname)
 371
 372    marks_path = os.path.join(dirname, 'marks-hg')
 373    marks = Marks(marks_path)
 374
 375    parser = Parser(repo)
 376    for line in parser:
 377        if parser.check('capabilities'):
 378            do_capabilities(parser)
 379        elif parser.check('list'):
 380            do_list(parser)
 381        elif parser.check('import'):
 382            do_import(parser)
 383        elif parser.check('export'):
 384            do_export(parser)
 385        else:
 386            die('unhandled command: %s' % line)
 387        sys.stdout.flush()
 388
 389    marks.store()
 390
 391sys.exit(main(sys.argv))