d5857560ed9ea8e6731605f4ae9cd29d7944d5cf
   1#!/usr/bin/env python
   2#
   3# Copyright (c) 2012 Felipe Contreras
   4#
   5
   6# Inspired by Rocco Rutte's hg-fast-export
   7
   8# Just copy to your ~/bin, or anywhere in your $PATH.
   9# Then you can clone with:
  10# git clone hg::/path/to/mercurial/repo/
  11
import json
import os
import re
import shutil
import subprocess
import sys
import urllib

from mercurial import hg, ui, bookmarks, context, util, encoding
  20
  21#
  22# If you want to switch to hg-git compatibility mode:
  23# git config --global remote-hg.hg-git-compat true
  24#
  25# git:
  26# Sensible defaults for git.
  27# hg bookmarks are exported as git branches, hg branches are prefixed
  28# with 'branches/'.
  29#
  30# hg:
  31# Emulate hg-git.
  32# Only hg bookmarks are exported as git branches.
# Commits are modified to preserve hg information and allow bidirectionality.
  34#
  35
  36NAME_RE = re.compile('^([^<>]+)')
  37AUTHOR_RE = re.compile('^([^<>]+?)? ?<([^<>]+)>$')
  38RAW_AUTHOR_RE = re.compile('^(\w+) (?:(.+)? )?<(.+)> (\d+) ([+-]\d+)')
  39
  40def die(msg, *args):
  41    sys.stderr.write('ERROR: %s\n' % (msg % args))
  42    sys.exit(1)
  43
  44def warn(msg, *args):
  45    sys.stderr.write('WARNING: %s\n' % (msg % args))
  46
  47def gitmode(flags):
  48    return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644'
  49
  50def gittz(tz):
  51    return '%+03d%02d' % (-tz / 3600, -tz % 3600 / 60)
  52
  53def hgmode(mode):
  54    m = { '0100755': 'x', '0120000': 'l' }
  55    return m.get(mode, '')
  56
  57class Marks:
  58
  59    def __init__(self, path):
  60        self.path = path
  61        self.tips = {}
  62        self.marks = {}
  63        self.rev_marks = {}
  64        self.last_mark = 0
  65
  66        self.load()
  67
  68    def load(self):
  69        if not os.path.exists(self.path):
  70            return
  71
  72        tmp = json.load(open(self.path))
  73
  74        self.tips = tmp['tips']
  75        self.marks = tmp['marks']
  76        self.last_mark = tmp['last-mark']
  77
  78        for rev, mark in self.marks.iteritems():
  79            self.rev_marks[mark] = int(rev)
  80
  81    def dict(self):
  82        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }
  83
  84    def store(self):
  85        json.dump(self.dict(), open(self.path, 'w'))
  86
  87    def __str__(self):
  88        return str(self.dict())
  89
  90    def from_rev(self, rev):
  91        return self.marks[str(rev)]
  92
  93    def to_rev(self, mark):
  94        return self.rev_marks[mark]
  95
  96    def get_mark(self, rev):
  97        self.last_mark += 1
  98        self.marks[str(rev)] = self.last_mark
  99        return self.last_mark
 100
 101    def new_mark(self, rev, mark):
 102        self.marks[str(rev)] = mark
 103        self.rev_marks[mark] = rev
 104        self.last_mark = mark
 105
 106    def is_marked(self, rev):
 107        return self.marks.has_key(str(rev))
 108
 109    def get_tip(self, branch):
 110        return self.tips.get(branch, 0)
 111
 112    def set_tip(self, branch, tip):
 113        self.tips[branch] = tip
 114
 115class Parser:
 116
 117    def __init__(self, repo):
 118        self.repo = repo
 119        self.line = self.get_line()
 120
 121    def get_line(self):
 122        return sys.stdin.readline().strip()
 123
 124    def __getitem__(self, i):
 125        return self.line.split()[i]
 126
 127    def check(self, word):
 128        return self.line.startswith(word)
 129
 130    def each_block(self, separator):
 131        while self.line != separator:
 132            yield self.line
 133            self.line = self.get_line()
 134
 135    def __iter__(self):
 136        return self.each_block('')
 137
 138    def next(self):
 139        self.line = self.get_line()
 140        if self.line == 'done':
 141            self.line = None
 142
 143    def get_mark(self):
 144        i = self.line.index(':') + 1
 145        return int(self.line[i:])
 146
 147    def get_data(self):
 148        if not self.check('data'):
 149            return None
 150        i = self.line.index(' ') + 1
 151        size = int(self.line[i:])
 152        return sys.stdin.read(size)
 153
 154    def get_author(self):
 155        m = RAW_AUTHOR_RE.match(self.line)
 156        if not m:
 157            return None
 158        _, name, email, date, tz = m.groups()
 159
 160        if email != 'unknown':
 161            if name:
 162                user = '%s <%s>' % (name, email)
 163            else:
 164                user = '<%s>' % (email)
 165        else:
 166            user = name
 167
 168        tz = int(tz)
 169        tz = ((tz / 100) * 3600) + ((tz % 100) * 60)
 170        return (user, int(date), -tz)
 171
 172def export_file(fc):
 173    d = fc.data()
 174    print "M %s inline %s" % (gitmode(fc.flags()), fc.path())
 175    print "data %d" % len(d)
 176    print d
 177
 178def get_filechanges(repo, ctx, parent):
 179    modified = set()
 180    added = set()
 181    removed = set()
 182
 183    cur = ctx.manifest()
 184    prev = repo[parent].manifest().copy()
 185
 186    for fn in cur:
 187        if fn in prev:
 188            if (cur.flags(fn) != prev.flags(fn) or cur[fn] != prev[fn]):
 189                modified.add(fn)
 190            del prev[fn]
 191        else:
 192            added.add(fn)
 193    removed |= set(prev.keys())
 194
 195    return added | modified, removed
 196
 197def fixup_user(user):
 198    user = user.replace('"', '')
 199    name = mail = None
 200    m = AUTHOR_RE.match(user)
 201    if m:
 202        name = m.group(1)
 203        mail = m.group(2).strip()
 204    else:
 205        m = NAME_RE.match(user)
 206        if m:
 207            name = m.group(1).strip()
 208
 209    if not name:
 210        name = 'Unknown'
 211    if not mail:
 212        mail = 'unknown'
 213
 214    return '%s <%s>' % (name, mail)
 215
 216def get_repo(url, alias):
 217    global dirname, peer
 218
 219    myui = ui.ui()
 220    myui.setconfig('ui', 'interactive', 'off')
 221
 222    if hg.islocal(url):
 223        repo = hg.repository(myui, url)
 224    else:
 225        local_path = os.path.join(dirname, 'clone')
 226        if not os.path.exists(local_path):
 227            peer, dstpeer = hg.clone(myui, {}, url, local_path, update=False, pull=True)
 228            repo = dstpeer.local()
 229        else:
 230            repo = hg.repository(myui, local_path)
 231            peer = hg.peer(myui, {}, url)
 232            repo.pull(peer, heads=None, force=True)
 233
 234    return repo
 235
 236def rev_to_mark(rev):
 237    global marks
 238    return marks.from_rev(rev)
 239
 240def mark_to_rev(mark):
 241    global marks
 242    return marks.to_rev(mark)
 243
 244def export_ref(repo, name, kind, head):
 245    global prefix, marks, mode
 246
 247    ename = '%s/%s' % (kind, name)
 248    tip = marks.get_tip(ename)
 249
 250    # mercurial takes too much time checking this
 251    if tip and tip == head.rev():
 252        # nothing to do
 253        return
 254    revs = repo.revs('%u:%u' % (tip, head))
 255    count = 0
 256
 257    revs = [rev for rev in revs if not marks.is_marked(rev)]
 258
 259    for rev in revs:
 260
 261        c = repo[rev]
 262        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
 263        rev_branch = extra['branch']
 264
 265        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
 266        if 'committer' in extra:
 267            user, time, tz = extra['committer'].rsplit(' ', 2)
 268            committer = "%s %s %s" % (user, time, gittz(int(tz)))
 269        else:
 270            committer = author
 271
 272        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]
 273
 274        if len(parents) == 0:
 275            modified = c.manifest().keys()
 276            removed = []
 277        else:
 278            modified, removed = get_filechanges(repo, c, parents[0])
 279
 280        if mode == 'hg':
 281            extra_msg = ''
 282
 283            if rev_branch != 'default':
 284                extra_msg += 'branch : %s\n' % rev_branch
 285
 286            renames = []
 287            for f in c.files():
 288                if f not in c.manifest():
 289                    continue
 290                rename = c.filectx(f).renamed()
 291                if rename:
 292                    renames.append((rename[0], f))
 293
 294            for e in renames:
 295                extra_msg += "rename : %s => %s\n" % e
 296
 297            for key, value in extra.iteritems():
 298                if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
 299                    continue
 300                else:
 301                    extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))
 302
 303            desc += '\n'
 304            if extra_msg:
 305                desc += '\n--HG--\n' + extra_msg
 306
 307        if len(parents) == 0 and rev:
 308            print 'reset %s/%s' % (prefix, ename)
 309
 310        print "commit %s/%s" % (prefix, ename)
 311        print "mark :%d" % (marks.get_mark(rev))
 312        print "author %s" % (author)
 313        print "committer %s" % (committer)
 314        print "data %d" % (len(desc))
 315        print desc
 316
 317        if len(parents) > 0:
 318            print "from :%s" % (rev_to_mark(parents[0]))
 319            if len(parents) > 1:
 320                print "merge :%s" % (rev_to_mark(parents[1]))
 321
 322        for f in modified:
 323            export_file(c.filectx(f))
 324        for f in removed:
 325            print "D %s" % (f)
 326        print
 327
 328        count += 1
 329        if (count % 100 == 0):
 330            print "progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs))
 331            print "#############################################################"
 332
 333    # make sure the ref is updated
 334    print "reset %s/%s" % (prefix, ename)
 335    print "from :%u" % rev_to_mark(rev)
 336    print
 337
 338    marks.set_tip(ename, rev)
 339
 340def export_tag(repo, tag):
 341    export_ref(repo, tag, 'tags', repo[tag])
 342
 343def export_bookmark(repo, bmark):
 344    head = bmarks[bmark]
 345    export_ref(repo, bmark, 'bookmarks', head)
 346
 347def export_branch(repo, branch):
 348    tip = get_branch_tip(repo, branch)
 349    head = repo[tip]
 350    export_ref(repo, branch, 'branches', head)
 351
 352def export_head(repo):
 353    global g_head
 354    export_ref(repo, g_head[0], 'bookmarks', g_head[1])
 355
 356def do_capabilities(parser):
 357    global prefix, dirname
 358
 359    print "import"
 360    print "export"
 361    print "refspec refs/heads/branches/*:%s/branches/*" % prefix
 362    print "refspec refs/heads/*:%s/bookmarks/*" % prefix
 363    print "refspec refs/tags/*:%s/tags/*" % prefix
 364
 365    path = os.path.join(dirname, 'marks-git')
 366
 367    if os.path.exists(path):
 368        print "*import-marks %s" % path
 369    print "*export-marks %s" % path
 370
 371    print
 372
 373def get_branch_tip(repo, branch):
 374    global branches
 375
 376    heads = branches.get(branch, None)
 377    if not heads:
 378        return None
 379
 380    # verify there's only one head
 381    if (len(heads) > 1):
 382        warn("Branch '%s' has more than one head, consider merging" % branch)
 383        # older versions of mercurial don't have this
 384        if hasattr(repo, "branchtip"):
 385            return repo.branchtip(branch)
 386
 387    return heads[0]
 388
 389def list_head(repo, cur):
 390    global g_head
 391
 392    head = bookmarks.readcurrent(repo)
 393    if not head:
 394        return
 395    node = repo[head]
 396    print "@refs/heads/%s HEAD" % head
 397    g_head = (head, node)
 398
 399def do_list(parser):
 400    global branches, bmarks, mode
 401
 402    repo = parser.repo
 403    for branch in repo.branchmap():
 404        heads = repo.branchheads(branch)
 405        if len(heads):
 406            branches[branch] = heads
 407
 408    for bmark, node in bookmarks.listbookmarks(repo).iteritems():
 409        bmarks[bmark] = repo[node]
 410
 411    cur = repo.dirstate.branch()
 412
 413    list_head(repo, cur)
 414
 415    if mode != 'hg':
 416        for branch in branches:
 417            print "? refs/heads/branches/%s" % branch
 418
 419    for bmark in bmarks:
 420        print "? refs/heads/%s" % bmark
 421
 422    for tag, node in repo.tagslist():
 423        if tag == 'tip':
 424            continue
 425        print "? refs/tags/%s" % tag
 426
 427    print
 428
 429def do_import(parser):
 430    repo = parser.repo
 431
 432    path = os.path.join(dirname, 'marks-git')
 433
 434    print "feature done"
 435    if os.path.exists(path):
 436        print "feature import-marks=%s" % path
 437    print "feature export-marks=%s" % path
 438    sys.stdout.flush()
 439
 440    tmp = encoding.encoding
 441    encoding.encoding = 'utf-8'
 442
 443    # lets get all the import lines
 444    while parser.check('import'):
 445        ref = parser[1]
 446
 447        if (ref == 'HEAD'):
 448            export_head(repo)
 449        elif ref.startswith('refs/heads/branches/'):
 450            branch = ref[len('refs/heads/branches/'):]
 451            export_branch(repo, branch)
 452        elif ref.startswith('refs/heads/'):
 453            bmark = ref[len('refs/heads/'):]
 454            export_bookmark(repo, bmark)
 455        elif ref.startswith('refs/tags/'):
 456            tag = ref[len('refs/tags/'):]
 457            export_tag(repo, tag)
 458
 459        parser.next()
 460
 461    encoding.encoding = tmp
 462
 463    print 'done'
 464
 465def parse_blob(parser):
 466    global blob_marks
 467
 468    parser.next()
 469    mark = parser.get_mark()
 470    parser.next()
 471    data = parser.get_data()
 472    blob_marks[mark] = data
 473    parser.next()
 474    return
 475
 476def get_merge_files(repo, p1, p2, files):
 477    for e in repo[p1].files():
 478        if e not in files:
 479            if e not in repo[p1].manifest():
 480                continue
 481            f = { 'ctx' : repo[p1][e] }
 482            files[e] = f
 483
 484def parse_commit(parser):
 485    global marks, blob_marks, bmarks, parsed_refs
 486    global mode
 487
 488    from_mark = merge_mark = None
 489
 490    ref = parser[1]
 491    parser.next()
 492
 493    commit_mark = parser.get_mark()
 494    parser.next()
 495    author = parser.get_author()
 496    parser.next()
 497    committer = parser.get_author()
 498    parser.next()
 499    data = parser.get_data()
 500    parser.next()
 501    if parser.check('from'):
 502        from_mark = parser.get_mark()
 503        parser.next()
 504    if parser.check('merge'):
 505        merge_mark = parser.get_mark()
 506        parser.next()
 507        if parser.check('merge'):
 508            die('octopus merges are not supported yet')
 509
 510    files = {}
 511
 512    for line in parser:
 513        if parser.check('M'):
 514            t, m, mark_ref, path = line.split(' ')
 515            mark = int(mark_ref[1:])
 516            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
 517        elif parser.check('D'):
 518            t, path = line.split(' ')
 519            f = { 'deleted' : True }
 520        else:
 521            die('Unknown file command: %s' % line)
 522        files[path] = f
 523
 524    def getfilectx(repo, memctx, f):
 525        of = files[f]
 526        if 'deleted' in of:
 527            raise IOError
 528        if 'ctx' in of:
 529            return of['ctx']
 530        is_exec = of['mode'] == 'x'
 531        is_link = of['mode'] == 'l'
 532        rename = of.get('rename', None)
 533        return context.memfilectx(f, of['data'],
 534                is_link, is_exec, rename)
 535
 536    repo = parser.repo
 537
 538    user, date, tz = author
 539    extra = {}
 540
 541    if committer != author:
 542        extra['committer'] = "%s %u %u" % committer
 543
 544    if from_mark:
 545        p1 = repo.changelog.node(mark_to_rev(from_mark))
 546    else:
 547        p1 = '\0' * 20
 548
 549    if merge_mark:
 550        p2 = repo.changelog.node(mark_to_rev(merge_mark))
 551    else:
 552        p2 = '\0' * 20
 553
 554    #
 555    # If files changed from any of the parents, hg wants to know, but in git if
 556    # nothing changed from the first parent, nothing changed.
 557    #
 558    if merge_mark:
 559        get_merge_files(repo, p1, p2, files)
 560
 561    if mode == 'hg':
 562        i = data.find('\n--HG--\n')
 563        if i >= 0:
 564            tmp = data[i + len('\n--HG--\n'):].strip()
 565            for k, v in [e.split(' : ') for e in tmp.split('\n')]:
 566                if k == 'rename':
 567                    old, new = v.split(' => ', 1)
 568                    files[new]['rename'] = old
 569                elif k == 'branch':
 570                    extra[k] = v
 571                elif k == 'extra':
 572                    ek, ev = v.split(' : ', 1)
 573                    extra[ek] = urllib.unquote(ev)
 574            data = data[:i]
 575
 576    ctx = context.memctx(repo, (p1, p2), data,
 577            files.keys(), getfilectx,
 578            user, (date, tz), extra)
 579
 580    tmp = encoding.encoding
 581    encoding.encoding = 'utf-8'
 582
 583    node = repo.commitctx(ctx)
 584
 585    encoding.encoding = tmp
 586
 587    rev = repo[node].rev()
 588
 589    parsed_refs[ref] = node
 590
 591    marks.new_mark(rev, commit_mark)
 592
 593def parse_reset(parser):
 594    ref = parser[1]
 595    parser.next()
 596    # ugh
 597    if parser.check('commit'):
 598        parse_commit(parser)
 599        return
 600    if not parser.check('from'):
 601        return
 602    from_mark = parser.get_mark()
 603    parser.next()
 604
 605    node = parser.repo.changelog.node(mark_to_rev(from_mark))
 606    parsed_refs[ref] = node
 607
 608def parse_tag(parser):
 609    name = parser[1]
 610    parser.next()
 611    from_mark = parser.get_mark()
 612    parser.next()
 613    tagger = parser.get_author()
 614    parser.next()
 615    data = parser.get_data()
 616    parser.next()
 617
 618    # nothing to do
 619
 620def do_export(parser):
 621    global parsed_refs, bmarks, peer
 622
 623    parser.next()
 624
 625    for line in parser.each_block('done'):
 626        if parser.check('blob'):
 627            parse_blob(parser)
 628        elif parser.check('commit'):
 629            parse_commit(parser)
 630        elif parser.check('reset'):
 631            parse_reset(parser)
 632        elif parser.check('tag'):
 633            parse_tag(parser)
 634        elif parser.check('feature'):
 635            pass
 636        else:
 637            die('unhandled export command: %s' % line)
 638
 639    for ref, node in parsed_refs.iteritems():
 640        if ref.startswith('refs/heads/branches'):
 641            pass
 642        elif ref.startswith('refs/heads/'):
 643            bmark = ref[len('refs/heads/'):]
 644            if bmark in bmarks:
 645                old = bmarks[bmark].hex()
 646            else:
 647                old = ''
 648            if not bookmarks.pushbookmark(parser.repo, bmark, old, node):
 649                continue
 650        elif ref.startswith('refs/tags/'):
 651            tag = ref[len('refs/tags/'):]
 652            parser.repo.tag([tag], node, None, True, None, {})
 653        print "ok %s" % ref
 654
 655    print
 656
 657    if peer:
 658        parser.repo.push(peer, force=False)
 659
 660def main(args):
 661    global prefix, dirname, branches, bmarks
 662    global marks, blob_marks, parsed_refs
 663    global peer, mode
 664
 665    alias = args[1]
 666    url = args[2]
 667    peer = None
 668
 669    cmd = ['git', 'config', '--get', 'remote-hg.hg-git-compat']
 670    hg_git_compat = False
 671    try:
 672        if subprocess.check_output(cmd) == 'true\n':
 673            hg_git_compat = True
 674    except subprocess.CalledProcessError:
 675        pass
 676
 677    if hg_git_compat:
 678        mode = 'hg'
 679    else:
 680        mode = 'git'
 681
 682    if alias[4:] == url:
 683        is_tmp = True
 684        alias = util.sha1(alias).hexdigest()
 685    else:
 686        is_tmp = False
 687
 688    gitdir = os.environ['GIT_DIR']
 689    dirname = os.path.join(gitdir, 'hg', alias)
 690    branches = {}
 691    bmarks = {}
 692    blob_marks = {}
 693    parsed_refs = {}
 694
 695    repo = get_repo(url, alias)
 696    prefix = 'refs/hg/%s' % alias
 697
 698    if not os.path.exists(dirname):
 699        os.makedirs(dirname)
 700
 701    marks_path = os.path.join(dirname, 'marks-hg')
 702    marks = Marks(marks_path)
 703
 704    parser = Parser(repo)
 705    for line in parser:
 706        if parser.check('capabilities'):
 707            do_capabilities(parser)
 708        elif parser.check('list'):
 709            do_list(parser)
 710        elif parser.check('import'):
 711            do_import(parser)
 712        elif parser.check('export'):
 713            do_export(parser)
 714        else:
 715            die('unhandled command: %s' % line)
 716        sys.stdout.flush()
 717
 718    if not is_tmp:
 719        marks.store()
 720    else:
 721        shutil.rmtree(dirname)
 722
 723sys.exit(main(sys.argv))