1#!/usr/bin/env python
2#
3# Copyright (c) 2012 Felipe Contreras
4#
5
6# Inspired by Rocco Rutte's hg-fast-export
7
8# Just copy to your ~/bin, or anywhere in your $PATH.
9# Then you can clone with:
10# git clone hg::/path/to/mercurial/repo/
11
12from mercurial import hg, ui, bookmarks, context, util, encoding
13
14import re
15import sys
16import os
17import json
18import shutil
19import subprocess
20import urllib
21
22#
23# If you want to switch to hg-git compatibility mode:
24# git config --global remote-hg.hg-git-compat true
25#
26# If you are not in hg-git-compat mode and want to disable the tracking of
27# named branches:
28# git config --global remote-hg.track-branches false
29#
30# git:
31# Sensible defaults for git.
32# hg bookmarks are exported as git branches, hg branches are prefixed
33# with 'branches/', HEAD is a special case.
34#
35# hg:
36# Emulate hg-git.
37# Only hg bookmarks are exported as git branches.
38# Commits are modified to preserve hg information and allow bidirectionality.
39#
40
# Leading run of characters that can belong to a name, i.e. everything
# before the first '<' or '>'.
NAME_RE = re.compile(r'^([^<>]+)')
# 'Name <mail>' where the name is optional and neither part contains
# '<' or '>'.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Lenient 'Name <mail>trailing' form: the closing '>' and any text after it
# are optional; the trailing text is captured as a third group.
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# 'keyword [name ]<mail> timestamp +hhmm' as found on author/committer
# lines.  Raw string so that '\w' and '\d' reach the regex engine without
# relying on invalid string escape sequences.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
45
def die(msg, *args):
    """Write 'ERROR: <message>' to stderr and exit with status 1.

    `msg` is %-formatted with `args` only when arguments are supplied, so
    an already formatted message that happens to contain '%' (a path, a
    ref name) is reported verbatim instead of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % (msg,))
    sys.exit(1)
49
def warn(msg, *args):
    """Write 'WARNING: <message>' to stderr.

    `msg` is %-formatted with `args` only when arguments are supplied, so
    an already formatted message containing '%' is printed as-is rather
    than raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % (msg,))
52
def gitmode(flags):
    """Translate mercurial file flags into the matching git file mode.

    The symlink flag ('l') wins over the executable flag ('x'); anything
    else maps to a regular file.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
55
def gittz(tz):
    """Format an offset in seconds as a '+hhmm' / '-hhmm' string.

    The sign is inverted: a positive `tz` yields a '-hhmm' string and a
    negative one '+hhmm'.  Sign and magnitude are handled separately so
    that offsets which are not a whole number of hours come out right
    (flooring a negative value used to turn 5h30 into '-0630').
    """
    offset = -tz
    sign = '-' if offset < 0 else '+'
    minutes = abs(offset) // 60
    return '%s%02d%02d' % (sign, minutes // 60, minutes % 60)
58
def hgmode(mode):
    """Translate a git file mode into mercurial file flags.

    Returns 'x' for executables, 'l' for symlinks and '' for any other
    mode.
    """
    if mode == '100755':
        return 'x'
    if mode == '120000':
        return 'l'
    return ''
62
def get_config(config):
    """Return the raw stdout of `git config --get <config>`.

    The output is returned unmodified, so a value that is set comes back
    with its trailing newline.
    """
    git = subprocess.Popen(['git', 'config', '--get', config],
                           stdout=subprocess.PIPE)
    stdout = git.communicate()[0]
    return stdout
68
class Marks:
    """Revision <-> mark bookkeeping persisted as JSON at `path`.

    State kept:
      tips      -- name -> tip, as recorded by set_tip()
      marks     -- str(rev) -> mark
      rev_marks -- mark -> rev (rebuilt from `marks` on load)
      last_mark -- highest mark handed out / recorded so far
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Load the state from `self.path`; do nothing if it does not exist."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings; the reverse map uses integer revs.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serializable state (tips, marks and last mark)."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to `self.path` as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for `rev`; raises KeyError if unknown."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for `mark`; raises KeyError if unknown."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark, record it for `rev` and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen `mark` for `rev` in both directions."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Return True if `rev` already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the tip recorded for `branch`, or 0 if there is none."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Record `tip` as the tip of `branch`."""
        self.tips[branch] = tip
126
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    `self.line` always holds the current (stripped) line; the helpers
    below inspect it or advance to the next one.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace removed."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with `word`."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals `separator`.

        A fresh line is read after each yielded one, on top of whatever
        the consumer advanced in between.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # Iterating the parser stops at the first empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a 'data' line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_seconds(tz):
        """Convert a '+hhmm' / '-hhmm' offset into signed seconds.

        The magnitude is split into hours and minutes before the sign is
        applied; floor-dividing a negative value would yield the wrong
        result for offsets such as '-0530'.
        """
        value = int(tz)
        magnitude = abs(value)
        seconds = (magnitude // 100) * 3600 + (magnitude % 100) * 60
        if value < 0:
            return -seconds
        return seconds

    def get_author(self):
        """Parse the current author/committer style line.

        Returns (user, timestamp, tz) where `tz` is the offset in seconds
        with its sign inverted relative to the '+hhmm' on the line, or
        None if the line does not match RAW_AUTHOR_RE.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # An ' ext:(...)' suffix carries URL-quoted text that is
            # appended back to the user string below.
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            # The placeholder mail is dropped again.
            user = name

        if ex:
            user += ex

        return (user, int(date), -self._tz_seconds(tz))
194
def export_file(fc):
    """Print an inline file-modify command for the file context `fc`.

    Emits the 'M <mode> inline <path>' line, the 'data <size>' header and
    then the file contents.
    """
    contents = fc.data()
    filemode = gitmode(fc.flags())
    print("M %s inline %s" % (filemode, fc.path()))
    print("data %d" % len(contents))
    print(contents)
200
def get_filechanges(repo, ctx, parent):
    """Compare the manifest of `ctx` with that of revision `parent`.

    Returns a pair of sets: the files that were added or changed (content
    or flags differ) and the files that exist only in the parent.
    """
    current = ctx.manifest()
    previous = repo[parent].manifest()

    changed = set()
    for path in current:
        if path not in previous:
            # new file
            changed.add(path)
        elif (current.flags(path) != previous.flags(path)
                or current[path] != previous[path]):
            changed.add(path)

    removed = set(path for path in previous if path not in current)

    return changed, removed
219
def fixup_user_git(user):
    """Split a user string into (name, mail) using the strict git rules.

    Double quotes are removed first.  If the string is not of the
    'Name <mail>' form, only a name (the text before any '<' or '>') is
    extracted and the mail is None; if nothing matches both are None.
    """
    cleaned = user.replace('"', '')

    full = AUTHOR_RE.match(cleaned)
    if full:
        return (full.group(1), full.group(2).strip())

    partial = NAME_RE.match(cleaned)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
232
def fixup_user_hg(user):
    """Split a user string into (name, mail) in hg-git compatible fashion.

    Text following the closing '>' is URL-quoted and appended to the name
    as ' ext:(...)'.  Without a '<' the whole sanitized string is the
    name, and also the mail when the input contains an '@'.
    """
    def sanitize(name):
        # stole this from hg-git
        return re.sub('[<>\n]', '?', name.lstrip('< ').rstrip('> '))

    match = AUTHOR_HG_RE.match(user)
    if not match:
        name = sanitize(user)
        mail = name if '@' in user else None
        return (name, mail)

    name = sanitize(match.group(1))
    mail = sanitize(match.group(2))
    trailing = match.group(3)
    if trailing:
        name += ' ext:(' + urllib.quote(trailing) + ')'

    return (name, mail)
253
def fixup_user(user):
    """Return `user` normalized to 'Name <mail>'.

    The parsing rules depend on the global `mode`; a missing name or mail
    is replaced by the global `bad_name` / `bad_mail` placeholder.
    """
    global mode, bad_mail

    split = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = split(user)

    return '%s <%s>' % (name or bad_name, mail or bad_mail)
268
def get_repo(url, alias):
    """Return a local mercurial repository object for `url`.

    Local URLs are opened directly.  Otherwise a clone under
    `<dirname>/clone` is used: created on first use, pulled from the
    remote afterwards.  In the remote case the global `peer` is set.
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')
    myui.fout = sys.stderr

    if hg.islocal(url):
        return hg.repository(myui, url)

    local_path = os.path.join(dirname, 'clone')
    if os.path.exists(local_path):
        # Existing clone: refresh it from the remote.
        repo = hg.repository(myui, local_path)
        peer = hg.peer(myui, {}, url)
        repo.pull(peer, heads=None, force=True)
        return repo

    peer, dstpeer = hg.clone(myui, {}, url, local_path, update=False, pull=True)
    return dstpeer.local()
289
def rev_to_mark(rev):
    """Return the mark recorded for revision `rev` in the global marks."""
    mark = marks.from_rev(rev)
    return mark
293
def mark_to_rev(mark):
    """Return the revision recorded for `mark` in the global marks."""
    rev = marks.to_rev(mark)
    return rev
297
def export_ref(repo, name, kind, head):
    """Print the commits needed to bring the ref `<kind>/<name>` to `head`.

    Every revision between the tip recorded for this ref and `head.rev()`
    that has no mark yet is printed as a commit (author, committer,
    message, parents and file changes), then the ref is reset to the mark
    of `head` and `head.rev()` is recorded as the new tip.

    The final reset and the recorded tip are based on `head.rev()` rather
    than on the last revision written by the loop: the loop may write
    nothing at all (everything was already marked through another ref),
    or may end on a revision older than `head` if `head` itself was
    already marked.
    """
    global prefix, marks, mode

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)
    head_rev = head.rev()

    # mercurial takes too much time checking this
    if tip and tip == head_rev:
        # nothing to do
        return
    revs = xrange(tip, head_rev + 1)
    count = 0

    revs = [rev for rev in revs if not marks.is_marked(rev)]

    for rev in revs:

        c = repo[rev]
        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
        rev_branch = extra['branch']

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            # 'committer' is stored as '<user> <time> <tz>'
            user, time, tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (user, time, gittz(int(tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if len(parents) == 0:
            # root commit: every file in the manifest is new
            modified = c.manifest().keys()
            removed = []
        else:
            modified, removed = get_filechanges(repo, c, parents[0])

        if mode == 'hg':
            # Append the hg-only information to the message after a
            # '--HG--' separator.
            extra_msg = ''

            if rev_branch != 'default':
                extra_msg += 'branch : %s\n' % rev_branch

            renames = []
            for f in c.files():
                if f not in c.manifest():
                    continue
                rename = c.filectx(f).renamed()
                if rename:
                    renames.append((rename[0], f))

            for e in renames:
                extra_msg += "rename : %s => %s\n" % e

            for key, value in extra.items():
                if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
                    continue
                else:
                    extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))

            desc += '\n'
            if extra_msg:
                desc += '\n--HG--\n' + extra_msg

        if len(parents) == 0 and rev:
            # a parentless commit other than revision 0 starts the ref anew
            print('reset %s/%s' % (prefix, ename))

        print("commit %s/%s" % (prefix, ename))
        print("mark :%d" % (marks.get_mark(rev)))
        print("author %s" % (author))
        print("committer %s" % (committer))
        print("data %d" % (len(desc)))
        print(desc)

        if len(parents) > 0:
            print("from :%s" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                print("merge :%s" % (rev_to_mark(parents[1])))

        for f in modified:
            export_file(c.filectx(f))
        for f in removed:
            print("D %s" % (f))
        print('')

        count += 1
        if (count % 100 == 0):
            print("progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs)))
            print("#############################################################")

    # make sure the ref is updated
    print("reset %s/%s" % (prefix, ename))
    print("from :%u" % rev_to_mark(head_rev))
    print('')

    marks.set_tip(ename, head_rev)
393
def export_tag(repo, tag):
    """Export the history leading to `tag` under the 'tags' namespace."""
    head = repo[tag]
    export_ref(repo, tag, 'tags', head)
396
def export_bookmark(repo, bmark):
    """Export the bookmark `bmark`, looked up in the global `bmarks`."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
400
def export_branch(repo, branch):
    """Export the named branch `branch` up to the tip chosen by get_branch_tip()."""
    head = repo[get_branch_tip(repo, branch)]
    export_ref(repo, branch, 'branches', head)
405
def export_head(repo):
    """Export the (name, node) pair stored in the global `g_head` as a bookmark."""
    global g_head
    head_name, head_node = g_head[0], g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_node)
409
def do_capabilities(parser):
    """Print the capability list, terminated by an empty line.

    Announces import/export, the refspecs mapping branches, bookmarks and
    tags below the global `prefix`, and the git marks file under the
    global `dirname` (imported only if it already exists).
    """
    global prefix, dirname

    path = os.path.join(dirname, 'marks-git')

    lines = [
        "import",
        "export",
        "refspec refs/heads/branches/*:%s/branches/*" % prefix,
        "refspec refs/heads/*:%s/bookmarks/*" % prefix,
        "refspec refs/tags/*:%s/tags/*" % prefix,
    ]
    if os.path.exists(path):
        lines.append("*import-marks %s" % path)
    lines.append("*export-marks %s" % path)
    lines.append('')

    for line in lines:
        print(line)
426
def get_branch_tip(repo, branch):
    """Return the head to use for `branch`, or None if it has no heads.

    Heads are looked up in the global `branches`.  When there are several,
    a warning is written and `repo.branchtip()` is preferred if this
    mercurial version provides it; otherwise the first head is returned.
    """
    global branches

    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    if (len(heads) > 1):
        # Pass the name as an argument so that a '%' in it is not
        # interpreted as a format directive by warn().
        warn("Branch '%s' has more than one head, consider merging", branch)
        # older versions of mercurial don't have this
        if hasattr(repo, "branchtip"):
            return repo.branchtip(branch)

    return heads[0]
442
def list_head(repo, cur):
    """Print the symbolic HEAD line and remember it in the global `g_head`.

    The current bookmark is used when there is one.  Otherwise a fake
    bookmark named after the branch `cur` ('default' becomes 'master') is
    registered in `bmarks`, pointing at '.' or else at 'tip'; if neither
    yields a node nothing is printed.
    """
    global g_head, bmarks

    head = bookmarks.readcurrent(repo)
    if head:
        node = repo[head]
    else:
        # fake bookmark from current branch
        node = repo['.'] or repo['tip']
        if not node:
            return
        head = 'master' if cur == 'default' else cur
        bmarks[head] = node

    print("@refs/heads/%s HEAD" % head)
    g_head = (head, node)
463
def do_list(parser):
    """Print the list of refs, terminated by an empty line.

    Fills the global `bmarks` (and `branches` when branch tracking is on)
    as a side effect.  The 'tip' tag is not listed.
    """
    global branches, bmarks, mode, track_branches

    repo = parser.repo
    for bmark, node in bookmarks.listbookmarks(repo).items():
        bmarks[bmark] = repo[node]

    list_head(repo, repo.dirstate.branch())

    if track_branches:
        for branch in repo.branchmap():
            heads = repo.branchheads(branch)
            if len(heads) > 0:
                branches[branch] = heads

        for branch in branches:
            print("? refs/heads/branches/%s" % branch)

    for bmark in bmarks:
        print("? refs/heads/%s" % bmark)

    for tag, node in repo.tagslist():
        if tag != 'tip':
            print("? refs/tags/%s" % tag)

    print('')
493
def do_import(parser):
    """Handle a batch of consecutive 'import <ref>' commands.

    Prints the feature lines, exports each requested ref with the matching
    export_* function while mercurial's encoding is forced to utf-8, and
    finishes the stream with 'done'.
    """
    repo = parser.repo

    path = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(path):
        print("feature import-marks=%s" % path)
    print("feature export-marks=%s" % path)
    sys.stdout.flush()

    # Checked in order: the branches namespace lives below refs/heads/.
    exporters = (
        ('refs/heads/branches/', export_branch),
        ('refs/heads/', export_bookmark),
        ('refs/tags/', export_tag),
    )

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    # lets get all the import lines
    while parser.check('import'):
        ref = parser[1]

        if ref == 'HEAD':
            export_head(repo)
        else:
            for ref_prefix, export in exporters:
                if ref.startswith(ref_prefix):
                    export(repo, ref[len(ref_prefix):])
                    break

        parser.next()

    encoding.encoding = saved_encoding

    print('done')
529
def parse_blob(parser):
    """Read a 'blob' command and keep its data in `blob_marks` under its mark."""
    global blob_marks

    parser.next()
    mark = parser.get_mark()

    parser.next()
    blob_marks[mark] = parser.get_data()

    parser.next()
539
def get_merge_files(repo, p1, p2, files):
    """Add the files touched by `p1` to `files` unless already listed.

    Each added entry is {'ctx': <file context from p1>}; files that are no
    longer in p1's manifest are skipped.  `p2` is not used.
    """
    parent = repo[p1]
    for path in parent.files():
        if path in files:
            continue
        if path not in parent.manifest():
            continue
        files[path] = { 'ctx' : parent[path] }
547
def parse_commit(parser):
    """Read one 'commit' command from the stream and commit it to the repo.

    Parses mark, author, committer, message, the optional 'from' / 'merge'
    parents and the file commands, builds an in-memory changeset and
    commits it.  The resulting node is stored in `parsed_refs` under the
    commit's ref and the (rev, mark) pair is recorded in `marks`.

    Dies on octopus merges and on file commands other than 'M' and 'D'.
    """
    global marks, blob_marks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # Pass the line as an argument: it may contain '%' (e.g. in a
            # path), which must not be taken for a format directive.
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx to obtain the content of each file.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    if mode == 'hg':
        # Recover the hg-only information stored after the '--HG--'
        # separator and strip it from the message.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    tmp = encoding.encoding
    encoding.encoding = 'utf-8'
    try:
        node = repo.commitctx(ctx)
    finally:
        # Restore the encoding even if the commit fails.
        encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node
    marks.new_mark(rev, commit_mark)
655
def parse_reset(parser):
    """Handle a 'reset <ref>' command.

    If a commit follows immediately it is handed to parse_commit();
    otherwise, when a 'from' line follows, the ref is pointed at the node
    of that mark in `parsed_refs`.  A bare reset does nothing.
    """
    global parsed_refs

    ref = parser[1]
    parser.next()

    if parser.check('commit'):
        # ugh
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()

        rev = mark_to_rev(from_mark)
        parsed_refs[ref] = parser.repo.changelog.node(rev)
672
def parse_tag(parser):
    """Consume a 'tag' command (name, from, tagger, data) and discard it."""
    parser[1]  # tag name, unused

    # Each field sits on its own line; read them in stream order.
    for read_field in (parser.get_mark, parser.get_author, parser.get_data):
        parser.next()
        read_field()
    parser.next()

    # nothing to do
684
def do_export(parser):
    """Handle an 'export' command: apply the incoming stream to the repo.

    Parses blobs, commits, resets and tags up to the 'done' line, then
    reports one 'ok <ref>' / 'error <ref>' line per updated ref followed
    by an empty line.  Tags are created in the repository, the result is
    pushed when a remote `peer` is set, and bookmarks are updated last.
    """
    global parsed_refs, bmarks, peer

    p_bmarks = []

    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            parse_tag(parser)
        elif parser.check('feature'):
            pass
        else:
            # Pass the line as an argument so that a '%' in it is not
            # taken for a format directive.
            die('unhandled export command: %s', line)

    for ref, node in parsed_refs.items():
        if ref.startswith('refs/heads/branches'):
            pass
        elif ref.startswith('refs/heads/'):
            bmark = ref[len('refs/heads/'):]
            p_bmarks.append((bmark, node))
            # handle below
            continue
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            parser.repo.tag([tag], node, None, True, None, {})
        else:
            # transport-helper/fast-export bugs
            continue
        print("ok %s" % ref)

    if peer:
        parser.repo.push(peer, force=False)

    # handle bookmarks
    for bmark, node in p_bmarks:
        ref = 'refs/heads/' + bmark

        if bmark in bmarks:
            old = bmarks[bmark].hex()
        else:
            old = ''

        if bmark == 'master' and 'master' not in parser.repo._bookmarks:
            # fake bookmark
            print("ok %s" % ref)
            continue

        if not bookmarks.pushbookmark(parser.repo, bmark, old, node):
            print("error %s" % ref)
            continue

        print("ok %s" % ref)

    print('')
746
def fix_path(alias, repo, orig_url):
    """Rewrite `remote.<alias>.url` when it differs from the repo's own URL.

    Both URLs are normalized through util.url() before being compared;
    when they differ the git config entry is set to 'hg::<repo url>'.
    """
    repo_url = util.url(repo.url())
    given_url = util.url(orig_url)
    if str(given_url) != str(repo_url):
        subprocess.call(['git', 'config', 'remote.%s.url' % alias, "hg::%s" % repo_url])
754
def main(args):
    """Entry point of the remote helper.

    `args[1]` is the remote alias and `args[2]` the URL.  Reads the
    remote-hg git configuration, sets up the global state, opens the
    repository and then serves commands from stdin until an empty line.
    The marks are stored at the end, unless the remote is a temporary one
    (alias derived from the URL), in which case its directory is removed.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches

    alias = args[1]
    url = args[2]
    peer = None

    hg_git_compat = False
    track_branches = True
    try:
        if get_config('remote-hg.hg-git-compat') == 'true\n':
            hg_git_compat = True
            track_branches = False
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode = 'hg'
        bad_mail = 'none@none'
        bad_name = ''
    else:
        mode = 'git'
        bad_mail = 'unknown'
        bad_name = 'Unknown'

    if alias[4:] == url:
        # The alias is the URL itself (after its first four characters):
        # no configured remote, use a hashed name and clean up afterwards.
        is_tmp = True
        alias = util.sha1(alias).hexdigest()
    else:
        is_tmp = False

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not is_tmp:
        fix_path(alias, peer or repo, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            # Pass the line as an argument so that a '%' in it is not
            # taken for a format directive.
            die('unhandled command: %s', line)
        sys.stdout.flush()

    if not is_tmp:
        marks.store()
    else:
        shutil.rmtree(dirname)
828
# Run the helper only when executed as a script, so that importing the
# module does not start reading commands from stdin.
if __name__ == '__main__':
    sys.exit(main(sys.argv))