1#!/usr/bin/env python
2#
3# Copyright (c) 2012 Felipe Contreras
4#
5
6#
7# Just copy to your ~/bin, or anywhere in your $PATH.
8# Then you can clone with:
9# % git clone bzr::/path/to/bzr/repo/or/url
10#
11# For example:
12# % git clone bzr::$HOME/myrepo
13# or
14# % git clone bzr::lp:myrepo
15#
16
17import sys
18
19import bzrlib
20if hasattr(bzrlib, "initialize"):
21 bzrlib.initialize()
22
23import bzrlib.plugin
24bzrlib.plugin.load_plugins()
25
26import bzrlib.generate_ids
27
28import sys
29import os
30import json
31import re
32import StringIO
33
# Raw string literals keep regex escapes such as \w and \d out of Python's own
# string-escape processing; the compiled patterns are unchanged.

# Leading run of characters that contains neither '<' nor '>'.
NAME_RE = re.compile(r'^([^<>]+)')
# "<name> <email>" with an optional name: group 1 is the name (None when
# absent), group 2 the text between the angle brackets.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# "<word> <name> <<email>> <digits> <+|-digits>": the identity lines read by
# Parser.get_author().
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
37
def die(msg, *args):
    """Write ``ERROR: <msg>`` to stderr and exit with status 1.

    ``msg`` is %-formatted with ``args`` only when ``args`` are supplied.
    A message the caller has already formatted (and which may therefore
    contain a literal ``%``) is written unchanged instead of raising a
    formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
41
def warn(msg, *args):
    """Write ``WARNING: <msg>`` to stderr.

    ``msg`` is %-formatted with ``args`` only when ``args`` are supplied, so
    a message the caller has already formatted may contain a literal ``%``
    without triggering a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
44
def gittz(tz):
    """Format a UTC offset in seconds as a ``+HHMM`` / ``-HHMM`` string.

    The sign is handled separately from the magnitude: flooring a negative
    offset that is not a whole number of hours would otherwise round the
    hour away from zero (-03:30 would come out as ``-0430``).
    """
    sign = '-' if tz < 0 else '+'
    hours, seconds = divmod(abs(int(tz)), 3600)
    return '%s%02d%02d' % (sign, hours, seconds // 60)
47
class Marks:
    """Table relating revision ids to marks, persisted as JSON.

    The state is stored in the file at ``path`` as one JSON object with the
    keys ``tips``, ``marks`` and ``last-mark``.
    """

    def __init__(self, path):
        """Remember ``path`` and load any state already stored there."""
        self.path = path
        self.tips = {}        # branch name -> value stored by set_tip()
        self.marks = {}       # revision id -> mark
        self.rev_marks = {}   # mark -> revision id
        self.last_mark = 0    # most recently assigned mark
        self.load()

    def load(self):
        """Read the state from ``self.path``; do nothing if it is missing."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaving it to the GC.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # Only the forward table is stored; rebuild the reverse one.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the state in the form that store() serialises."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to ``self.path`` as JSON."""
        # The context manager guarantees the data is flushed and the file
        # closed before store() returns.
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark for ``rev``; raises KeyError if it has none."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision id for ``mark``; raises KeyError if unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a new mark that is not tied to a revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a new mark, associate it with ``rev`` and return it."""
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        # Keep the reverse table in step so to_rev() also works for marks
        # created during this run, not only for those read back by load().
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def is_marked(self, rev):
        """Return True if ``rev`` already has a mark."""
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Record an externally chosen ``mark`` for ``rev``."""
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the tip stored for ``branch``, or None if there is none."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        """Store ``tip`` as the tip of ``branch``."""
        self.tips[branch] = tip
107
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    ``self.line`` always holds the current (stripped) line; the helper
    methods interpret that line or advance to the next one.
    """

    def __init__(self, repo):
        """Keep ``repo`` for the command handlers and read the first line."""
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace stripped."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the ``i``-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with ``word``."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals ``separator``.

        A fresh line is read each time the generator is resumed, after the
        consumer has finished with the yielded one.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # Iterating the parser yields lines up to the next empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a ``done`` line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ``:`` on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a ``data <size>`` line.

        Returns None when the current line is not a ``data`` line; otherwise
        reads ``size`` from stdin and returns what was read.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_to_seconds(tz):
        """Convert a ``+HHMM`` / ``-HHMM`` string to an offset in seconds.

        The sign is applied after splitting the digits into hours and
        minutes; doing floor arithmetic on the signed number would turn
        ``-0330`` into -4 hours plus 70 minutes.
        """
        sign = -1 if tz.startswith('-') else 1
        digits = int(tz.lstrip('+-'))
        return sign * ((digits // 100) * 3600 + (digits % 100) * 60)

    def get_author(self):
        """Parse an identity line into ``(user, timestamp, tz_seconds)``.

        Returns None when the current line does not match RAW_AUTHOR_RE.
        ``user`` is ``"<name> <<email>>"``, or just ``"<<email>>"`` when the
        line carries no name (so the literal text "None" is never emitted).
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name:
            committer = '%s <%s>' % (name, email)
        else:
            committer = '<%s>' % email
        return (committer, int(date), self._tz_to_seconds(tz))
156
def rev_to_mark(rev):
    """Return the mark that the module-level ``marks`` table holds for ``rev``.

    Whatever ``marks.from_rev`` raises for an unknown revision propagates
    to the caller.
    """
    mark = marks.from_rev(rev)
    return mark
160
def mark_to_rev(mark):
    """Return the revision that the module-level ``marks`` table holds for ``mark``.

    Whatever ``marks.to_rev`` raises for an unknown mark propagates to the
    caller.
    """
    rev = marks.to_rev(mark)
    return rev
164
def fixup_user(user):
    """Normalise a user string to the form ``<name> <<email>>``.

    Double quotes are removed first. If the string matches AUTHOR_RE its
    name and email groups are used; otherwise the leading text matched by
    NAME_RE becomes the name and no email is known.

    A part that could not be determined is replaced by ``unknown`` rather
    than being formatted as the literal text "None".
    """
    name = mail = None
    user = user.replace('"', '')

    m = AUTHOR_RE.match(user)
    if m:
        name = m.group(1)
        mail = m.group(2).strip()
    else:
        m = NAME_RE.match(user)
        if m:
            name = m.group(1).strip()

    if not name:
        name = 'unknown'
    # An empty address between the brackets is passed through as it was
    # written; only a completely absent address is substituted.
    if mail is None:
        mail = 'unknown'

    return '%s <%s>' % (name, mail)
178
def get_filechanges(cur, prev):
    """Summarise the changes between two trees as path dictionaries.

    Uses ``cur.changes_from(prev)`` and returns ``(modified, removed)``:

    * ``modified`` maps a path to its file id for everything that was
      added, modified or is the new location of a rename;
    * ``removed`` maps a path to None for everything that was removed or
      is the old location of a rename.

    A renamed directory is expanded into the non-directory entries listed
    beneath its new location by ``cur.list_files``. The old location is
    removed as a whole, so recording only the directory itself (which the
    exporter skips) would lose every file inside it.
    """
    modified = {}
    removed = {}

    changes = cur.changes_from(prev)

    for path, fid, kind in changes.added:
        modified[path] = fid
    for path, fid, kind in changes.removed:
        removed[path] = None
    for path, fid, kind, mod, _ in changes.modified:
        modified[path] = fid
    for oldpath, newpath, fid, kind, mod, _ in changes.renamed:
        removed[oldpath] = None
        if kind == 'directory':
            # Paths come back relative to from_dir -- see the bzrlib
            # Tree.list_files documentation.
            children = cur.list_files(from_dir=newpath, recursive=True)
            for path, _cls, child_kind, child_fid, _entry in children:
                if child_kind != 'directory':
                    modified[newpath + '/' + path] = child_fid
        else:
            modified[newpath] = fid

    return modified, removed
196
def export_files(tree, files):
    """Print ``blob`` commands for ``files`` and return their descriptions.

    ``files`` maps a path to a file id in ``tree``. For every symlink or
    regular file a ``(mode, mark, path)`` tuple is appended to the result;
    directories are skipped and any other kind aborts through ``die``.

    Regular-file content is de-duplicated through the module-level
    ``filenodes`` table (sha1 -> mark): a sha1 that is already present
    reuses the earlier mark and no new blob is printed.
    """
    final = []
    for path, fid in files.items():
        kind = tree.kind(fid)

        if kind == 'directory':
            continue

        if kind == 'symlink':
            mode = '120000'
            # The blob data of a symlink is its target string, which the
            # file sha1 does not describe, so symlink blobs stay out of the
            # sha1 -> mark cache.
            h = None
            d = tree.get_symlink_target(fid)
            # NOTE(review): the target may be a unicode object -- confirm
            # against bzrlib. The size announced by "data" below has to
            # count bytes, so encode anything that is not a byte string.
            if not isinstance(d, str):
                d = d.encode('utf-8')
        elif kind == 'file':
            if tree.is_executable(fid):
                mode = '100755'
            else:
                mode = '100644'

            # is the blob already exported?
            h = tree.get_file_sha1(fid)
            if h in filenodes:
                final.append((mode, filenodes[h], path))
                continue

            d = tree.get_file_text(fid)
        else:
            die("Unhandled kind '%s' for path '%s'", kind, path)

        mark = marks.next_mark()
        if h is not None:
            filenodes[h] = mark

        print("blob")
        print("mark :%u" % mark)
        print("data %d" % len(d))
        print(d)

        final.append((mode, mark, path))

    return final
239
def export_branch(branch, name):
    """Print the commits of ``branch`` that have no mark yet.

    Revisions come from ``branch.iter_merge_sorted_revisions`` starting
    after the tip stored for ``name`` in the module-level ``marks`` table;
    those that already have a mark are dropped. For each remaining revision
    the changed files are exported as blobs and a ``commit`` command is
    printed on ``<prefix>/heads/<name>``. Finally the ref is reset to the
    branch's last revision and that revision is stored as the new tip.

    The repository read lock is released in a ``finally`` clause so that a
    failure while exporting does not leave the repository locked.
    """
    ref = '%s/heads/%s' % (prefix, name)
    tip = marks.get_tip(name)

    repo = branch.repository
    repo.lock_read()
    try:
        revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
        revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]

        for count, revid in enumerate(revs, 1):

            rev = repo.get_revision(revid)

            parents = rev.parent_ids
            committer = rev.committer.encode('utf-8')
            committer = "%s %u %s" % (fixup_user(committer), rev.timestamp, gittz(rev.timezone))
            # The committer identity is reused for the author line.
            author = committer
            msg = rev.message.encode('utf-8') + '\n'

            # File changes are computed against the first parent, or against
            # the null revision when the revision has no parents.
            if parents:
                parent = parents[0]
            else:
                parent = bzrlib.revision.NULL_REVISION

            cur_tree = repo.revision_tree(revid)
            prev = repo.revision_tree(parent)
            modified, removed = get_filechanges(cur_tree, prev)

            # Blobs have to be printed before the commit that refers to them.
            modified_final = export_files(cur_tree, modified)

            if not parents:
                print('reset %s' % ref)

            print("commit %s" % ref)
            print("mark :%d" % (marks.get_mark(revid)))
            print("author %s" % (author))
            print("committer %s" % (committer))
            print("data %d" % (len(msg)))
            print(msg)

            for i, p in enumerate(parents):
                try:
                    m = rev_to_mark(p)
                except KeyError:
                    # ghost?
                    continue
                if i == 0:
                    print("from :%s" % m)
                else:
                    print("merge :%s" % m)

            for f in modified_final:
                print("M %s :%u %s" % f)
            for f in removed:
                print("D %s" % (f))
            print("")

            if (count % 100 == 0):
                print("progress revision %s (%d/%d)" % (revid, count, len(revs)))
                print("#############################################################")
    finally:
        repo.unlock()

    revid = branch.last_revision()

    # make sure the ref is updated
    print("reset %s" % ref)
    print("from :%u" % rev_to_mark(revid))
    print("")

    marks.set_tip(name, revid)
320
def export_tag(repo, name):
    """Print a ``reset`` that points ``refs/tags/<name>`` at the tag's mark.

    The mark is resolved before anything is printed. If the tag is not in
    the module-level ``tags`` table, or its revision has no mark, only a
    warning is written and no partial ``reset`` command reaches the output.
    ``repo`` is accepted for symmetry with the caller and is not used.
    """
    try:
        mark = rev_to_mark(tags[name])
    except KeyError:
        # The name is passed as an argument so that a '%' inside it cannot
        # be mistaken for a format directive.
        warn("TODO: fetch tag '%s'", name)
        return

    print("reset refs/tags/%s" % name)
    print("from :%u" % mark)
    print("")
329
def do_import(parser):
    """Handle a batch of consecutive ``import <ref>`` commands.

    Prints the ``feature`` header (including the ``marks-git`` file under
    the module-level ``dirname``), exports every requested branch or tag,
    then prints ``done``.
    """
    heads_ns = 'refs/heads/'
    tags_ns = 'refs/tags/'

    branch = parser.repo
    marks_file = os.path.join(dirname, 'marks-git')

    print("feature done")
    # The marks file can only be imported once an earlier run has written it.
    if os.path.exists(marks_file):
        print("feature import-marks=%s" % marks_file)
    print("feature export-marks=%s" % marks_file)
    sys.stdout.flush()

    while parser.check('import'):
        ref = parser[1]
        if ref.startswith(heads_ns):
            export_branch(branch, ref[len(heads_ns):])
        elif ref.startswith(tags_ns):
            export_tag(branch, ref[len(tags_ns):])
        parser.next()

    print('done')

    sys.stdout.flush()
355
def parse_blob(parser):
    """Consume one ``blob`` command and keep its data by mark.

    The data is stored in the module-level ``blob_marks`` table under the
    mark read from the line that follows ``blob``.
    """
    parser.next()                   # move from "blob" to the mark line
    blob_id = parser.get_mark()
    parser.next()                   # move to the data line
    blob_marks[blob_id] = parser.get_data()
    parser.next()                   # step past the data
365
class CustomTree():
    """Tree-like object describing the files touched by one incoming commit.

    It keeps a path -> file id map for the first parent (``base_files``), a
    copy of that map updated for this commit (``files``, also stored in the
    module-level ``files_cache`` under ``revid``), and the per-file update
    records keyed by file id (``updates``).
    """

    def __init__(self, repo, revid, parents, files):
        """Build the path maps and index ``files`` by file id.

        ``files`` maps a path to a dict holding either ``mode`` and ``data``
        or ``deleted``; each dict additionally receives a ``path`` key.
        """
        global files_cache

        self.repo = repo
        self.revid = revid
        self.parents = parents
        self.updates = {}

        def copy_tree(revid):
            # Record path -> file id for every entry of an existing
            # revision and remember the map in files_cache.
            files = files_cache[revid] = {}
            tree = repo.repository.revision_tree(revid)
            repo.lock_read()
            try:
                for path, entry in tree.iter_entries_by_dir():
                    files[path] = entry.file_id
            finally:
                repo.unlock()
            return files

        if len(parents) == 0:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}
        else:
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            if not self.base_files:
                self.base_files = copy_tree(self.base_id)

        self.files = files_cache[revid] = self.base_files.copy()

        for path, f in files.items():
            # Reuse the id of a path that is already known, otherwise
            # generate a new one.
            fid = self.files.get(path, None)
            if not fid:
                fid = bzrlib.generate_ids.gen_file_id(path)
            f['path'] = path
            self.updates[fid] = f

    def last_revision(self):
        """Return the id of the revision this tree is based on."""
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples for this commit.

        Each tuple is ``(file_id, (old_path, new_path), True,
        (old_versioned, new_versioned), (old_parent, new_parent),
        (old_name, new_name), (old_kind, new_kind),
        (old_executable, new_executable))``. Directories missing from both
        path maps are added on the way, before the entries they contain.
        """
        changes = []

        def get_parent(dirname, basename):
            # Return the file id of directory `dirname`, creating a
            # directory entry for it when neither path map knows it.
            parent_fid = self.base_files.get(dirname, None)
            if parent_fid:
                return parent_fid
            parent_fid = self.files.get(dirname, None)
            if parent_fid:
                return parent_fid
            if basename == '':
                return None
            # Name the new id after the directory being created, not after
            # whichever file the enclosing loop happens to be processing.
            if dirname == '':
                fid = bzrlib.generate_ids.gen_root_id()
            else:
                fid = bzrlib.generate_ids.gen_file_id(dirname)
            add_entry(fid, dirname, 'directory')
            return fid

        def content_change(fid, path, kind, mode, in_base):
            # Shared body of add_entry/update_entry; they differ only in
            # the old-path and old-versioned fields.
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            if in_base:
                paths, versioned = (path, path), (True, True)
            else:
                paths, versioned = (None, path), (False, True)

            change = (fid,
                    paths,
                    True,
                    versioned,
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = change[0]
            changes.append(change)
            return change

        def add_entry(fid, path, kind, mode = None):
            return content_change(fid, path, kind, mode, False)

        def update_entry(fid, path, kind, mode = None):
            return content_change(fid, path, kind, mode, True)

        def remove_entry(fid, path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)
            return change

        for fid, f in self.updates.items():
            path = f['path']

            if 'deleted' in f:
                remove_entry(fid, path, 'file')
                continue

            if path in self.base_files:
                update_entry(fid, path, 'file', f['mode'])
            else:
                add_entry(fid, path, 'file', f['mode'])

        return changes

    def get_file_with_stat(self, file_id, path=None):
        """Return ``(file-like object over the new content, None)``."""
        return (StringIO.StringIO(self.updates[file_id]['data']), None)

    def get_symlink_target(self, file_id):
        """Return the stored data of ``file_id`` as the symlink target."""
        return self.updates[file_id]['data']
502
def parse_commit(parser):
    """Consume one ``commit`` command and create the matching bzr revision.

    Reads the mark, author, committer, message, parents and file commands
    from ``parser``, builds a CustomTree for them and commits it through the
    commit builder of ``parser.repo``. Afterwards the new revision id is
    recorded in the module-level ``parsed_refs`` (under the ref) and
    ``marks`` (under the commit's mark).

    Only ``refs/heads/master`` is accepted; any other ref aborts via ``die``.
    """
    parents = []

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    commit_mark = parser.get_mark()
    parser.next()
    # The author line is skipped; the revision is created from the committer.
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    files = {}

    for line in parser:
        if parser.check('M'):
            # The path is the last field and may contain spaces.
            _, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'data' : blob_marks[mark] }
        elif parser.check('D'):
            # Split once only: everything after "D " is the path, which may
            # itself contain spaces.
            _, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s', line)
        files[path] = f

    repo = parser.repo

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = repo.nick

    mtree = CustomTree(repo, revid, parents, files)
    changes = mtree.iter_changes()

    repo.lock_write()
    try:
        builder = repo.get_commit_builder(parents, None, date, tz, committer, props, revid)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # Abort the half-built commit, then let the error propagate.
            builder.abort()
            raise
    finally:
        repo.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
570
def parse_reset(parser):
    """Consume one ``reset`` command.

    A ``reset`` followed directly by a ``commit`` is handed to
    parse_commit(). A ``reset`` followed by ``from :<mark>`` records the
    revision behind that mark for the ref in the module-level
    ``parsed_refs``. Anything else leaves the state untouched. Only
    ``refs/heads/master`` is accepted.
    """
    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    if parser.check('commit'):
        # ugh
        parse_commit(parser)
    elif parser.check('from'):
        target = parser.get_mark()
        parser.next()
        parsed_refs[ref] = mark_to_rev(target)
590
def do_export(parser):
    """Handle an ``export`` command: read the stream and update the branches.

    Every ``blob``, ``commit`` and ``reset`` command up to ``done`` is
    parsed; ``tag`` and ``feature`` lines are ignored. For
    ``refs/heads/master`` the local branch history is then regenerated and
    pushed to the module-level ``peer`` branch, and ``ok <ref>`` is printed
    for every ref that was parsed.
    """
    import bzrlib.errors

    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            pass
        elif parser.check('feature'):
            pass
        else:
            die('unhandled export command: %s', line)

    repo = parser.repo

    for ref, revid in parsed_refs.items():
        if ref == 'refs/heads/master':
            repo.generate_revision_history(revid, marks.get_tip('master'))
            revno, revid = repo.last_revision_info()
            # Use the variant that also handles tags when the peer has it.
            if hasattr(peer, "import_last_revision_info_and_tags"):
                peer.import_last_revision_info_and_tags(repo, revno, revid)
            else:
                peer.import_last_revision_info(repo.repository, revno, revid)
            # Refreshing the peer's working tree is best effort: the
            # revisions are already in place, so a peer with no local
            # working tree must not turn the push into a failure before
            # "ok" is reported. See the bzrlib BzrDir.open_workingtree
            # documentation for the exceptions caught here.
            try:
                wt = peer.bzrdir.open_workingtree()
            except (bzrlib.errors.NoWorkingTree, bzrlib.errors.NotLocalUrl):
                wt = None
            if wt is not None:
                wt.update()
        print("ok %s" % ref)
    print("")
624
def do_capabilities(parser):
    """Print the capability list, terminated by an empty line.

    Advertises ``import``, ``export``, the refspec built from the
    module-level ``prefix`` and the ``marks-git`` file under the
    module-level ``dirname``. ``parser`` is not used.
    """
    for capability in ("import", "export"):
        print(capability)
    print("refspec refs/heads/*:%s/heads/*" % prefix)

    marks_file = os.path.join(dirname, 'marks-git')

    # The marks file is only offered for import once it exists.
    if os.path.exists(marks_file):
        print("*import-marks %s" % marks_file)
    print("*export-marks %s" % marks_file)

    print("")
639
def do_list(parser):
    """Print the ref list: ``master``, every tag, and HEAD.

    Each tag of ``parser.repo`` is also remembered, with its revision id,
    in the module-level ``tags`` table. The list ends with an empty line.
    """
    head = 'master'

    print("? refs/heads/%s" % head)
    tag_dict = parser.repo.tags.get_tag_dict()
    for tag, revid in tag_dict.items():
        print("? refs/tags/%s" % tag)
        tags[tag] = revid
    print("@refs/heads/%s HEAD" % head)
    print("")
648
def get_repo(url, alias):
    """Open the branch at ``url`` and return a local clone of it.

    The clone lives in ``clone`` under the module-level ``dirname``. If it
    already exists it is updated by pulling from the remote branch;
    otherwise it is created with ``sprout`` and bound to the remote branch.
    The remote branch is stored in the module-level ``peer``. ``alias`` is
    not used.
    """
    global peer

    clone_path = os.path.join(dirname, 'clone')
    origin = bzrlib.bzrdir.BzrDir.open(url)
    remote_branch = origin.open_branch()

    if not os.path.exists(clone_path):
        # clone
        clone_dir = origin.sprout(clone_path, None,
                hardlink=True, create_tree_if_local=False,
                source_branch=remote_branch)
        branch = clone_dir.open_branch()
        branch.bind(remote_branch)
    else:
        # pull
        clone_dir = bzrlib.bzrdir.BzrDir.open(clone_path)
        branch = clone_dir.open_branch()
        branch.pull(remote_branch, [], None, False)

    peer = remote_branch

    return branch
672
def main(args):
    """Run the helper: set up the shared state and serve commands from stdin.

    ``args[1]`` is the remote alias and ``args[2]`` its URL. State is kept
    under ``$GIT_DIR/bzr/<alias>``. Commands are read until an empty line
    and the marks are written back at the end.
    """
    global marks, prefix, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache

    alias, url = args[1], args[2]

    prefix = 'refs/bzr/%s' % alias
    tags = {}
    filenodes = {}
    blob_marks = {}
    parsed_refs = {}
    files_cache = {}

    dirname = os.path.join(os.environ['GIT_DIR'], 'bzr', alias)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    repo = get_repo(url, alias)

    marks = Marks(os.path.join(dirname, 'marks-int'))

    # Checked in this order; the first command word that prefixes the
    # current line selects the handler.
    handlers = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for word, handler in handlers:
            if parser.check(word):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()

    marks.store()
716
# Script entry point: run main() with the command-line arguments and use its
# return value as the process exit status.
sys.exit(main(sys.argv))