1#!/usr/bin/env python
2#
3# Copyright (c) 2012 Felipe Contreras
4#
5
6#
7# Just copy to your ~/bin, or anywhere in your $PATH.
8# Then you can clone with:
9# % git clone bzr::/path/to/bzr/repo/or/url
10#
11# For example:
12# % git clone bzr::$HOME/myrepo
13# or
14# % git clone bzr::lp:myrepo
15#
16
17import sys
18
19import bzrlib
20bzrlib.initialize()
21
22import bzrlib.plugin
23bzrlib.plugin.load_plugins()
24
25import bzrlib.generate_ids
26
27import sys
28import os
29import json
30import re
31import StringIO
32
# Patterns are raw strings so that regex escapes such as \w and \d are never
# interpreted (or warned about) as string escapes.

# Leading run of characters that contains no angle bracket.
NAME_RE = re.compile(r'^([^<>]+)')
# Optional name followed by an address in angle brackets: "Name <mail>".
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# "<keyword> <name> <mail> <seconds> <+hhmm|-hhmm>" as read by Parser.get_author.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
36
def die(msg, *args):
    """Write ``ERROR: <msg>`` to stderr and exit with status 1.

    ``msg`` is %-formatted with ``args`` only when arguments are given.
    Several callers pass a message that is already formatted; applying
    ``%`` to it again would raise if the text (a path, a protocol line)
    happens to contain a ``%`` character, hiding the real error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
40
def warn(msg, *args):
    """Write ``WARNING: <msg>`` to stderr.

    ``msg`` is %-formatted with ``args`` only when arguments are given, so
    an already formatted message containing a literal ``%`` is printed
    verbatim instead of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
43
def gittz(tz):
    """Format a UTC offset given in seconds as a ``+hhmm`` / ``-hhmm`` string.

    The sign is handled separately from the magnitude: floor division and
    modulo on a negative offset round the hour down and complement the
    minutes, which turned -12600 (-03:30) into ``-0430``.
    """
    sign = '-' if tz < 0 else '+'
    hours, seconds = divmod(abs(int(tz)), 3600)
    return '%s%02d%02d' % (sign, hours, seconds // 60)
46
class Marks:
    """Mark and branch-tip bookkeeping persisted as JSON at ``path``.

    The stored document has three keys: ``tips``, ``marks`` and
    ``last-mark``.  The reverse mapping (mark -> revision) is not stored;
    it is rebuilt from ``marks`` when the file is loaded.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}       # branch name -> tip recorded by set_tip()
        self.marks = {}      # revision -> mark
        self.rev_marks = {}  # mark -> revision
        self.last_mark = 0   # highest mark handed out so far
        self.load()

    def load(self):
        """Read the state from ``self.path``; do nothing if it is missing."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the state in the shape that is written to disk."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to ``self.path`` as JSON."""
        # The context manager guarantees the data is flushed and closed.
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of ``rev``; raises KeyError if it has none."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision of ``mark``; raises KeyError if unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a new mark that is not tied to a revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a new mark, bind it to ``rev`` and return it."""
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        # Keep the reverse mapping in step (as new_mark() does) so that
        # to_rev() works without a store()/load() round trip.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def is_marked(self, rev):
        """Tell whether ``rev`` already has a mark."""
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Bind the externally chosen ``mark`` to ``rev``.

        ``last_mark`` is set to ``mark`` unconditionally.
        """
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the tip recorded for ``branch`` or None."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        """Record ``tip`` as the tip of ``branch``."""
        self.tips[branch] = tip
106
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    ``self.line`` always holds the current, whitespace-stripped line; the
    first line is read when the parser is constructed.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and return it stripped."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with ``word``."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield the current line, then read the next, until ``separator``.

        A fresh line is read every time the generator is resumed, after
        the consumer has handled the yielded one.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        """Iterate over the lines up to the next empty line."""
        return self.each_block('')

    def next(self):
        """Advance to the next line; a ``done`` line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer that follows the first ``:`` of the line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a ``data <size>`` line.

        Returns None when the current line is not a ``data`` line,
        otherwise ``<size>`` units read straight from stdin.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_to_seconds(tz):
        """Convert a ``+hhmm`` / ``-hhmm`` offset to seconds.

        Sign and magnitude are separated first: floor division and modulo
        on a negative number turned ``-0330`` into -10200 seconds instead
        of -12600.
        """
        value = int(tz)
        sign = -1 if value < 0 else 1
        hours, minutes = divmod(abs(value), 100)
        return sign * (hours * 3600 + minutes * 60)

    def get_author(self):
        """Parse the current line as an identity line.

        Returns ``('<name> <<mail>>', seconds, utc_offset_in_seconds)`` or
        None when the line does not match RAW_AUTHOR_RE.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        committer = '%s <%s>' % (name, email)
        return (committer, int(date), self._tz_to_seconds(tz))
155
def rev_to_mark(rev):
    """Look up the mark stored for ``rev`` in the global marks table.

    The lookup is delegated to ``marks.from_rev``.
    """
    # Reading a module-level name needs no ``global`` declaration.
    return marks.from_rev(rev)
159
def mark_to_rev(mark):
    """Look up the revision stored for ``mark`` in the global marks table.

    The lookup is delegated to ``marks.to_rev``.
    """
    # Reading a module-level name needs no ``global`` declaration.
    return marks.to_rev(mark)
163
def fixup_user(user):
    """Normalise a user string to the form ``name <mail>``.

    Double quotes are dropped first.  A string shaped like ``Name <mail>``
    (AUTHOR_RE) supplies both parts; otherwise the leading bracket-free
    text (NAME_RE) is used as the name.  A part that cannot be determined
    is replaced by the placeholder ``unknown`` rather than being formatted
    as the literal text ``None``.
    """
    name = mail = None
    user = user.replace('"', '')
    m = AUTHOR_RE.match(user)
    if m:
        name = m.group(1)
        mail = m.group(2).strip()
    else:
        m = NAME_RE.match(user)
        if m:
            name = m.group(1).strip()

    # '%s' % None would leak the word "None" into the identity line.
    if not name:
        name = 'unknown'
    if not mail:
        mail = 'unknown'

    return '%s <%s>' % (name, mail)
177
def get_filechanges(cur, prev):
    """Summarise how tree ``cur`` differs from tree ``prev``.

    Returns ``(modified, removed)``:

    * ``modified`` maps each path whose content must be written to its
      file id (added, modified, renamed-to and kind-changed entries);
    * ``removed`` maps each path that must be deleted to None (removed
      and renamed-from entries, and entries that turned into a directory).
    """
    modified = {}
    removed = {}

    changes = cur.changes_from(prev)

    for path, fid, kind in changes.added:
        modified[path] = fid
    for path, fid, kind in changes.removed:
        removed[path] = None
    # Entries whose kind changed (e.g. file <-> symlink) are reported
    # separately from ``modified`` and used to be dropped entirely.
    for path, fid, old_kind, new_kind in changes.kind_changed:
        if new_kind == 'directory':
            # Directories are never exported; only the old entry must go.
            removed[path] = None
        else:
            modified[path] = fid
    for path, fid, kind, mod, _ in changes.modified:
        modified[path] = fid
    for oldpath, newpath, fid, kind, mod, _ in changes.renamed:
        removed[oldpath] = None
        modified[newpath] = fid

    return modified, removed
195
def export_files(tree, files):
    """Print a ``blob`` record for every entry of ``files`` and describe them.

    ``files`` maps path -> file id.  Returns a list of
    ``(mode, mark, path)`` tuples, one per file or symlink; directories
    are skipped.  File content that was already printed (same sha1, as
    remembered in the global ``filenodes``) is not printed again; its
    existing mark is reused.
    """
    final = []
    for path, fid in files.items():
        kind = tree.kind(fid)

        if kind == 'symlink':
            mode = '120000'
            h = None
            d = tree.get_symlink_target(fid)
            # Defensive: if the target is not a byte string, encode it so
            # that the announced length counts bytes, not characters.
            if not isinstance(d, bytes):
                d = d.encode('utf-8')
        elif kind == 'file':
            if tree.is_executable(fid):
                mode = '100755'
            else:
                mode = '100644'

            # is the blob already exported?
            h = tree.get_file_sha1(fid)
            if h in filenodes:
                final.append((mode, filenodes[h], path))
                continue

            d = tree.get_file_text(fid)
        elif kind == 'directory':
            continue
        else:
            die("Unhandled kind '%s' for path '%s'", kind, path)

        mark = marks.next_mark()
        # Only file content is keyed by sha1; symlinks are never looked up
        # in the cache, so they are not stored in it either.
        if h is not None:
            filenodes[h] = mark

        print("blob")
        print("mark :%u" % mark)
        print("data %d" % len(d))
        print(d)

        final.append((mode, mark, path))

    return final
238
def export_branch(branch, name):
    """Print every not yet marked revision of ``branch`` as a commit.

    Revisions are taken from ``branch.iter_merge_sorted_revisions`` up to
    the tip stored for ``name``; those that already have a mark are
    skipped.  Each commit goes to the ref ``<prefix>/heads/<name>``.
    Afterwards the ref is reset to the branch's last revision, which is
    also recorded as the new tip for ``name``.
    """
    ref = '%s/heads/%s' % (prefix, name)
    tip = marks.get_tip(name)

    repo = branch.repository
    repo.lock_read()
    # Release the read lock even if exporting a revision fails.
    try:
        revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
        revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]

        count = 0
        for revid in revs:

            rev = repo.get_revision(revid)

            parents = rev.parent_ids
            committer = rev.committer.encode('utf-8')
            committer = "%s %u %s" % (fixup_user(committer), rev.timestamp, gittz(rev.timezone))
            # The committer identity is reused for the author line.
            author = committer
            msg = rev.message.encode('utf-8')

            msg += '\n'

            if parents:
                parent = parents[0]
            else:
                parent = bzrlib.revision.NULL_REVISION

            cur_tree = repo.revision_tree(revid)
            prev = repo.revision_tree(parent)
            modified, removed = get_filechanges(cur_tree, prev)

            # export_files prints the blobs, so they come out before the
            # commit record that refers to their marks.
            modified_final = export_files(cur_tree, modified)

            if not parents:
                print('reset %s' % ref)

            print("commit %s" % ref)
            print("mark :%d" % (marks.get_mark(revid)))
            print("author %s" % (author))
            print("committer %s" % (committer))
            print("data %d" % (len(msg)))
            print(msg)

            for i, p in enumerate(parents):
                try:
                    m = rev_to_mark(p)
                except KeyError:
                    # ghost?
                    continue
                if i == 0:
                    print("from :%s" % m)
                else:
                    print("merge :%s" % m)

            # Deletions go first: a path can be in both sets (the old file
            # was renamed away and a new one created under the same name),
            # and a trailing "D" would delete what "M" just wrote.
            for f in removed:
                print("D %s" % (f))
            for f in modified_final:
                print("M %s :%u %s" % f)
            print('')

            count += 1
            if (count % 100 == 0):
                print("progress revision %s (%d/%d)" % (revid, count, len(revs)))
                print("#############################################################")
    finally:
        repo.unlock()

    revid = branch.last_revision()

    # make sure the ref is updated
    print("reset %s" % ref)
    print("from :%u" % rev_to_mark(revid))
    print('')

    marks.set_tip(name, revid)
319
def export_tag(repo, name):
    """Print a ``reset`` record pointing ``refs/tags/<name>`` at its mark.

    If the tag is not in the global ``tags`` table, or its revision has no
    mark, a warning is written and nothing is printed.  ``repo`` is not
    used.
    """
    # Resolve the mark before printing anything, so that a failed lookup
    # cannot leave a "reset" line without its "from" line in the output.
    try:
        mark = rev_to_mark(tags[name])
    except KeyError:
        warn("TODO: fetch tag '%s'", name)
        return

    print("reset refs/tags/%s" % name)
    print("from :%u" % mark)
    print('')
328
def do_import(parser):
    """Handle a run of ``import <ref>`` lines.

    Prints the ``feature`` lines (``import-marks`` only when the
    ``marks-git`` file already exists), then exports every requested ref
    and finally prints ``done``.
    """
    branch = parser.repo
    path = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(path):
        print("feature import-marks=%s" % path)
    print("feature export-marks=%s" % path)
    sys.stdout.flush()

    # Ref namespace -> function that exports a ref of that namespace.
    exporters = (
        ('refs/heads/', export_branch),
        ('refs/tags/', export_tag),
    )

    while parser.check('import'):
        ref = parser[1]
        for namespace, export in exporters:
            if ref.startswith(namespace):
                export(branch, ref[len(namespace):])
        parser.next()

    print('done')

    sys.stdout.flush()
354
def parse_blob(parser):
    """Read one blob record and remember its payload under its mark.

    The line after the current one is expected to carry the mark and the
    one after that the ``data`` header; the payload is stored in the
    global ``blob_marks`` table.
    """
    parser.next()
    mark = parser.get_mark()

    parser.next()
    blob_marks[mark] = parser.get_data()

    # Step past whatever is left on the line after the payload.
    parser.next()
364
class CustomTree():
    """In-memory picture of one incoming commit, handed to the commit builder.

    ``files`` maps path -> ``{'mode': ..., 'data': ...}`` for content to
    write or ``{'deleted': True}`` for a removal.  The path -> file id
    table of the first parent is taken from the global ``files_cache`` (or
    read from the repository when it is not cached) and copied as the
    starting point for this revision.
    """

    def __init__(self, repo, revid, parents, files):
        self.repo = repo
        self.revid = revid
        self.parents = parents
        self.updates = {}  # file id -> entry of ``files`` (plus its 'path')

        def copy_tree(revid):
            # Build and cache the path -> file id table of a stored revision.
            files = files_cache[revid] = {}
            tree = repo.repository.revision_tree(revid)
            repo.lock_read()
            try:
                for path, entry in tree.iter_entries_by_dir():
                    files[path] = entry.file_id
            finally:
                repo.unlock()
            return files

        if len(parents) == 0:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}
        else:
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            if not self.base_files:
                self.base_files = copy_tree(self.base_id)

        # The copy is registered in the cache under this revision's id and
        # is updated in place by iter_changes().
        self.files = files_cache[revid] = self.base_files.copy()

        for path, f in files.items():
            fid = self.files.get(path, None)
            if not fid:
                fid = bzrlib.generate_ids.gen_file_id(path)
            f['path'] = path
            self.updates[fid] = f

    def last_revision(self):
        """Return the id of the revision this tree is based on."""
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples for this commit.

        Each tuple is ``(file id, (old path, new path), True,
        (old versioned, new versioned), (old parent, new parent),
        (old name, new name), (old kind, new kind),
        (old executable, new executable))``; fields this code does not
        track are None.  Presumably this follows the layout consumed by
        bzrlib's ``record_iter_changes`` -- confirm against bzrlib.
        Missing parent directories are added as extra entries.
        """
        changes = []

        def get_parent(dirname, basename):
            # File id of directory ``dirname``, creating an entry for it
            # when neither the base tree nor this revision has one.
            parent_fid = self.base_files.get(dirname, None)
            if parent_fid:
                return parent_fid
            parent_fid = self.files.get(dirname, None)
            if parent_fid:
                return parent_fid
            if basename == '':
                return None
            # The id must be derived from the directory itself; this used
            # to read ``path``, the file currently being processed by the
            # loop at the end of iter_changes().
            fid = bzrlib.generate_ids.gen_file_id(dirname)
            add_entry(fid, dirname, 'directory')
            return fid

        def classify(kind, mode):
            # Translate a git file mode into (kind, executable).
            if mode == '100755':
                return kind, True
            if mode == '120000':
                return 'symlink', False
            return kind, False

        def add_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            kind, executable = classify(kind, mode)

            change = (fid,
                    (None, path),
                    True,
                    (False, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = change[0]
            changes.append(change)
            return change

        def update_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            kind, executable = classify(kind, mode)

            change = (fid,
                    (path, path),
                    True,
                    (True, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = change[0]
            changes.append(change)
            return change

        def remove_entry(fid, path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)
            return change

        for fid, f in self.updates.items():
            path = f['path']

            if 'deleted' in f:
                remove_entry(fid, path, 'file')
                continue

            if path in self.base_files:
                update_entry(fid, path, 'file', f['mode'])
            else:
                add_entry(fid, path, 'file', f['mode'])

        return changes

    def get_file_with_stat(self, file_id, path=None):
        """Return ``(file-like object over the new content, None)``."""
        return (StringIO.StringIO(self.updates[file_id]['data']), None)

    def get_symlink_target(self, file_id):
        """Return the stored data of ``file_id`` as the symlink target."""
        return self.updates[file_id]['data']
501
def parse_commit(parser):
    """Read one commit record and store it as a bzr revision.

    Lines are consumed in this order: the ``commit <ref>`` line, the mark,
    the author, the committer, the message, an optional ``from`` line, any
    number of ``merge`` lines, then ``M`` / ``D`` file lines up to an
    empty line.  Only ``refs/heads/master`` is accepted.  The revision is
    built through a commit builder (aborted if anything fails) and then
    recorded in ``parsed_refs`` and in the marks table.
    """
    parents = []

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    commit_mark = parser.get_mark()
    parser.next()
    # The author line is stepped over; only the committer identity is
    # passed on to the commit builder.
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'data' : blob_marks[mark] }
        elif parser.check('D'):
            # Split once only: everything after the command is the path,
            # which may itself contain spaces.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s' % line)
        files[path] = f

    repo = parser.repo

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = repo.nick

    mtree = CustomTree(repo, revid, parents, files)
    changes = mtree.iter_changes()

    repo.lock_write()
    try:
        builder = repo.get_commit_builder(parents, None, date, tz, committer, props, revid, False)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # Discard the half-built revision, then let the error surface.
            builder.abort()
            raise
    finally:
        repo.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
569
def parse_reset(parser):
    """Read one ``reset <ref>`` record.

    Only ``refs/heads/master`` is accepted.  When the next line starts a
    commit, that commit is parsed instead; when it is a ``from`` line, the
    ref is pointed at the revision of that mark in ``parsed_refs``.
    Anything else leaves the state untouched.
    """
    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
        parsed_refs[ref] = mark_to_rev(from_mark)
589
def do_export(parser):
    """Handle an ``export`` command.

    Consumes the stream up to ``done``, dispatching ``blob``, ``commit``
    and ``reset`` records and ignoring ``tag`` and ``feature`` lines.
    Then, for ``refs/heads/master``, the local branch history is
    regenerated, its last revision is imported into the peer branch and
    the peer's working tree is updated.  An ``ok <ref>`` line is printed
    for every parsed ref.
    """
    # Keyword -> handler; None marks a record that is accepted but ignored.
    handlers = (
        ('blob', parse_blob),
        ('commit', parse_commit),
        ('reset', parse_reset),
        ('tag', None),
        ('feature', None),
    )

    parser.next()

    for line in parser.each_block('done'):
        for keyword, handler in handlers:
            if parser.check(keyword):
                if handler is not None:
                    handler(parser)
                break
        else:
            die('unhandled export command: %s' % line)

    repo = parser.repo

    for ref, revid in parsed_refs.items():
        if ref == 'refs/heads/master':
            repo.generate_revision_history(revid, marks.get_tip('master'))
            revno, revid = repo.last_revision_info()
            peer.import_last_revision_info_and_tags(repo, revno, revid)
            peer.bzrdir.open_workingtree().update()
        print("ok %s" % ref)
    print('')
620
def do_capabilities(parser):
    """Print the capability list, terminated by an empty line.

    ``*import-marks`` is only announced when the ``marks-git`` file
    already exists; ``*export-marks`` is always announced.  ``parser`` is
    not used.
    """
    print("import")
    print("export")
    print("refspec refs/heads/*:%s/heads/*" % prefix)

    marks_file = os.path.join(dirname, 'marks-git')
    have_marks = os.path.exists(marks_file)

    if have_marks:
        print("*import-marks %s" % marks_file)
    print("*export-marks %s" % marks_file)

    print('')
635
def do_list(parser):
    """Print the available refs, terminated by an empty line.

    The only branch listed is ``master``, which is also what HEAD points
    to.  Every tag of the repository is listed and remembered in the
    global ``tags`` table (tag name -> revision id).
    """
    head = 'refs/heads/%s' % 'master'

    print("? %s" % head)
    tag_dict = parser.repo.tags.get_tag_dict()
    for tag, revid in tag_dict.items():
        print("? refs/tags/%s" % tag)
        tags[tag] = revid
    print("@%s HEAD" % head)
    print('')
644
def get_repo(url, alias):
    """Open the branch at ``url`` and return a local clone of it.

    The clone lives in ``<dirname>/clone``.  If it does not exist yet it
    is created with ``sprout`` and bound to the remote branch; otherwise
    the remote branch is pulled into it.  The remote branch is kept in the
    global ``peer``.  ``alias`` is not used.
    """
    global peer

    clone_path = os.path.join(dirname, 'clone')
    origin = bzrlib.controldir.ControlDir.open(url)
    remote_branch = origin.open_branch()

    if not os.path.exists(clone_path):
        # clone
        control = origin.sprout(clone_path, None,
                hardlink=True, create_tree_if_local=False,
                source_branch=remote_branch)
        branch = control.open_branch()
        branch.bind(remote_branch)
    else:
        # pull
        control = bzrlib.controldir.ControlDir.open(clone_path)
        branch = control.open_branch()
        branch.pull(remote_branch, [], None, False)

    peer = remote_branch

    return branch
668
def main(args):
    """Run the helper: ``args[1]`` is the remote alias, ``args[2]`` its url.

    Initialises the module-level state, makes sure
    ``$GIT_DIR/bzr/<alias>`` exists, opens the repository and then serves
    commands read from stdin until an empty line.  The marks are written
    back to ``marks-int`` at the end.
    """
    global marks, prefix, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache

    alias = args[1]
    url = args[2]

    tags = {}
    filenodes = {}
    blob_marks = {}
    parsed_refs = {}
    files_cache = {}
    prefix = 'refs/bzr/%s' % alias

    dirname = os.path.join(os.environ['GIT_DIR'], 'bzr', alias)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    repo = get_repo(url, alias)

    marks = Marks(os.path.join(dirname, 'marks-int'))

    # Command keyword -> function that serves it.
    commands = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for keyword, handler in commands:
            if parser.check(keyword):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()

    marks.store()
712
# Run the helper only when executed as a script, so that loading the file
# as a module does not start reading commands from stdin.
if __name__ == '__main__':
    sys.exit(main(sys.argv))