1#!/usr/bin/env python
2#
3# Copyright (c) 2012 Felipe Contreras
4#
5
6#
7# Just copy to your ~/bin, or anywhere in your $PATH.
8# Then you can clone with:
9# % git clone bzr::/path/to/bzr/repo/or/url
10#
11# For example:
12# % git clone bzr::$HOME/myrepo
13# or
14# % git clone bzr::lp:myrepo
15#
16
17import sys
18
19import bzrlib
20bzrlib.initialize()
21
22import bzrlib.plugin
23bzrlib.plugin.load_plugins()
24
25import bzrlib.generate_ids
26
27import sys
28import os
29import json
30import re
31import StringIO
32
# Raw strings are used so that regex escapes such as \w and \d are not
# interpreted (or warned about) as string escapes; the patterns themselves
# are unchanged.

# Leading run of characters of a user string that contains no angle brackets.
NAME_RE = re.compile(r'^([^<>]+)')
# Optional display name followed by a bracketed address: "Name <mail>".
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Identity line of a fast-import stream:
# "<keyword> [name] <mail> <seconds> <+HHMM|-HHMM>".
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
36
def die(msg, *args):
    """Write an ``ERROR:`` line to stderr and exit with status 1.

    msg:  message text, or a %-format string when ``args`` are given.
    args: optional values interpolated into ``msg``.

    ``msg`` is only %-formatted when ``args`` is non-empty.  Callers that
    pass an already formatted message may therefore include a literal ``%``
    (for example from a file name) without triggering a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % (msg,))
    sys.exit(1)
40
def warn(msg, *args):
    """Write a ``WARNING:`` line to stderr.

    msg:  message text, or a %-format string when ``args`` are given.
    args: optional values interpolated into ``msg``.

    ``msg`` is only %-formatted when ``args`` is non-empty, so an already
    formatted message containing a literal ``%`` is written as-is instead
    of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % (msg,))
43
def gittz(tz):
    """Format a UTC offset in seconds as a ``+HHMM`` / ``-HHMM`` string.

    The sign is handled separately from the magnitude.  Dividing a negative
    offset directly rounds towards minus infinity, which turned -12600
    (-03:30) into "-0430"; working on the absolute value gives "-0330".
    """
    sign = '-' if tz < 0 else '+'
    hours, remainder = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
46
class Marks:
    """Bookkeeping of revision ids and fast-import marks, persisted as JSON.

    Attributes:
      path      -- file the state is loaded from and stored to.
      tips      -- branch name -> revision id recorded with set_tip().
      marks     -- revision id -> mark.
      rev_marks -- mark -> revision id (the inverse of ``marks``).
      last_mark -- the most recently allocated or recorded mark.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0
        self.load()

    def load(self):
        """Load the state from ``self.path``; do nothing if it is missing."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of relying on the
        # garbage collector.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # Only the forward map is stored; rebuild the inverse from it.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the state that gets persisted, as a plain dictionary."""
        return {'tips': self.tips, 'marks': self.marks, 'last-mark': self.last_mark}

    def store(self):
        """Write the current state to ``self.path`` as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of ``rev``; raises KeyError if it has none."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision id of ``mark``; raises KeyError if unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a new mark that is not tied to a revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a new mark for ``rev``, record it and return it.

        The inverse map is updated as well, so to_rev() can resolve the
        mark without a store()/load() round trip.
        """
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def is_marked(self, rev):
        """Tell whether ``rev`` already has a mark."""
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Record an externally chosen ``mark`` for ``rev``.

        ``last_mark`` is set to ``mark``.
        """
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the tip recorded for ``branch``, or None."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        """Record ``tip`` as the tip of ``branch``."""
        self.tips[branch] = tip
106
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    ``self.line`` always holds the current (stripped) line; the helper
    methods below inspect it or advance to the next one.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin, stripped of surrounding whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with ``word``."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals ``separator``.

        A new line is read each time the consumer asks for the next item,
        so the consumer may itself advance the parser in between.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # Iterating the parser yields lines up to the next empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a ``done`` line is replaced by None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ``:`` of the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a ``data <size>`` line.

        Returns None when the current line is not a ``data`` line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse the current line as an identity line.

        Returns ``(user, timestamp, tz)`` where ``user`` is ``Name <mail>``
        (or just ``<mail>`` when the name is absent), ``timestamp`` is an
        int and ``tz`` is the offset in seconds.  Returns None when the
        line does not match RAW_AUTHOR_RE.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name:
            committer = '%s <%s>' % (name, email)
        else:
            # The name group is optional; avoid rendering it as "None".
            committer = '<%s>' % email
        # The offset is written as [+-]HHMM.  Split the magnitude so that
        # negative offsets with minutes (e.g. -0330) are not skewed by
        # floor division.
        tz = int(tz)
        sign = -1 if tz < 0 else 1
        hours, minutes = divmod(abs(tz), 100)
        tz = sign * (hours * 3600 + minutes * 60)
        return (committer, int(date), tz)
155
def rev_to_mark(rev):
    """Look up the mark of revision id ``rev`` in the module-level ``marks``.

    Any exception raised by ``marks.from_rev`` is propagated to the caller.
    """
    # ``marks`` is only read here, so no ``global`` declaration is needed.
    return marks.from_rev(rev)
159
def mark_to_rev(mark):
    """Look up the revision id of ``mark`` in the module-level ``marks``.

    Any exception raised by ``marks.to_rev`` is propagated to the caller.
    """
    # ``marks`` is only read here, so no ``global`` declaration is needed.
    return marks.to_rev(mark)
163
def fixup_user(user):
    """Normalize a user string into the ``Name <mail>`` form.

    Double quotes are dropped.  The string is first matched against
    AUTHOR_RE ("Name <mail>"); failing that, NAME_RE extracts a bare name.
    Whatever part cannot be determined is replaced by the placeholder
    ``unknown`` rather than being rendered as the text "None".
    """
    name = mail = None
    user = user.replace('"', '')

    m = AUTHOR_RE.match(user)
    if m:
        name = m.group(1)
        mail = m.group(2).strip()
    else:
        m = NAME_RE.match(user)
        if m:
            name = m.group(1).strip()

    # Either part may still be None (optional regex group, or no match at
    # all) or empty; substitute a placeholder in that case.
    if not name:
        name = 'unknown'
    if not mail:
        mail = 'unknown'

    return '%s <%s>' % (name, mail)
177
def get_filechanges(cur, prev):
    """Summarize the differences between tree ``cur`` and tree ``prev``.

    Uses ``cur.changes_from(prev)`` and returns ``(modified, removed)``:

      modified -- path -> file id for added, modified and renamed-to paths.
      removed  -- path -> None for removed and renamed-from paths.

    Paths are returned as UTF-8 encoded byte strings, because they end up
    in the fast-import stream written to stdout; text (unicode) paths with
    non-ASCII characters cannot be written there without encoding.
    """
    modified = {}
    removed = {}

    def utf8(path):
        # Leave byte strings alone; encode text paths.
        if isinstance(path, bytes):
            return path
        return path.encode('utf-8')

    changes = cur.changes_from(prev)

    for path, fid, kind in changes.added:
        modified[utf8(path)] = fid
    for path, fid, kind in changes.removed:
        removed[utf8(path)] = None
    for path, fid, kind, mod, _ in changes.modified:
        modified[utf8(path)] = fid
    for oldpath, newpath, fid, kind, mod, _ in changes.renamed:
        # A rename is a removal of the old path plus content at the new one.
        removed[utf8(oldpath)] = None
        modified[utf8(newpath)] = fid

    return modified, removed
195
def export_files(tree, files):
    """Write fast-import blobs for ``files`` and return their descriptions.

    tree:  tree object the entries are read from.
    files: mapping of path -> file id.

    Returns a list of ``(mode, mark, path)`` tuples, one per exported
    entry.  Regular files get mode 100644 or 100755 depending on
    ``tree.is_executable()``, symlinks get mode 120000 with the link target
    as content, and directories are skipped.  Any other kind aborts via
    die().

    File contents are de-duplicated through the module-level ``filenodes``
    map (sha1 -> mark): a blob is only written the first time its sha1 is
    seen.
    """
    final = []
    for path, fid in files.items():
        kind = tree.kind(fid)

        if kind == 'directory':
            # Directories have no blob; they exist implicitly through the
            # paths of the files below them.
            continue

        if kind == 'symlink':
            mode = '120000'
            sha1 = None
            data = tree.get_symlink_target(fid)
            if not isinstance(data, bytes):
                # "data <n>" counts bytes, so encode text targets first.
                data = data.encode('utf-8')
        elif kind == 'file':
            if tree.is_executable(fid):
                mode = '100755'
            else:
                mode = '100644'

            sha1 = tree.get_file_sha1(fid)

            # is the blob already exported?
            if sha1 in filenodes:
                final.append((mode, filenodes[sha1], path))
                continue

            data = tree.get_file_text(fid)
        else:
            die("Unhandled type '%s', bailing out", kind)

        mark = marks.next_mark()
        if sha1 is not None:
            filenodes[sha1] = mark

        print("blob")
        print("mark :%u" % mark)
        print("data %d" % len(data))
        print(data)

        final.append((mode, mark, path))

    return final
222
def export_branch(branch, name):
    """Write the not yet exported revisions of ``branch`` as a fast-import
    stream on stdout.

    branch: branch object to export.
    name:   name used for the target ref (``<prefix>/heads/<name>``) and as
            key for the stored tip.

    Revisions after the stored tip that have no mark yet are written as
    commits, oldest first.  Afterwards the ref is reset to the mark of
    ``branch.last_revision()`` and that revision is recorded as the new tip.
    """
    ref = '%s/heads/%s' % (prefix, name)
    tip = marks.get_tip(name)

    repo = branch.repository
    repo.lock_read()
    # Release the lock even when exporting a revision fails.
    try:
        revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
        revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]
        count = 0

        for revid in revs:

            rev = repo.get_revision(revid)

            parents = rev.parent_ids
            committer = rev.committer.encode('utf-8')
            committer = "%s %u %s" % (fixup_user(committer), rev.timestamp, gittz(rev.timezone))
            # The committer identity is also used as the author.
            author = committer
            msg = rev.message.encode('utf-8')

            msg += '\n'

            if parents:
                parent = parents[0]
            else:
                parent = bzrlib.revision.NULL_REVISION

            cur_tree = repo.revision_tree(revid)
            prev = repo.revision_tree(parent)
            modified, removed = get_filechanges(cur_tree, prev)

            # Blobs have to be written before the commit that uses them.
            modified_final = export_files(cur_tree, modified)

            if not parents:
                print('reset %s' % ref)

            print("commit %s" % ref)
            print("mark :%d" % (marks.get_mark(revid)))
            print("author %s" % (author))
            print("committer %s" % (committer))
            print("data %d" % (len(msg)))
            print(msg)

            for i, p in enumerate(parents):
                try:
                    m = rev_to_mark(p)
                except KeyError:
                    # ghost?
                    continue
                if i == 0:
                    print("from :%s" % m)
                else:
                    print("merge :%s" % m)

            # File commands are applied in order: delete first, so that a
            # path which is removed and re-created in the same revision
            # (e.g. a rename followed by a new file at the old path) is not
            # wiped out again.
            for f in removed:
                print("D %s" % (f,))
            for f in modified_final:
                print("M %s :%u %s" % f)
            print("")

            count += 1
            if (count % 100 == 0):
                print("progress revision %s (%d/%d)" % (revid, count, len(revs)))
                print("#############################################################")
    finally:
        repo.unlock()

    revid = branch.last_revision()

    # make sure the ref is updated
    print("reset %s" % ref)
    print("from :%u" % rev_to_mark(revid))
    print("")

    marks.set_tip(name, revid)
303
def export_tag(repo, name):
    """Point ``refs/tags/<name>`` at the mark of the tagged revision.

    repo: unused; kept for the callers' signature.
    name: tag name, looked up in the module-level ``tags`` map.

    When the tag or its revision has no known mark a warning is written
    and nothing is emitted.  The lookup happens before any output, so a
    failed lookup no longer leaves a ``reset`` line without a ``from``.
    """
    try:
        mark = rev_to_mark(tags[name])
    except KeyError:
        warn("TODO: fetch tag '%s'", name)
        return

    print("reset refs/tags/%s" % name)
    print("from :%u" % mark)
    print("")
312
def do_import(parser):
    """Handle a batch of ``import <ref>`` commands.

    Announces the ``done`` and marks features, exports every requested
    branch or tag ref, and finishes the stream with ``done``.
    """
    branch = parser.repo
    marks_file = os.path.join(dirname, 'marks-git')

    print("feature done")
    # Marks can only be imported once an earlier run has written them.
    if os.path.exists(marks_file):
        print("feature import-marks=%s" % marks_file)
    print("feature export-marks=%s" % marks_file)
    sys.stdout.flush()

    heads_ns = 'refs/heads/'
    tags_ns = 'refs/tags/'

    while parser.check('import'):
        ref = parser[1]
        if ref.startswith(heads_ns):
            export_branch(branch, ref[len(heads_ns):])
        if ref.startswith(tags_ns):
            export_tag(branch, ref[len(tags_ns):])
        parser.next()

    print('done')

    sys.stdout.flush()
338
def parse_blob(parser):
    """Consume a ``blob`` command and remember its payload.

    The payload is stored in the module-level ``blob_marks`` map under the
    blob's mark.
    """
    # "blob" -> "mark :<n>"
    parser.next()
    blob_mark = parser.get_mark()
    # "mark :<n>" -> "data <size>"
    parser.next()
    blob_marks[blob_mark] = parser.get_data()
    # Move past the line that follows the payload.
    parser.next()
348
class CustomTree():
    """Minimal in-memory tree describing one incoming commit.

    It is handed to the commit builder in parse_commit(), which uses
    last_revision(), the result of iter_changes() and get_file_with_stat().

    Attributes:
      repo       -- object the base tree is read from.
      revid      -- id of the revision being created.
      parents    -- parent revision ids; the first one is the base.
      updates    -- path -> {'mode': ..., 'data': ...} or {'deleted': True}.
      base_id    -- id of the base revision (NULL_REVISION without parents).
      base_files -- path -> file id of the base revision.
      files      -- path -> file id of the new revision; starts as a copy of
                    ``base_files`` and is updated by iter_changes().

    The per-revision path maps are cached in the module-level
    ``files_cache``.
    """

    def __init__(self, repo, revid, parents, files):
        self.repo = repo
        self.revid = revid
        self.parents = parents
        self.updates = files

        def copy_tree(base_revid):
            # Build (and cache) the path -> file id map of a stored revision.
            files = files_cache[base_revid] = {}
            # Take the read lock before the revision tree is requested,
            # and keep the tree access inside the try block so the lock is
            # always released.
            repo.lock_read()
            try:
                tree = repo.repository.revision_tree(base_revid)
                for path, entry in tree.iter_entries_by_dir():
                    files[path] = entry.file_id
            finally:
                repo.unlock()
            return files

        if len(parents) == 0:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}
        else:
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            # An empty cached map is re-read from the repository as well.
            if not self.base_files:
                self.base_files = copy_tree(self.base_id)

        self.files = files_cache[revid] = self.base_files.copy()

    def last_revision(self):
        """Return the id of the revision this tree is based on."""
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples for ``self.updates``.

        Each tuple has eight fields: file id, (old path, new path), a
        content-changed flag, and pairs for versioned, parent id, name,
        kind and executable.
        NOTE(review): presumably this mirrors the tuples produced by
        bzrlib's Tree.iter_changes() -- confirm against the bzrlib docs.

        ``self.files`` is updated along the way, so the method is meant to
        be called once per instance.
        """
        changes = []

        def get_parent(dirname, basename):
            # File id of the directory containing an entry; directories
            # that do not exist yet are added on the fly.
            parent_fid = self.base_files.get(dirname, None)
            if parent_fid:
                return parent_fid
            parent_fid = self.files.get(dirname, None)
            if parent_fid:
                return parent_fid
            if basename == '':
                # The root directory has no parent.
                return None
            d = add_entry(dirname, 'directory')
            return d[0]

        def add_entry(path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            fid = bzrlib.generate_ids.gen_file_id(path)
            change = (fid,
                    (None, path),
                    True,
                    (False, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, False))
            self.files[path] = change[0]
            changes.append(change)
            return change

        def update_entry(path, kind):
            dirname, basename = os.path.split(path)
            fid = self.base_files[path]
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, path),
                    True,
                    (True, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, False))
            self.files[path] = change[0]
            changes.append(change)
            return change

        def remove_entry(path, kind):
            dirname, basename = os.path.split(path)
            fid = self.base_files[path]
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)
            return change

        for path, f in self.updates.items():
            if 'deleted' in f:
                remove_entry(path, 'file')
            elif path in self.base_files:
                update_entry(path, 'file')
            else:
                add_entry(path, 'file')

        return changes

    def get_file_with_stat(self, file_id, path=None):
        """Return ``(file object, None)`` with the new content of ``path``.

        The content is looked up by path in ``self.updates``; ``file_id``
        is not used.
        """
        return (StringIO.StringIO(self.updates[path]['data']), None)
459
def parse_commit(parser):
    """Consume one ``commit`` command and create the matching revision.

    Only ``refs/heads/master`` is accepted.  The commit's file commands
    (``M`` with a mark reference, ``D``) are collected, applied through a
    CustomTree and committed with the commit builder of ``parser.repo``.
    The new revision id is stored in ``parsed_refs`` and its mark in
    ``marks``.
    """
    parents = []

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    commit_mark = parser.get_mark()
    parser.next()
    # The current line is the author line.  It is skipped: only the
    # committer identity is recorded on the new revision.
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    files = {}

    for line in parser:
        if parser.check('M'):
            # "M <mode> :<mark> <path>"; the path may contain spaces.
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = {'mode': m, 'data': blob_marks[mark]}
        elif parser.check('D'):
            # "D <path>"; split only once so paths with spaces survive.
            t, path = line.split(' ', 1)
            f = {'deleted': True}
        else:
            die('Unknown file command: %s', line)
        files[path] = f

    repo = parser.repo

    if committer is None:
        # get_author() returns None for a line it cannot parse.
        die('Unable to parse the committer of commit :%d', commit_mark)

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = repo.nick

    mtree = CustomTree(repo, revid, parents, files)
    changes = mtree.iter_changes()

    repo.lock_write()
    try:
        builder = repo.get_commit_builder(parents, None, date, tz, committer, props, revid, False)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # Abort the half-built commit, then let the error propagate.
            builder.abort()
            raise
    finally:
        repo.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
527
def parse_reset(parser):
    """Consume a ``reset`` command for ``refs/heads/master``.

    A ``commit`` directly after the reset is handed to parse_commit(); a
    ``from`` line records the referenced revision in ``parsed_refs``.
    Anything else leaves the state untouched.
    """
    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()

        parsed_refs[ref] = mark_to_rev(from_mark)
547
def do_export(parser):
    """Handle the ``export`` command: read a fast-export stream and apply it.

    Commands are consumed until a ``done`` line.  ``blob``, ``commit`` and
    ``reset`` are processed, ``tag`` and ``feature`` lines are ignored and
    anything else aborts via die().  Afterwards the history of
    ``parser.repo`` is moved to the revision recorded for
    ``refs/heads/master``, handed to the module-level ``peer`` and
    acknowledged with an ``ok <ref>`` line; a blank line ends the reply.
    """
    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            pass
        elif parser.check('feature'):
            pass
        else:
            # Pass the line as an argument rather than pre-formatting it,
            # so a '%' inside the line cannot break the error report.
            die('unhandled export command: %s', line)

    repo = parser.repo

    for ref, revid in parsed_refs.items():
        # parse_commit() and parse_reset() only accept this ref.
        if ref == 'refs/heads/master':
            repo.generate_revision_history(revid, marks.get_tip('master'))
            revno, revid = repo.last_revision_info()
            peer.import_last_revision_info_and_tags(repo, revno, revid)
            print("ok %s" % ref)
    print("")
576
def do_capabilities(parser):
    """Answer the ``capabilities`` command.

    Prints the supported commands, the refspec for this remote and the
    marks file options, followed by a blank line.
    """
    marks_file = os.path.join(dirname, 'marks-git')

    reply = [
        "import",
        "export",
        "refspec refs/heads/*:%s/heads/*" % prefix,
    ]
    # Marks can only be imported once an earlier run has written them.
    if os.path.exists(marks_file):
        reply.append("*import-marks %s" % marks_file)
    reply.append("*export-marks %s" % marks_file)
    reply.append("")

    for text in reply:
        print(text)
591
def do_list(parser):
    """Answer the ``list`` command.

    Prints the ``master`` head, one line per tag of ``parser.repo`` and the
    HEAD symref, followed by a blank line.  Every tag is also remembered
    in the module-level ``tags`` map (tag name -> revision id).
    """
    head = 'refs/heads/%s' % 'master'

    print("? %s" % head)
    for tag_name, tag_rev in parser.repo.tags.get_tag_dict().items():
        print("? refs/tags/%s" % tag_name)
        tags[tag_name] = tag_rev
    print("@%s HEAD" % head)
    print("")
600
def get_repo(url, alias):
    """Open the remote branch at ``url`` and return a local mirror of it.

    The mirror lives in ``<dirname>/clone``.  It is created with
    ``sprout()`` and bound to the remote branch on first use, and updated
    with ``pull()`` afterwards.  The remote branch is stored in the
    module-level ``peer``.  ``alias`` is not used.
    """
    global peer

    clone_path = os.path.join(dirname, 'clone')
    origin = bzrlib.controldir.ControlDir.open(url)
    remote_branch = origin.open_branch()

    if not os.path.exists(clone_path):
        # clone
        control = origin.sprout(clone_path, None,
                hardlink=True, create_tree_if_local=False,
                source_branch=remote_branch)
        branch = control.open_branch()
        branch.bind(remote_branch)
    else:
        # pull
        control = bzrlib.controldir.ControlDir.open(clone_path)
        branch = control.open_branch()
        branch.pull(remote_branch, [], None, False)

    peer = remote_branch

    return branch
624
def main(args):
    """Run the remote helper.

    args: the command line; ``args[1]`` is the remote alias and ``args[2]``
          the URL of the Bazaar branch.

    Sets up the module-level state, prepares ``$GIT_DIR/bzr/<alias>``,
    opens the branch and then serves commands read from stdin until an
    empty line.  The marks are stored before returning.
    """
    global marks, prefix, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache

    # Fail with a readable message instead of an IndexError / KeyError.
    if len(args) < 3:
        die('expected two arguments: <alias> <url>')

    alias = args[1]
    url = args[2]

    prefix = 'refs/bzr/%s' % alias
    tags = {}
    filenodes = {}
    blob_marks = {}
    parsed_refs = {}
    files_cache = {}

    gitdir = os.environ.get('GIT_DIR')
    if not gitdir:
        die('GIT_DIR is not set')
    dirname = os.path.join(gitdir, 'bzr', alias)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    repo = get_repo(url, alias)

    marks_path = os.path.join(dirname, 'marks-int')
    marks = Marks(marks_path)

    # Checked in this order; the first matching prefix wins.
    handlers = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for word, handler in handlers:
            if parser.check(word):
                handler(parser)
                break
        else:
            # Pass the line as an argument so a '%' in it cannot break
            # the error report.
            die('unhandled command: %s', line)
        sys.stdout.flush()

    marks.store()
668
# Only run the helper when executed as a script, not when imported.
if __name__ == '__main__':
    sys.exit(main(sys.argv))