2bae5d034159b7d2c85e82d7bd5269257077a92a
1#!/usr/bin/env python
2#
3# Copyright (c) 2012 Felipe Contreras
4#
5
6#
7# Just copy to your ~/bin, or anywhere in your $PATH.
8# Then you can clone with:
9# % git clone bzr::/path/to/bzr/repo/or/url
10#
11# For example:
12# % git clone bzr::$HOME/myrepo
13# or
14# % git clone bzr::lp:myrepo
15#
16
17import sys
18
19import bzrlib
20bzrlib.initialize()
21
22import bzrlib.plugin
23bzrlib.plugin.load_plugins()
24
25import bzrlib.generate_ids
26
27import sys
28import os
29import json
30import re
31import StringIO
32
# Raw string literals keep the regex escapes (\w, \d) out of Python's own
# string-escape processing; the compiled patterns are unchanged.

# Leading part of a user string up to the first angle bracket.
NAME_RE = re.compile(r'^([^<>]+)')
# "Name <email>" where the name part is optional.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Ident line of the form "<keyword> <name> <<email>> <seconds> <+/-hhmm>".
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
36
def die(msg, *args):
    """Write an ``ERROR:`` line to stderr and exit with status 1.

    ``msg`` is %-interpolated with ``args`` only when arguments are supplied.
    A caller that passes an already formatted message (possibly containing a
    literal ``%``) therefore gets it printed verbatim instead of triggering
    a formatting exception.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
40
def warn(msg, *args):
    """Write a ``WARNING:`` line to stderr.

    ``msg`` is %-interpolated with ``args`` only when arguments are supplied,
    so a pre-formatted message containing a literal ``%`` is printed as-is.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
43
def gittz(tz):
    """Format an offset in seconds as a ``+hhmm`` / ``-hhmm`` string.

    The sign is handled separately from the magnitude: floor division and
    modulo on a negative number would otherwise turn e.g. -12600 seconds
    (-03:30) into ``-0430``.
    """
    sign = '-' if tz < 0 else '+'
    hours, remainder = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
46
class Marks:
    """Bookkeeping of revision <-> mark mappings and branch tips.

    The state is persisted as JSON in the file at ``path`` under the keys
    ``tips``, ``marks`` and ``last-mark``.
    """

    def __init__(self, path):
        """Create the store and load any state already saved at ``path``."""
        self.path = path
        self.tips = {}       # branch name -> tip revision
        self.marks = {}      # revision -> mark
        self.rev_marks = {}  # mark -> revision
        self.last_mark = 0
        self.load()

    def load(self):
        """Read the saved state; do nothing when the file does not exist."""
        if not os.path.exists(self.path):
            return

        # Close the handle deterministically instead of leaking it.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # The reverse mapping is not stored; rebuild it from ``marks``.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the persistent state as a plain dictionary."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to ``self.path`` as JSON."""
        # The context manager guarantees the data is flushed and closed.
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of ``rev``; raises KeyError when unknown."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision of ``mark``; raises KeyError when unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a fresh mark not tied to any revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a fresh mark, bind it to ``rev`` and return it."""
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        # Keep the reverse mapping in sync, as new_mark() does.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def is_marked(self, rev):
        """Tell whether ``rev`` already has a mark."""
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Bind an externally chosen ``mark`` to ``rev``."""
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the recorded tip of ``branch`` or None."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        """Record ``tip`` as the tip of ``branch``."""
        self.tips[branch] = tip
106
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    ``self.line`` always holds the current (stripped) line; the helper
    methods interpret that line.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace removed."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with ``word``."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals ``separator``.

        A new line is read each time the generator is resumed, i.e. after
        the consumer has finished handling the yielded line.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # Iterating the parser walks one block terminated by an empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a ``done`` line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ``:`` on the line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a ``data <size>`` line.

        Returns None when the current line is not a ``data`` line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse an ident line into ``(user, timestamp, tz_seconds)``.

        Returns None when the current line does not match RAW_AUTHOR_RE.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        committer = '%s <%s>' % (name, email)
        # Convert "+hhmm"/"-hhmm" to seconds.  The sign is applied to the
        # whole offset: dividing a negative number directly would floor
        # towards minus infinity and turn -0330 into -02:50.
        sign = -1 if tz.startswith('-') else 1
        hhmm = int(tz[1:])
        tz = sign * ((hhmm // 100) * 3600 + (hhmm % 100) * 60)
        return (committer, int(date), tz)
155
def rev_to_mark(rev):
    """Look up the mark recorded for ``rev`` in the global marks store."""
    # Only read access is needed, so no ``global`` declaration is required.
    mark = marks.from_rev(rev)
    return mark
159
def mark_to_rev(mark):
    """Look up the revision recorded for ``mark`` in the global marks store."""
    # Only read access is needed, so no ``global`` declaration is required.
    rev = marks.to_rev(mark)
    return rev
163
def fixup_user(user):
    """Normalise a user string to the ``Name <email>`` form.

    Double quotes are removed.  When the string has the ``Name <email>``
    shape both parts are taken from it; otherwise the text before any
    angle bracket is used as the name.  A part that cannot be determined
    is replaced by a placeholder (``unknown`` for the name, ``Unknown``
    for the email) so the literal text ``None`` never ends up in the
    result.
    """
    name = mail = None
    user = user.replace('"', '')
    m = AUTHOR_RE.match(user)
    if m:
        name = m.group(1)
        mail = m.group(2).strip()
    else:
        m = NAME_RE.match(user)
        if m:
            name = m.group(1).strip()

    if not name:
        name = 'unknown'
    if not mail:
        mail = 'Unknown'

    return '%s <%s>' % (name, mail)
177
def get_filechanges(cur, prev):
    """Collect the paths that differ between two trees.

    Returns ``(modified, removed)``: ``modified`` maps each added, modified
    or renamed-to path to its file id, ``removed`` maps each removed or
    renamed-from path to None.
    """
    delta = cur.changes_from(prev)

    modified = {}
    removed = {}

    for new_path, file_id, _kind in delta.added:
        modified[new_path] = file_id
    for gone_path, _file_id, _kind in delta.removed:
        removed[gone_path] = None
    for changed_path, file_id, _kind, _text_mod, _meta_mod in delta.modified:
        modified[changed_path] = file_id
    # A rename is expressed as removal of the old path plus a new entry.
    for old_path, new_path, file_id, _kind, _text_mod, _meta_mod in delta.renamed:
        removed[old_path] = None
        modified[new_path] = file_id

    return modified, removed
195
196def export_files(tree, files):
197 global marks, filenodes
198
199 final = []
200 for path, fid in files.iteritems():
201 kind = tree.kind(fid)
202
203 h = tree.get_file_sha1(fid)
204
205 if kind == 'symlink':
206 d = tree.get_symlink_target(fid)
207 mode = '120000'
208 elif kind == 'file':
209
210 if tree.is_executable(fid):
211 mode = '100755'
212 else:
213 mode = '100644'
214
215 # is the blog already exported?
216 if h in filenodes:
217 mark = filenodes[h]
218 final.append((mode, mark, path))
219 continue
220
221 d = tree.get_file_text(fid)
222 elif kind == 'directory':
223 continue
224 else:
225 die("Unhandled kind '%s' for path '%s'" % (kind, path))
226
227 mark = marks.next_mark()
228 filenodes[h] = mark
229
230 print "blob"
231 print "mark :%u" % mark
232 print "data %d" % len(d)
233 print d
234
235 final.append((mode, mark, path))
236
237 return final
238
239def export_branch(branch, name):
240 global prefix, dirname
241
242 ref = '%s/heads/%s' % (prefix, name)
243 tip = marks.get_tip(name)
244
245 repo = branch.repository
246 repo.lock_read()
247 revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
248 count = 0
249
250 revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]
251
252 for revid in revs:
253
254 rev = repo.get_revision(revid)
255
256 parents = rev.parent_ids
257 time = rev.timestamp
258 tz = rev.timezone
259 committer = rev.committer.encode('utf-8')
260 committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz))
261 author = committer
262 msg = rev.message.encode('utf-8')
263
264 msg += '\n'
265
266 if len(parents) == 0:
267 parent = bzrlib.revision.NULL_REVISION
268 else:
269 parent = parents[0]
270
271 cur_tree = repo.revision_tree(revid)
272 prev = repo.revision_tree(parent)
273 modified, removed = get_filechanges(cur_tree, prev)
274
275 modified_final = export_files(cur_tree, modified)
276
277 if len(parents) == 0:
278 print 'reset %s' % ref
279
280 print "commit %s" % ref
281 print "mark :%d" % (marks.get_mark(revid))
282 print "author %s" % (author)
283 print "committer %s" % (committer)
284 print "data %d" % (len(msg))
285 print msg
286
287 for i, p in enumerate(parents):
288 try:
289 m = rev_to_mark(p)
290 except KeyError:
291 # ghost?
292 continue
293 if i == 0:
294 print "from :%s" % m
295 else:
296 print "merge :%s" % m
297
298 for f in modified_final:
299 print "M %s :%u %s" % f
300 for f in removed:
301 print "D %s" % (f)
302 print
303
304 count += 1
305 if (count % 100 == 0):
306 print "progress revision %s (%d/%d)" % (revid, count, len(revs))
307 print "#############################################################"
308
309 repo.unlock()
310
311 revid = branch.last_revision()
312
313 # make sure the ref is updated
314 print "reset %s" % ref
315 print "from :%u" % rev_to_mark(revid)
316 print
317
318 marks.set_tip(name, revid)
319
320def export_tag(repo, name):
321 global tags
322 try:
323 print "reset refs/tags/%s" % name
324 print "from :%u" % rev_to_mark(tags[name])
325 print
326 except KeyError:
327 warn("TODO: fetch tag '%s'" % name)
328
329def do_import(parser):
330 global dirname
331
332 branch = parser.repo
333 path = os.path.join(dirname, 'marks-git')
334
335 print "feature done"
336 if os.path.exists(path):
337 print "feature import-marks=%s" % path
338 print "feature export-marks=%s" % path
339 sys.stdout.flush()
340
341 while parser.check('import'):
342 ref = parser[1]
343 if ref.startswith('refs/heads/'):
344 name = ref[len('refs/heads/'):]
345 export_branch(branch, name)
346 if ref.startswith('refs/tags/'):
347 name = ref[len('refs/tags/'):]
348 export_tag(branch, name)
349 parser.next()
350
351 print 'done'
352
353 sys.stdout.flush()
354
def parse_blob(parser):
    """Read one ``blob`` command and keep its data under its mark.

    The line after ``blob`` is read as the mark and the line after that
    as the data header; the payload is stored in the global ``blob_marks``.
    """
    global blob_marks

    parser.next()               # move from "blob" to the mark line
    blob_id = parser.get_mark()
    parser.next()               # move to the data line
    blob_marks[blob_id] = parser.get_data()
    parser.next()
364
class CustomTree():
    """Minimal tree-like object describing one incoming commit.

    It combines the path -> file id listing of the first parent with the
    file updates of the commit and produces the change tuples that are
    handed to the commit builder in parse_commit().
    """

    def __init__(self, repo, revid, parents, files):
        """Prepare the listing for ``revid``.

        ``files`` maps paths to dicts that carry either a ``deleted`` key
        or the ``data`` of the file.  Listings are cached per revision in
        the global ``files_cache``.
        """
        global files_cache

        self.repo = repo
        self.revid = revid
        self.parents = parents
        self.updates = files

        def copy_tree(revid):
            # Build (and cache) the path -> file id listing of ``revid``.
            files = files_cache[revid] = {}
            tree = repo.repository.revision_tree(revid)
            repo.lock_read()
            try:
                for path, entry in tree.iter_entries_by_dir():
                    files[path] = entry.file_id
            finally:
                repo.unlock()
            return files

        if len(parents) == 0:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}
        else:
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            # Only a missing cache entry needs a rebuild; a cached empty
            # listing is a valid hit and must not be re-read every time.
            if self.base_files is None:
                self.base_files = copy_tree(self.base_id)

        # Work on a copy so the parent's cached listing stays untouched.
        self.files = files_cache[revid] = self.base_files.copy()

    def last_revision(self):
        """Return the revision the changes are relative to."""
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples for this commit's updates."""
        changes = []

        def get_parent(dirname, basename):
            # File id of the directory containing an entry; missing
            # directories are created on demand through add_entry().
            parent_fid = self.base_files.get(dirname, None)
            if parent_fid:
                return parent_fid
            parent_fid = self.files.get(dirname, None)
            if parent_fid:
                return parent_fid
            if basename == '':
                # Splitting the root path yields an empty basename; the
                # root has no parent.
                return None
            d = add_entry(dirname, 'directory')
            return d[0]

        def add_entry(path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            fid = bzrlib.generate_ids.gen_file_id(path)
            change = (fid,
                    (None, path),
                    True,
                    (False, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, False))
            self.files[path] = change[0]
            changes.append(change)
            return change

        def update_entry(path, kind):
            dirname, basename = os.path.split(path)
            fid = self.base_files[path]
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, path),
                    True,
                    (True, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, False))
            self.files[path] = change[0]
            changes.append(change)
            return change

        def remove_entry(path, kind):
            dirname, basename = os.path.split(path)
            fid = self.base_files[path]
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)
            return change

        for path, f in self.updates.iteritems():
            if 'deleted' in f:
                remove_entry(path, 'file')
            elif path in self.base_files:
                update_entry(path, 'file')
            else:
                add_entry(path, 'file')

        return changes

    def get_file_with_stat(self, file_id, path=None):
        """Return ``(file object, None)`` holding the new data of ``path``.

        The content is looked up by ``path`` in the commit's updates;
        ``file_id`` is not used.
        """
        return (StringIO.StringIO(self.updates[path]['data']), None)
475
def parse_commit(parser):
    """Read one ``commit`` command and record it in the bzr branch.

    Reads the mark, author, committer, message, parents and file commands,
    builds the revision through the commit builder of ``parser.repo`` and
    registers the new revision id under the commit's mark.
    """
    global marks, blob_marks, parsed_refs

    parents = []

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'data' : blob_marks[mark] }
        elif parser.check('D'):
            # Split once only: the path itself may contain spaces.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s', line)
        files[path] = f

    repo = parser.repo

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = repo.nick

    mtree = CustomTree(repo, revid, parents, files)
    changes = mtree.iter_changes()

    repo.lock_write()
    try:
        builder = repo.get_commit_builder(parents, None, date, tz, committer, props, revid, False)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # Abort the half-built commit, then let the error propagate.
            builder.abort()
            raise
    finally:
        repo.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
543
def parse_reset(parser):
    """Handle a ``reset <ref>`` command.

    When a ``commit`` follows directly it is handed to parse_commit().
    When a ``from`` line follows, the ref is recorded as pointing at the
    revision of that mark.  Anything else leaves the state unchanged.
    """
    global parsed_refs

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        source_mark = parser.get_mark()
        parser.next()
        parsed_refs[ref] = mark_to_rev(source_mark)
563
564def do_export(parser):
565 global parsed_refs, dirname, peer
566
567 parser.next()
568
569 for line in parser.each_block('done'):
570 if parser.check('blob'):
571 parse_blob(parser)
572 elif parser.check('commit'):
573 parse_commit(parser)
574 elif parser.check('reset'):
575 parse_reset(parser)
576 elif parser.check('tag'):
577 pass
578 elif parser.check('feature'):
579 pass
580 else:
581 die('unhandled export command: %s' % line)
582
583 repo = parser.repo
584
585 for ref, revid in parsed_refs.iteritems():
586 if ref == 'refs/heads/master':
587 repo.generate_revision_history(revid, marks.get_tip('master'))
588 revno, revid = repo.last_revision_info()
589 peer.import_last_revision_info_and_tags(repo, revno, revid)
590 wt = peer.bzrdir.open_workingtree()
591 wt.update()
592 print "ok %s" % ref
593 print
594
595def do_capabilities(parser):
596 global dirname
597
598 print "import"
599 print "export"
600 print "refspec refs/heads/*:%s/heads/*" % prefix
601
602 path = os.path.join(dirname, 'marks-git')
603
604 if os.path.exists(path):
605 print "*import-marks %s" % path
606 print "*export-marks %s" % path
607
608 print
609
610def do_list(parser):
611 global tags
612 print "? refs/heads/%s" % 'master'
613 for tag, revid in parser.repo.tags.get_tag_dict().items():
614 print "? refs/tags/%s" % tag
615 tags[tag] = revid
616 print "@refs/heads/%s HEAD" % 'master'
617 print
618
def get_repo(url, alias):
    """Return the local clone branch for ``url``, creating it if needed.

    The clone lives in ``<dirname>/clone``.  An existing clone is updated
    with a pull from the remote branch; otherwise the remote is sprouted
    into that directory and the new branch is bound to the remote one.
    The remote branch is stored in the global ``peer``.
    """
    global dirname, peer

    clone_path = os.path.join(dirname, 'clone')
    origin = bzrlib.controldir.ControlDir.open(url)
    remote_branch = origin.open_branch()

    if not os.path.exists(clone_path):
        # clone
        control = origin.sprout(clone_path, None,
                hardlink=True, create_tree_if_local=False,
                source_branch=remote_branch)
        branch = control.open_branch()
        branch.bind(remote_branch)
    else:
        # pull
        control = bzrlib.controldir.ControlDir.open(clone_path)
        branch = control.open_branch()
        branch.pull(remote_branch, [], None, False)

    peer = remote_branch

    return branch
642
def main(args):
    """Entry point: ``args[1]`` is the remote alias, ``args[2]`` its URL.

    Sets up the global state, opens the local clone, then serves commands
    read from stdin until an empty line is read.  The marks are saved at
    the end.
    """
    global marks, prefix, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache

    alias, url = args[1], args[2]

    prefix = 'refs/bzr/%s' % alias
    tags = {}
    filenodes = {}
    blob_marks = {}
    parsed_refs = {}
    files_cache = {}

    # Per-remote state lives in $GIT_DIR/bzr/<alias>.
    dirname = os.path.join(os.environ['GIT_DIR'], 'bzr', alias)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    repo = get_repo(url, alias)

    marks = Marks(os.path.join(dirname, 'marks-int'))

    # Checked in this order; the first matching prefix wins.
    commands = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for word, handler in commands:
            if parser.check(word):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()

    marks.store()
686
# Run the helper only when executed as a script, not when imported.
if __name__ == '__main__':
    sys.exit(main(sys.argv))