contrib / remote-helpers / git-remote-bzron commit upload-pack: simplify request validation (3f1da57)
   1#!/usr/bin/env python
   2#
   3# Copyright (c) 2012 Felipe Contreras
   4#
   5
   6#
   7# Just copy to your ~/bin, or anywhere in your $PATH.
   8# Then you can clone with:
   9# % git clone bzr::/path/to/bzr/repo/or/url
  10#
  11# For example:
  12# % git clone bzr::$HOME/myrepo
  13# or
  14# % git clone bzr::lp:myrepo
  15#
  16
  17import sys
  18
  19import bzrlib
  20if hasattr(bzrlib, "initialize"):
  21    bzrlib.initialize()
  22
  23import bzrlib.plugin
  24bzrlib.plugin.load_plugins()
  25
  26import bzrlib.generate_ids
  27import bzrlib.transport
  28
  29import sys
  30import os
  31import json
  32import re
  33import StringIO
  34
  35NAME_RE = re.compile('^([^<>]+)')
  36AUTHOR_RE = re.compile('^([^<>]+?)? ?<([^<>]*)>$')
  37RAW_AUTHOR_RE = re.compile('^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
  38
  39def die(msg, *args):
  40    sys.stderr.write('ERROR: %s\n' % (msg % args))
  41    sys.exit(1)
  42
  43def warn(msg, *args):
  44    sys.stderr.write('WARNING: %s\n' % (msg % args))
  45
  46def gittz(tz):
  47    return '%+03d%02d' % (tz / 3600, tz % 3600 / 60)
  48
  49class Marks:
  50
  51    def __init__(self, path):
  52        self.path = path
  53        self.tips = {}
  54        self.marks = {}
  55        self.rev_marks = {}
  56        self.last_mark = 0
  57        self.load()
  58
  59    def load(self):
  60        if not os.path.exists(self.path):
  61            return
  62
  63        tmp = json.load(open(self.path))
  64        self.tips = tmp['tips']
  65        self.marks = tmp['marks']
  66        self.last_mark = tmp['last-mark']
  67
  68        for rev, mark in self.marks.iteritems():
  69            self.rev_marks[mark] = rev
  70
  71    def dict(self):
  72        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }
  73
  74    def store(self):
  75        json.dump(self.dict(), open(self.path, 'w'))
  76
  77    def __str__(self):
  78        return str(self.dict())
  79
  80    def from_rev(self, rev):
  81        return self.marks[rev]
  82
  83    def to_rev(self, mark):
  84        return self.rev_marks[mark]
  85
  86    def next_mark(self):
  87        self.last_mark += 1
  88        return self.last_mark
  89
  90    def get_mark(self, rev):
  91        self.last_mark += 1
  92        self.marks[rev] = self.last_mark
  93        return self.last_mark
  94
  95    def is_marked(self, rev):
  96        return self.marks.has_key(rev)
  97
  98    def new_mark(self, rev, mark):
  99        self.marks[rev] = mark
 100        self.rev_marks[mark] = rev
 101        self.last_mark = mark
 102
 103    def get_tip(self, branch):
 104        return self.tips.get(branch, None)
 105
 106    def set_tip(self, branch, tip):
 107        self.tips[branch] = tip
 108
 109class Parser:
 110
 111    def __init__(self, repo):
 112        self.repo = repo
 113        self.line = self.get_line()
 114
 115    def get_line(self):
 116        return sys.stdin.readline().strip()
 117
 118    def __getitem__(self, i):
 119        return self.line.split()[i]
 120
 121    def check(self, word):
 122        return self.line.startswith(word)
 123
 124    def each_block(self, separator):
 125        while self.line != separator:
 126            yield self.line
 127            self.line = self.get_line()
 128
 129    def __iter__(self):
 130        return self.each_block('')
 131
 132    def next(self):
 133        self.line = self.get_line()
 134        if self.line == 'done':
 135            self.line = None
 136
 137    def get_mark(self):
 138        i = self.line.index(':') + 1
 139        return int(self.line[i:])
 140
 141    def get_data(self):
 142        if not self.check('data'):
 143            return None
 144        i = self.line.index(' ') + 1
 145        size = int(self.line[i:])
 146        return sys.stdin.read(size)
 147
 148    def get_author(self):
 149        m = RAW_AUTHOR_RE.match(self.line)
 150        if not m:
 151            return None
 152        _, name, email, date, tz = m.groups()
 153        committer = '%s <%s>' % (name, email)
 154        tz = int(tz)
 155        tz = ((tz / 100) * 3600) + ((tz % 100) * 60)
 156        return (committer, int(date), tz)
 157
 158def rev_to_mark(rev):
 159    global marks
 160    return marks.from_rev(rev)
 161
 162def mark_to_rev(mark):
 163    global marks
 164    return marks.to_rev(mark)
 165
 166def fixup_user(user):
 167    name = mail = None
 168    user = user.replace('"', '')
 169    m = AUTHOR_RE.match(user)
 170    if m:
 171        name = m.group(1)
 172        mail = m.group(2).strip()
 173    else:
 174        m = NAME_RE.match(user)
 175        if m:
 176            name = m.group(1).strip()
 177
 178    return '%s <%s>' % (name, mail)
 179
 180def get_filechanges(cur, prev):
 181    modified = {}
 182    removed = {}
 183
 184    changes = cur.changes_from(prev)
 185
 186    for path, fid, kind in changes.added:
 187        modified[path] = fid
 188    for path, fid, kind in changes.removed:
 189        removed[path] = None
 190    for path, fid, kind, mod, _ in changes.modified:
 191        modified[path] = fid
 192    for oldpath, newpath, fid, kind, mod, _ in changes.renamed:
 193        removed[oldpath] = None
 194        modified[newpath] = fid
 195
 196    return modified, removed
 197
 198def export_files(tree, files):
 199    global marks, filenodes
 200
 201    final = []
 202    for path, fid in files.iteritems():
 203        kind = tree.kind(fid)
 204
 205        h = tree.get_file_sha1(fid)
 206
 207        if kind == 'symlink':
 208            d = tree.get_symlink_target(fid)
 209            mode = '120000'
 210        elif kind == 'file':
 211
 212            if tree.is_executable(fid):
 213                mode = '100755'
 214            else:
 215                mode = '100644'
 216
 217            # is the blog already exported?
 218            if h in filenodes:
 219                mark = filenodes[h]
 220                final.append((mode, mark, path))
 221                continue
 222
 223            d = tree.get_file_text(fid)
 224        elif kind == 'directory':
 225            continue
 226        else:
 227            die("Unhandled kind '%s' for path '%s'" % (kind, path))
 228
 229        mark = marks.next_mark()
 230        filenodes[h] = mark
 231
 232        print "blob"
 233        print "mark :%u" % mark
 234        print "data %d" % len(d)
 235        print d
 236
 237        final.append((mode, mark, path))
 238
 239    return final
 240
 241def export_branch(branch, name):
 242    global prefix, dirname
 243
 244    ref = '%s/heads/%s' % (prefix, name)
 245    tip = marks.get_tip(name)
 246
 247    repo = branch.repository
 248    repo.lock_read()
 249    revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
 250    count = 0
 251
 252    revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]
 253
 254    for revid in revs:
 255
 256        rev = repo.get_revision(revid)
 257
 258        parents = rev.parent_ids
 259        time = rev.timestamp
 260        tz = rev.timezone
 261        committer = rev.committer.encode('utf-8')
 262        committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz))
 263        author = committer
 264        msg = rev.message.encode('utf-8')
 265
 266        msg += '\n'
 267
 268        if len(parents) == 0:
 269            parent = bzrlib.revision.NULL_REVISION
 270        else:
 271            parent = parents[0]
 272
 273        cur_tree = repo.revision_tree(revid)
 274        prev = repo.revision_tree(parent)
 275        modified, removed = get_filechanges(cur_tree, prev)
 276
 277        modified_final = export_files(cur_tree, modified)
 278
 279        if len(parents) == 0:
 280            print 'reset %s' % ref
 281
 282        print "commit %s" % ref
 283        print "mark :%d" % (marks.get_mark(revid))
 284        print "author %s" % (author)
 285        print "committer %s" % (committer)
 286        print "data %d" % (len(msg))
 287        print msg
 288
 289        for i, p in enumerate(parents):
 290            try:
 291                m = rev_to_mark(p)
 292            except KeyError:
 293                # ghost?
 294                continue
 295            if i == 0:
 296                print "from :%s" % m
 297            else:
 298                print "merge :%s" % m
 299
 300        for f in modified_final:
 301            print "M %s :%u %s" % f
 302        for f in removed:
 303            print "D %s" % (f)
 304        print
 305
 306        count += 1
 307        if (count % 100 == 0):
 308            print "progress revision %s (%d/%d)" % (revid, count, len(revs))
 309            print "#############################################################"
 310
 311    repo.unlock()
 312
 313    revid = branch.last_revision()
 314
 315    # make sure the ref is updated
 316    print "reset %s" % ref
 317    print "from :%u" % rev_to_mark(revid)
 318    print
 319
 320    marks.set_tip(name, revid)
 321
 322def export_tag(repo, name):
 323    global tags
 324    try:
 325        print "reset refs/tags/%s" % name
 326        print "from :%u" % rev_to_mark(tags[name])
 327        print
 328    except KeyError:
 329        warn("TODO: fetch tag '%s'" % name)
 330
 331def do_import(parser):
 332    global dirname
 333
 334    branch = parser.repo
 335    path = os.path.join(dirname, 'marks-git')
 336
 337    print "feature done"
 338    if os.path.exists(path):
 339        print "feature import-marks=%s" % path
 340    print "feature export-marks=%s" % path
 341    sys.stdout.flush()
 342
 343    while parser.check('import'):
 344        ref = parser[1]
 345        if ref.startswith('refs/heads/'):
 346            name = ref[len('refs/heads/'):]
 347            export_branch(branch, name)
 348        if ref.startswith('refs/tags/'):
 349            name = ref[len('refs/tags/'):]
 350            export_tag(branch, name)
 351        parser.next()
 352
 353    print 'done'
 354
 355    sys.stdout.flush()
 356
 357def parse_blob(parser):
 358    global blob_marks
 359
 360    parser.next()
 361    mark = parser.get_mark()
 362    parser.next()
 363    data = parser.get_data()
 364    blob_marks[mark] = data
 365    parser.next()
 366
 367class CustomTree():
 368
 369    def __init__(self, repo, revid, parents, files):
 370        global files_cache
 371
 372        self.repo = repo
 373        self.revid = revid
 374        self.parents = parents
 375        self.updates = {}
 376
 377        def copy_tree(revid):
 378            files = files_cache[revid] = {}
 379            tree = repo.repository.revision_tree(revid)
 380            repo.lock_read()
 381            try:
 382                for path, entry in tree.iter_entries_by_dir():
 383                    files[path] = entry.file_id
 384            finally:
 385                repo.unlock()
 386            return files
 387
 388        if len(parents) == 0:
 389            self.base_id = bzrlib.revision.NULL_REVISION
 390            self.base_files = {}
 391        else:
 392            self.base_id = parents[0]
 393            self.base_files = files_cache.get(self.base_id, None)
 394            if not self.base_files:
 395                self.base_files = copy_tree(self.base_id)
 396
 397        self.files = files_cache[revid] = self.base_files.copy()
 398
 399        for path, f in files.iteritems():
 400            fid = self.files.get(path, None)
 401            if not fid:
 402                fid = bzrlib.generate_ids.gen_file_id(path)
 403            f['path'] = path
 404            self.updates[fid] = f
 405
 406    def last_revision(self):
 407        return self.base_id
 408
 409    def iter_changes(self):
 410        changes = []
 411
 412        def get_parent(dirname, basename):
 413            parent_fid = self.base_files.get(dirname, None)
 414            if parent_fid:
 415                return parent_fid
 416            parent_fid = self.files.get(dirname, None)
 417            if parent_fid:
 418                return parent_fid
 419            if basename == '':
 420                return None
 421            fid = bzrlib.generate_ids.gen_file_id(path)
 422            d = add_entry(fid, dirname, 'directory')
 423            return fid
 424
 425        def add_entry(fid, path, kind, mode = None):
 426            dirname, basename = os.path.split(path)
 427            parent_fid = get_parent(dirname, basename)
 428
 429            executable = False
 430            if mode == '100755':
 431                executable = True
 432            elif mode == '120000':
 433                kind = 'symlink'
 434
 435            change = (fid,
 436                    (None, path),
 437                    True,
 438                    (False, True),
 439                    (None, parent_fid),
 440                    (None, basename),
 441                    (None, kind),
 442                    (None, executable))
 443            self.files[path] = change[0]
 444            changes.append(change)
 445            return change
 446
 447        def update_entry(fid, path, kind, mode = None):
 448            dirname, basename = os.path.split(path)
 449            parent_fid = get_parent(dirname, basename)
 450
 451            executable = False
 452            if mode == '100755':
 453                executable = True
 454            elif mode == '120000':
 455                kind = 'symlink'
 456
 457            change = (fid,
 458                    (path, path),
 459                    True,
 460                    (True, True),
 461                    (None, parent_fid),
 462                    (None, basename),
 463                    (None, kind),
 464                    (None, executable))
 465            self.files[path] = change[0]
 466            changes.append(change)
 467            return change
 468
 469        def remove_entry(fid, path, kind):
 470            dirname, basename = os.path.split(path)
 471            parent_fid = get_parent(dirname, basename)
 472            change = (fid,
 473                    (path, None),
 474                    True,
 475                    (True, False),
 476                    (parent_fid, None),
 477                    (None, None),
 478                    (None, None),
 479                    (None, None))
 480            del self.files[path]
 481            changes.append(change)
 482            return change
 483
 484        for fid, f in self.updates.iteritems():
 485            path = f['path']
 486
 487            if 'deleted' in f:
 488                remove_entry(fid, path, 'file')
 489                continue
 490
 491            if path in self.base_files:
 492                update_entry(fid, path, 'file', f['mode'])
 493            else:
 494                add_entry(fid, path, 'file', f['mode'])
 495
 496        return changes
 497
 498    def get_file_with_stat(self, file_id, path=None):
 499        return (StringIO.StringIO(self.updates[file_id]['data']), None)
 500
 501    def get_symlink_target(self, file_id):
 502        return self.updates[file_id]['data']
 503
 504def parse_commit(parser):
 505    global marks, blob_marks, bmarks, parsed_refs
 506    global mode
 507
 508    parents = []
 509
 510    ref = parser[1]
 511    parser.next()
 512
 513    if ref != 'refs/heads/master':
 514        die("bzr doesn't support multiple branches; use 'master'")
 515
 516    commit_mark = parser.get_mark()
 517    parser.next()
 518    author = parser.get_author()
 519    parser.next()
 520    committer = parser.get_author()
 521    parser.next()
 522    data = parser.get_data()
 523    parser.next()
 524    if parser.check('from'):
 525        parents.append(parser.get_mark())
 526        parser.next()
 527    while parser.check('merge'):
 528        parents.append(parser.get_mark())
 529        parser.next()
 530
 531    files = {}
 532
 533    for line in parser:
 534        if parser.check('M'):
 535            t, m, mark_ref, path = line.split(' ', 3)
 536            mark = int(mark_ref[1:])
 537            f = { 'mode' : m, 'data' : blob_marks[mark] }
 538        elif parser.check('D'):
 539            t, path = line.split(' ')
 540            f = { 'deleted' : True }
 541        else:
 542            die('Unknown file command: %s' % line)
 543        files[path] = f
 544
 545    repo = parser.repo
 546
 547    committer, date, tz = committer
 548    parents = [str(mark_to_rev(p)) for p in parents]
 549    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
 550    props = {}
 551    props['branch-nick'] = repo.nick
 552
 553    mtree = CustomTree(repo, revid, parents, files)
 554    changes = mtree.iter_changes()
 555
 556    repo.lock_write()
 557    try:
 558        builder = repo.get_commit_builder(parents, None, date, tz, committer, props, revid)
 559        try:
 560            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
 561            builder.finish_inventory()
 562            builder.commit(data.decode('utf-8', 'replace'))
 563        except Exception, e:
 564            builder.abort()
 565            raise
 566    finally:
 567        repo.unlock()
 568
 569    parsed_refs[ref] = revid
 570    marks.new_mark(revid, commit_mark)
 571
 572def parse_reset(parser):
 573    global parsed_refs
 574
 575    ref = parser[1]
 576    parser.next()
 577
 578    if ref != 'refs/heads/master':
 579        die("bzr doesn't support multiple branches; use 'master'")
 580
 581    # ugh
 582    if parser.check('commit'):
 583        parse_commit(parser)
 584        return
 585    if not parser.check('from'):
 586        return
 587    from_mark = parser.get_mark()
 588    parser.next()
 589
 590    parsed_refs[ref] = mark_to_rev(from_mark)
 591
 592def do_export(parser):
 593    global parsed_refs, dirname, peer
 594
 595    parser.next()
 596
 597    for line in parser.each_block('done'):
 598        if parser.check('blob'):
 599            parse_blob(parser)
 600        elif parser.check('commit'):
 601            parse_commit(parser)
 602        elif parser.check('reset'):
 603            parse_reset(parser)
 604        elif parser.check('tag'):
 605            pass
 606        elif parser.check('feature'):
 607            pass
 608        else:
 609            die('unhandled export command: %s' % line)
 610
 611    repo = parser.repo
 612
 613    for ref, revid in parsed_refs.iteritems():
 614        if ref == 'refs/heads/master':
 615            repo.generate_revision_history(revid, marks.get_tip('master'))
 616            revno, revid = repo.last_revision_info()
 617            if peer:
 618                if hasattr(peer, "import_last_revision_info_and_tags"):
 619                    peer.import_last_revision_info_and_tags(repo, revno, revid)
 620                else:
 621                    peer.import_last_revision_info(repo.repository, revno, revid)
 622                wt = peer.bzrdir.open_workingtree()
 623            else:
 624                wt = repo.bzrdir.open_workingtree()
 625            wt.update()
 626        print "ok %s" % ref
 627    print
 628
 629def do_capabilities(parser):
 630    global dirname
 631
 632    print "import"
 633    print "export"
 634    print "refspec refs/heads/*:%s/heads/*" % prefix
 635
 636    path = os.path.join(dirname, 'marks-git')
 637
 638    if os.path.exists(path):
 639        print "*import-marks %s" % path
 640    print "*export-marks %s" % path
 641
 642    print
 643
 644def do_list(parser):
 645    global tags
 646    print "? refs/heads/%s" % 'master'
 647    for tag, revid in parser.repo.tags.get_tag_dict().items():
 648        print "? refs/tags/%s" % tag
 649        tags[tag] = revid
 650    print "@refs/heads/%s HEAD" % 'master'
 651    print
 652
 653def get_repo(url, alias):
 654    global dirname, peer
 655
 656    origin = bzrlib.bzrdir.BzrDir.open(url)
 657    branch = origin.open_branch()
 658
 659    if not isinstance(origin.transport, bzrlib.transport.local.LocalTransport):
 660        clone_path = os.path.join(dirname, 'clone')
 661        remote_branch = branch
 662        if os.path.exists(clone_path):
 663            # pull
 664            d = bzrlib.bzrdir.BzrDir.open(clone_path)
 665            branch = d.open_branch()
 666            result = branch.pull(remote_branch, [], None, False)
 667        else:
 668            # clone
 669            d = origin.sprout(clone_path, None,
 670                    hardlink=True, create_tree_if_local=False,
 671                    source_branch=remote_branch)
 672            branch = d.open_branch()
 673            branch.bind(remote_branch)
 674
 675        peer = remote_branch
 676    else:
 677        peer = None
 678
 679    return branch
 680
 681def main(args):
 682    global marks, prefix, dirname
 683    global tags, filenodes
 684    global blob_marks
 685    global parsed_refs
 686    global files_cache
 687
 688    alias = args[1]
 689    url = args[2]
 690
 691    prefix = 'refs/bzr/%s' % alias
 692    tags = {}
 693    filenodes = {}
 694    blob_marks = {}
 695    parsed_refs = {}
 696    files_cache = {}
 697
 698    gitdir = os.environ['GIT_DIR']
 699    dirname = os.path.join(gitdir, 'bzr', alias)
 700
 701    if not os.path.exists(dirname):
 702        os.makedirs(dirname)
 703
 704    repo = get_repo(url, alias)
 705
 706    marks_path = os.path.join(dirname, 'marks-int')
 707    marks = Marks(marks_path)
 708
 709    parser = Parser(repo)
 710    for line in parser:
 711        if parser.check('capabilities'):
 712            do_capabilities(parser)
 713        elif parser.check('list'):
 714            do_list(parser)
 715        elif parser.check('import'):
 716            do_import(parser)
 717        elif parser.check('export'):
 718            do_export(parser)
 719        else:
 720            die('unhandled command: %s' % line)
 721        sys.stdout.flush()
 722
 723    marks.store()
 724
 725sys.exit(main(sys.argv))