git_remote_helpers / git / git.py on commit Merge branch 'jc/maint-diffstat-numstat-context' into maint (eee947f)
   1#!/usr/bin/env python
   2
   3"""Functionality for interacting with Git repositories.
   4
   5This module provides classes for interfacing with a Git repository.
   6"""
   7
   8import os
   9import re
  10import time
  11from binascii import hexlify
  12from cStringIO import StringIO
  13import unittest
  14
  15from git_remote_helpers.util import debug, error, die, start_command, run_command
  16
  17
  18def get_git_dir ():
  19    """Return the path to the GIT_DIR for this repo."""
  20    args = ("git", "rev-parse", "--git-dir")
  21    exit_code, output, errors = run_command(args)
  22    if exit_code:
  23        die("Failed to retrieve git dir")
  24    assert not errors
  25    return output.strip()
  26
  27
  28def parse_git_config ():
  29    """Return a dict containing the parsed version of 'git config -l'."""
  30    exit_code, output, errors = run_command(("git", "config", "-z", "-l"))
  31    if exit_code:
  32        die("Failed to retrieve git configuration")
  33    assert not errors
  34    return dict([e.split('\n', 1) for e in output.split("\0") if e])
  35
  36
  37def git_config_bool (value):
  38    """Convert the given git config string value to True or False.
  39
  40    Raise ValueError if the given string was not recognized as a
  41    boolean value.
  42
  43    """
  44    norm_value = str(value).strip().lower()
  45    if norm_value in ("true", "1", "yes", "on", ""):
  46        return True
  47    if norm_value in ("false", "0", "no", "off", "none"):
  48        return False
  49    raise ValueError("Failed to parse '%s' into a boolean value" % (value))
  50
  51
  52def valid_git_ref (ref_name):
  53    """Return True iff the given ref name is a valid git ref name."""
  54    # The following is a reimplementation of the git check-ref-format
  55    # command.  The rules were derived from the git check-ref-format(1)
  56    # manual page.  This code should be replaced by a call to
  57    # check_ref_format() in the git library, when such is available.
  58    if ref_name.endswith('/') or \
  59       ref_name.startswith('.') or \
  60       ref_name.count('/.') or \
  61       ref_name.count('..') or \
  62       ref_name.endswith('.lock'):
  63        return False
  64    for c in ref_name:
  65        if ord(c) < 0x20 or ord(c) == 0x7f or c in " ~^:?*[":
  66            return False
  67    return True
  68
  69
  70class GitObjectFetcher(object):
  71
  72    """Provide parsed access to 'git cat-file --batch'.
  73
  74    This provides a read-only interface to the Git object database.
  75
  76    """
  77
  78    def __init__ (self):
  79        """Initiate a 'git cat-file --batch' session."""
  80        self.queue = []  # List of object names to be submitted
  81        self.in_transit = None  # Object name currently in transit
  82
  83        # 'git cat-file --batch' produces binary output which is likely
  84        # to be corrupted by the default "rU"-mode pipe opened by
  85        # start_command.  (Mode == "rU" does universal new-line
  86        # conversion, which mangles carriage returns.) Therefore, we
  87        # open an explicitly binary-safe pipe for transferring the
  88        # output from 'git cat-file --batch'.
  89        pipe_r_fd, pipe_w_fd = os.pipe()
  90        pipe_r = os.fdopen(pipe_r_fd, "rb")
  91        pipe_w = os.fdopen(pipe_w_fd, "wb")
  92        self.proc = start_command(("git", "cat-file", "--batch"),
  93                                  stdout = pipe_w)
  94        self.f = pipe_r
  95
  96    def __del__ (self):
  97        """Verify completed communication with 'git cat-file --batch'."""
  98        assert not self.queue
  99        assert self.in_transit is None
 100        self.proc.stdin.close()
 101        assert self.proc.wait() == 0  # Zero exit code
 102        assert self.f.read() == ""  # No remaining output
 103
 104    def _submit_next_object (self):
 105        """Submit queue items to the 'git cat-file --batch' process.
 106
 107        If there are items in the queue, and there is currently no item
 108        currently in 'transit', then pop the first item off the queue,
 109        and submit it.
 110
 111        """
 112        if self.queue and self.in_transit is None:
 113            self.in_transit = self.queue.pop(0)
 114            print >> self.proc.stdin, self.in_transit[0]
 115
 116    def push (self, obj, callback):
 117        """Push the given object name onto the queue.
 118
 119        The given callback function will at some point in the future
 120        be called exactly once with the following arguments:
 121        - self - this GitObjectFetcher instance
 122        - obj  - the object name provided to push()
 123        - sha1 - the SHA1 of the object, if 'None' obj is missing
 124        - t    - the type of the object (tag/commit/tree/blob)
 125        - size - the size of the object in bytes
 126        - data - the object contents
 127
 128        """
 129        self.queue.append((obj, callback))
 130        self._submit_next_object()  # (Re)start queue processing
 131
 132    def process_next_entry (self):
 133        """Read the next entry off the queue and invoke callback."""
 134        obj, cb = self.in_transit
 135        self.in_transit = None
 136        header = self.f.readline()
 137        if header == "%s missing\n" % (obj):
 138            cb(self, obj, None, None, None, None)
 139            return
 140        sha1, t, size = header.split(" ")
 141        assert len(sha1) == 40
 142        assert t in ("tag", "commit", "tree", "blob")
 143        assert size.endswith("\n")
 144        size = int(size.strip())
 145        data = self.f.read(size)
 146        assert self.f.read(1) == "\n"
 147        cb(self, obj, sha1, t, size, data)
 148        self._submit_next_object()
 149
 150    def process (self):
 151        """Process the current queue until empty."""
 152        while self.in_transit is not None:
 153            self.process_next_entry()
 154
 155    # High-level convenience methods:
 156
 157    def get_sha1 (self, objspec):
 158        """Return the SHA1 of the object specified by 'objspec'.
 159
 160        Return None if 'objspec' does not specify an existing object.
 161
 162        """
 163        class _ObjHandler(object):
 164            """Helper class for getting the returned SHA1."""
 165            def __init__ (self, parser):
 166                self.parser = parser
 167                self.sha1 = None
 168
 169            def __call__ (self, parser, obj, sha1, t, size, data):
 170                # FIXME: Many unused arguments. Could this be cheaper?
 171                assert parser == self.parser
 172                self.sha1 = sha1
 173
 174        handler = _ObjHandler(self)
 175        self.push(objspec, handler)
 176        self.process()
 177        return handler.sha1
 178
 179    def open_obj (self, objspec):
 180        """Return a file object wrapping the contents of a named object.
 181
 182        The caller is responsible for calling .close() on the returned
 183        file object.
 184
 185        Raise KeyError if 'objspec' does not exist in the repo.
 186
 187        """
 188        class _ObjHandler(object):
 189            """Helper class for parsing the returned git object."""
 190            def __init__ (self, parser):
 191                """Set up helper."""
 192                self.parser = parser
 193                self.contents = StringIO()
 194                self.err = None
 195
 196            def __call__ (self, parser, obj, sha1, t, size, data):
 197                """Git object callback (see GitObjectFetcher documentation)."""
 198                assert parser == self.parser
 199                if not sha1:  # Missing object
 200                    self.err = "Missing object '%s'" % obj
 201                else:
 202                    assert size == len(data)
 203                    self.contents.write(data)
 204
 205        handler = _ObjHandler(self)
 206        self.push(objspec, handler)
 207        self.process()
 208        if handler.err:
 209            raise KeyError(handler.err)
 210        handler.contents.seek(0)
 211        return handler.contents
 212
 213    def walk_tree (self, tree_objspec, callback, prefix = ""):
 214        """Recursively walk the given Git tree object.
 215
 216        Recursively walk all subtrees of the given tree object, and
 217        invoke the given callback passing three arguments:
 218        (path, mode, data) with the path, permission bits, and contents
 219        of all the blobs found in the entire tree structure.
 220
 221        """
 222        class _ObjHandler(object):
 223            """Helper class for walking a git tree structure."""
 224            def __init__ (self, parser, cb, path, mode = None):
 225                """Set up helper."""
 226                self.parser = parser
 227                self.cb = cb
 228                self.path = path
 229                self.mode = mode
 230                self.err = None
 231
 232            def parse_tree (self, treedata):
 233                """Parse tree object data, yield tree entries.
 234
 235                Each tree entry is a 3-tuple (mode, sha1, path)
 236
 237                self.path is prepended to all paths yielded
 238                from this method.
 239
 240                """
 241                while treedata:
 242                    mode = int(treedata[:6], 10)
 243                    # Turn 100xxx into xxx
 244                    if mode > 100000:
 245                        mode -= 100000
 246                    assert treedata[6] == " "
 247                    i = treedata.find("\0", 7)
 248                    assert i > 0
 249                    path = treedata[7:i]
 250                    sha1 = hexlify(treedata[i + 1: i + 21])
 251                    yield (mode, sha1, self.path + path)
 252                    treedata = treedata[i + 21:]
 253
 254            def __call__ (self, parser, obj, sha1, t, size, data):
 255                """Git object callback (see GitObjectFetcher documentation)."""
 256                assert parser == self.parser
 257                if not sha1:  # Missing object
 258                    self.err = "Missing object '%s'" % (obj)
 259                    return
 260                assert size == len(data)
 261                if t == "tree":
 262                    if self.path:
 263                        self.path += "/"
 264                    # Recurse into all blobs and subtrees
 265                    for m, s, p in self.parse_tree(data):
 266                        parser.push(s,
 267                                    self.__class__(self.parser, self.cb, p, m))
 268                elif t == "blob":
 269                    self.cb(self.path, self.mode, data)
 270                else:
 271                    raise ValueError("Unknown object type '%s'" % (t))
 272
 273        self.push(tree_objspec, _ObjHandler(self, callback, prefix))
 274        self.process()
 275
 276
 277class GitRefMap(object):
 278
 279    """Map Git ref names to the Git object names they currently point to.
 280
 281    Behaves like a dictionary of Git ref names -> Git object names.
 282
 283    """
 284
 285    def __init__ (self, obj_fetcher):
 286        """Create a new Git ref -> object map."""
 287        self.obj_fetcher = obj_fetcher
 288        self._cache = {}  # dict: refname -> objname
 289
 290    def _load (self, ref):
 291        """Retrieve the object currently bound to the given ref.
 292
 293        The name of the object pointed to by the given ref is stored
 294        into this mapping, and also returned.
 295
 296        """
 297        if ref not in self._cache:
 298            self._cache[ref] = self.obj_fetcher.get_sha1(ref)
 299        return self._cache[ref]
 300
 301    def __contains__ (self, refname):
 302        """Return True if the given refname is present in this cache."""
 303        return bool(self._load(refname))
 304
 305    def __getitem__ (self, refname):
 306        """Return the git object name pointed to by the given refname."""
 307        commit = self._load(refname)
 308        if commit is None:
 309            raise KeyError("Unknown ref '%s'" % (refname))
 310        return commit
 311
 312    def get (self, refname, default = None):
 313        """Return the git object name pointed to by the given refname."""
 314        commit = self._load(refname)
 315        if commit is None:
 316            return default
 317        return commit
 318
 319
 320class GitFICommit(object):
 321
 322    """Encapsulate the data in a Git fast-import commit command."""
 323
 324    SHA1RE = re.compile(r'^[0-9a-f]{40}$')
 325
 326    @classmethod
 327    def parse_mode (cls, mode):
 328        """Verify the given git file mode, and return it as a string."""
 329        assert mode in (644, 755, 100644, 100755, 120000)
 330        return "%i" % (mode)
 331
 332    @classmethod
 333    def parse_objname (cls, objname):
 334        """Return the given object name (or mark number) as a string."""
 335        if isinstance(objname, int):  # Object name is a mark number
 336            assert objname > 0
 337            return ":%i" % (objname)
 338
 339        # No existence check is done, only checks for valid format
 340        assert cls.SHA1RE.match(objname)  # Object name is valid SHA1
 341        return objname
 342
 343    @classmethod
 344    def quote_path (cls, path):
 345        """Return a quoted version of the given path."""
 346        path = path.replace("\\", "\\\\")
 347        path = path.replace("\n", "\\n")
 348        path = path.replace('"', '\\"')
 349        return '"%s"' % (path)
 350
 351    @classmethod
 352    def parse_path (cls, path):
 353        """Verify that the given path is valid, and quote it, if needed."""
 354        assert not isinstance(path, int)  # Cannot be a mark number
 355
 356        # These checks verify the rules on the fast-import man page
 357        assert not path.count("//")
 358        assert not path.endswith("/")
 359        assert not path.startswith("/")
 360        assert not path.count("/./")
 361        assert not path.count("/../")
 362        assert not path.endswith("/.")
 363        assert not path.endswith("/..")
 364        assert not path.startswith("./")
 365        assert not path.startswith("../")
 366
 367        if path.count('"') + path.count('\n') + path.count('\\'):
 368            return cls.quote_path(path)
 369        return path
 370
 371    def __init__ (self, name, email, timestamp, timezone, message):
 372        """Create a new Git fast-import commit, with the given metadata."""
 373        self.name = name
 374        self.email = email
 375        self.timestamp = timestamp
 376        self.timezone = timezone
 377        self.message = message
 378        self.pathops = []  # List of path operations in this commit
 379
 380    def modify (self, mode, blobname, path):
 381        """Add a file modification to this Git fast-import commit."""
 382        self.pathops.append(("M",
 383                             self.parse_mode(mode),
 384                             self.parse_objname(blobname),
 385                             self.parse_path(path)))
 386
 387    def delete (self, path):
 388        """Add a file deletion to this Git fast-import commit."""
 389        self.pathops.append(("D", self.parse_path(path)))
 390
 391    def copy (self, path, newpath):
 392        """Add a file copy to this Git fast-import commit."""
 393        self.pathops.append(("C",
 394                             self.parse_path(path),
 395                             self.parse_path(newpath)))
 396
 397    def rename (self, path, newpath):
 398        """Add a file rename to this Git fast-import commit."""
 399        self.pathops.append(("R",
 400                             self.parse_path(path),
 401                             self.parse_path(newpath)))
 402
 403    def note (self, blobname, commit):
 404        """Add a note object to this Git fast-import commit."""
 405        self.pathops.append(("N",
 406                             self.parse_objname(blobname),
 407                             self.parse_objname(commit)))
 408
 409    def deleteall (self):
 410        """Delete all files in this Git fast-import commit."""
 411        self.pathops.append("deleteall")
 412
 413
 414class TestGitFICommit(unittest.TestCase):
 415
 416    """GitFICommit selftests."""
 417
 418    def test_basic (self):
 419        """GitFICommit basic selftests."""
 420
 421        def expect_fail (method, data):
 422            """Verify that the method(data) raises an AssertionError."""
 423            try:
 424                method(data)
 425            except AssertionError:
 426                return
 427            raise AssertionError("Failed test for invalid data '%s(%s)'" %
 428                                 (method.__name__, repr(data)))
 429
 430    def test_parse_mode (self):
 431        """GitFICommit.parse_mode() selftests."""
 432        self.assertEqual(GitFICommit.parse_mode(644), "644")
 433        self.assertEqual(GitFICommit.parse_mode(755), "755")
 434        self.assertEqual(GitFICommit.parse_mode(100644), "100644")
 435        self.assertEqual(GitFICommit.parse_mode(100755), "100755")
 436        self.assertEqual(GitFICommit.parse_mode(120000), "120000")
 437        self.assertRaises(AssertionError, GitFICommit.parse_mode, 0)
 438        self.assertRaises(AssertionError, GitFICommit.parse_mode, 123)
 439        self.assertRaises(AssertionError, GitFICommit.parse_mode, 600)
 440        self.assertRaises(AssertionError, GitFICommit.parse_mode, "644")
 441        self.assertRaises(AssertionError, GitFICommit.parse_mode, "abc")
 442
 443    def test_parse_objname (self):
 444        """GitFICommit.parse_objname() selftests."""
 445        self.assertEqual(GitFICommit.parse_objname(1), ":1")
 446        self.assertRaises(AssertionError, GitFICommit.parse_objname, 0)
 447        self.assertRaises(AssertionError, GitFICommit.parse_objname, -1)
 448        self.assertEqual(GitFICommit.parse_objname("0123456789" * 4),
 449                         "0123456789" * 4)
 450        self.assertEqual(GitFICommit.parse_objname("2468abcdef" * 4),
 451                         "2468abcdef" * 4)
 452        self.assertRaises(AssertionError, GitFICommit.parse_objname,
 453                          "abcdefghij" * 4)
 454
 455    def test_parse_path (self):
 456        """GitFICommit.parse_path() selftests."""
 457        self.assertEqual(GitFICommit.parse_path("foo/bar"), "foo/bar")
 458        self.assertEqual(GitFICommit.parse_path("path/with\n and \" in it"),
 459                         '"path/with\\n and \\" in it"')
 460        self.assertRaises(AssertionError, GitFICommit.parse_path, 1)
 461        self.assertRaises(AssertionError, GitFICommit.parse_path, 0)
 462        self.assertRaises(AssertionError, GitFICommit.parse_path, -1)
 463        self.assertRaises(AssertionError, GitFICommit.parse_path, "foo//bar")
 464        self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/bar/")
 465        self.assertRaises(AssertionError, GitFICommit.parse_path, "/foo/bar")
 466        self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/./bar")
 467        self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/../bar")
 468        self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/bar/.")
 469        self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/bar/..")
 470        self.assertRaises(AssertionError, GitFICommit.parse_path, "./foo/bar")
 471        self.assertRaises(AssertionError, GitFICommit.parse_path, "../foo/bar")
 472
 473
 474class GitFastImport(object):
 475
 476    """Encapsulate communication with git fast-import."""
 477
 478    def __init__ (self, f, obj_fetcher, last_mark = 0):
 479        """Set up self to communicate with a fast-import process through f."""
 480        self.f = f  # File object where fast-import stream is written
 481        self.obj_fetcher = obj_fetcher  # GitObjectFetcher instance
 482        self.next_mark = last_mark + 1  # Next mark number
 483        self.refs = set()  # Keep track of the refnames we've seen
 484
 485    def comment (self, s):
 486        """Write the given comment in the fast-import stream."""
 487        assert "\n" not in s, "Malformed comment: '%s'" % (s)
 488        self.f.write("# %s\n" % (s))
 489
 490    def commit (self, ref, commitdata):
 491        """Make a commit on the given ref, with the given GitFICommit.
 492
 493        Return the mark number identifying this commit.
 494
 495        """
 496        self.f.write("""\
 497commit %(ref)s
 498mark :%(mark)i
 499committer %(name)s <%(email)s> %(timestamp)i %(timezone)s
 500data %(msgLength)i
 501%(msg)s
 502""" % {
 503    'ref': ref,
 504    'mark': self.next_mark,
 505    'name': commitdata.name,
 506    'email': commitdata.email,
 507    'timestamp': commitdata.timestamp,
 508    'timezone': commitdata.timezone,
 509    'msgLength': len(commitdata.message),
 510    'msg': commitdata.message,
 511})
 512
 513        if ref not in self.refs:
 514            self.refs.add(ref)
 515            parent = ref + "^0"
 516            if self.obj_fetcher.get_sha1(parent):
 517                self.f.write("from %s\n" % (parent))
 518
 519        for op in commitdata.pathops:
 520            self.f.write(" ".join(op))
 521            self.f.write("\n")
 522        self.f.write("\n")
 523        retval = self.next_mark
 524        self.next_mark += 1
 525        return retval
 526
 527    def blob (self, data):
 528        """Import the given blob.
 529
 530        Return the mark number identifying this blob.
 531
 532        """
 533        self.f.write("blob\nmark :%i\ndata %i\n%s\n" %
 534                     (self.next_mark, len(data), data))
 535        retval = self.next_mark
 536        self.next_mark += 1
 537        return retval
 538
 539    def reset (self, ref, objname):
 540        """Reset the given ref to point at the given Git object."""
 541        self.f.write("reset %s\nfrom %s\n\n" %
 542                     (ref, GitFICommit.parse_objname(objname)))
 543        if ref not in self.refs:
 544            self.refs.add(ref)
 545
 546
 547class GitNotes(object):
 548
 549    """Encapsulate access to Git notes.
 550
 551    Simulates a dictionary of object name (SHA1) -> Git note mappings.
 552
 553    """
 554
 555    def __init__ (self, notes_ref, obj_fetcher):
 556        """Create a new Git notes interface, bound to the given notes ref."""
 557        self.notes_ref = notes_ref
 558        self.obj_fetcher = obj_fetcher  # Used to get objects from repo
 559        self.imports = []  # list: (objname, note data blob name) tuples
 560
 561    def __del__ (self):
 562        """Verify that self.commit_notes() was called before destruction."""
 563        if self.imports:
 564            error("Missing call to self.commit_notes().")
 565            error("%i notes are not committed!", len(self.imports))
 566
 567    def _load (self, objname):
 568        """Return the note data associated with the given git object.
 569
 570        The note data is returned in string form. If no note is found
 571        for the given object, None is returned.
 572
 573        """
 574        try:
 575            f = self.obj_fetcher.open_obj("%s:%s" % (self.notes_ref, objname))
 576            ret = f.read()
 577            f.close()
 578        except KeyError:
 579            ret = None
 580        return ret
 581
 582    def __getitem__ (self, objname):
 583        """Return the note contents associated with the given object.
 584
 585        Raise KeyError if given object has no associated note.
 586
 587        """
 588        blobdata = self._load(objname)
 589        if blobdata is None:
 590            raise KeyError("Object '%s' has no note" % (objname))
 591        return blobdata
 592
 593    def get (self, objname, default = None):
 594        """Return the note contents associated with the given object.
 595
 596        Return given default if given object has no associated note.
 597
 598        """
 599        blobdata = self._load(objname)
 600        if blobdata is None:
 601            return default
 602        return blobdata
 603
 604    def import_note (self, objname, data, gfi):
 605        """Tell git fast-import to store data as a note for objname.
 606
 607        This method uses the given GitFastImport object to create a
 608        blob containing the given note data.  Also an entry mapping the
 609        given object name to the created blob is stored until
 610        commit_notes() is called.
 611
 612        Note that this method only works if it is later followed by a
 613        call to self.commit_notes() (which produces the note commit
 614        that refers to the blob produced here).
 615
 616        """
 617        if not data.endswith("\n"):
 618            data += "\n"
 619        gfi.comment("Importing note for object %s" % (objname))
 620        mark = gfi.blob(data)
 621        self.imports.append((objname, mark))
 622
 623    def commit_notes (self, gfi, author, message):
 624        """Produce a git fast-import note commit for the imported notes.
 625
 626        This method uses the given GitFastImport object to create a
 627        commit on the notes ref, introducing the notes previously
 628        submitted to import_note().
 629
 630        """
 631        if not self.imports:
 632            return
 633        commitdata = GitFICommit(author[0], author[1],
 634                                 time.time(), "0000", message)
 635        for objname, blobname in self.imports:
 636            assert isinstance(objname, int) and objname > 0
 637            assert isinstance(blobname, int) and blobname > 0
 638            commitdata.note(blobname, objname)
 639        gfi.commit(self.notes_ref, commitdata)
 640        self.imports = []
 641
 642
 643class GitCachedNotes(GitNotes):
 644
 645    """Encapsulate access to Git notes (cached version).
 646
 647    Only use this class if no caching is done at a higher level.
 648
 649    Simulates a dictionary of object name (SHA1) -> Git note mappings.
 650
 651    """
 652
 653    def __init__ (self, notes_ref, obj_fetcher):
 654        """Set up a caching wrapper around GitNotes."""
 655        GitNotes.__init__(self, notes_ref, obj_fetcher)
 656        self._cache = {}  # Cache: object name -> note data
 657
 658    def __del__ (self):
 659        """Verify that GitNotes' destructor is called."""
 660        GitNotes.__del__(self)
 661
 662    def _load (self, objname):
 663        """Extend GitNotes._load() with a local objname -> note cache."""
 664        if objname not in self._cache:
 665            self._cache[objname] = GitNotes._load(self, objname)
 666        return self._cache[objname]
 667
 668    def import_note (self, objname, data, gfi):
 669        """Extend GitNotes.import_note() with a local objname -> note cache."""
 670        if not data.endswith("\n"):
 671            data += "\n"
 672        assert objname not in self._cache
 673        self._cache[objname] = data
 674        GitNotes.import_note(self, objname, data, gfi)
 675
 676
 677if __name__ == '__main__':
 678    unittest.main()