#!/usr/bin/env python

"""Functionality for interacting with Git repositories.

This module provides classes for interfacing with a Git repository.
"""

import os
import re
import time
from binascii import hexlify
from collections import deque
from cStringIO import StringIO
import unittest

from git_remote_helpers.util import debug, error, die, start_command, run_command


def get_git_dir():
    """Return the path to the GIT_DIR for this repo.

    Runs 'git rev-parse --git-dir' and returns its stripped output.
    Calls die() if the command exits with a non-zero code.

    """
    args = ("git", "rev-parse", "--git-dir")
    exit_code, output, errors = run_command(args)
    if exit_code:
        die("Failed to retrieve git dir")
    assert not errors
    return output.strip()


def parse_git_config():
    """Return a dict containing the parsed version of 'git config -l'.

    The configuration is read with 'git config -z -l', where each entry
    is terminated by NUL and the key is separated from its value by a
    newline.  A key that was given without any value is printed without
    the newline; such keys are mapped to the empty string instead of
    making the parse fail.

    Calls die() if the command exits with a non-zero code.

    """
    exit_code, output, errors = run_command(("git", "config", "-z", "-l"))
    if exit_code:
        die("Failed to retrieve git configuration")
    assert not errors
    config = {}
    for entry in output.split("\0"):
        if not entry:
            continue
        # partition() tolerates entries that carry no "\n<value>" part.
        key, _, value = entry.partition("\n")
        config[key] = value
    return config


def git_config_bool(value):
    """Convert the given git config string value to True or False.

    The value is converted with str(), stripped and lower-cased before
    it is compared against the recognized spellings.  Note that the
    empty string counts as True and "none" (e.g. str(None)) as False.

    Raise ValueError if the given string was not recognized as a
    boolean value.

    """
    norm_value = str(value).strip().lower()
    if norm_value in ("true", "1", "yes", "on", ""):
        return True
    if norm_value in ("false", "0", "no", "off", "none"):
        return False
    raise ValueError("Failed to parse '%s' into a boolean value" % (value))


def valid_git_ref(ref_name):
    """Return True iff the given ref name is a valid git ref name.

    One-level names (names without any '/') are accepted.

    """
    # The following is a reimplementation of the git check-ref-format
    # command.  The rules were derived from the git check-ref-format(1)
    # manual page.  This code should be replaced by a call to
    # check_ref_format() in the git library, when such is available.
    if not ref_name or ref_name == "@":
        return False
    if ref_name.startswith("/") or \
       ref_name.endswith("/") or \
       ref_name.endswith(".") or \
       "//" in ref_name or \
       ".." in ref_name or \
       "@{" in ref_name or \
       "\\" in ref_name:
        return False
    # No slash-separated component may begin with '.' or end with '.lock'.
    for component in ref_name.split("/"):
        if component.startswith(".") or component.endswith(".lock"):
            return False
    # No ASCII control characters, DEL, or any of the special characters.
    for c in ref_name:
        if ord(c) < 0x20 or ord(c) == 0x7f or c in " ~^:?*[":
            return False
    return True


class GitObjectFetcher(object):

    """Provide parsed access to 'git cat-file --batch'.

    This provides a read-only interface to the Git object database.

    Object names are queued with push() together with a callback; at
    most one object is 'in transit' (submitted to the subprocess but
    not yet read back) at any time.

    """

    def __init__(self):
        """Initiate a 'git cat-file --batch' session."""
        # Queue of (object name, callback) tuples yet to be submitted
        self.queue = deque()
        # (object name, callback) tuple currently in transit, or None
        self.in_transit = None

        # 'git cat-file --batch' produces binary output which is likely
        # to be corrupted by the default "rU"-mode pipe opened by
        # start_command.  (Mode == "rU" does universal new-line
        # conversion, which mangles carriage returns.)  Therefore, we
        # open an explicitly binary-safe pipe for transferring the
        # output from 'git cat-file --batch'.
        pipe_r_fd, pipe_w_fd = os.pipe()
        pipe_r = os.fdopen(pipe_r_fd, "rb")
        pipe_w = os.fdopen(pipe_w_fd, "wb")
        # NOTE(review): our copy of the write end is only closed when
        # 'pipe_w' is garbage-collected; __del__ relies on that to see
        # EOF -- confirm whether an explicit close is wanted here.
        self.proc = start_command(("git", "cat-file", "--batch"),
                                  stdout=pipe_w)
        self.f = pipe_r

    def __del__(self):
        """Verify completed communication with 'git cat-file --batch'."""
        assert not self.queue
        assert self.in_transit is None
        self.proc.stdin.close()
        assert self.proc.wait() == 0  # Zero exit code
        assert self.f.read() == ""  # No remaining output

    def _submit_next_object(self):
        """Submit queue items to the 'git cat-file --batch' process.

        If there are items in the queue, and there is currently no item
        in 'transit', then pop the first item off the queue, and submit
        its object name (followed by a newline) to the subprocess.

        """
        if self.queue and self.in_transit is None:
            self.in_transit = self.queue.popleft()
            self.proc.stdin.write("%s\n" % (self.in_transit[0]))

    def push(self, obj, callback):
        """Push the given object name onto the queue.

        The given callback function will at some point in the future
        be called exactly once with the following arguments:
        - self - this GitObjectFetcher instance
        - obj  - the object name provided to push()
        - sha1 - the SHA1 of the object, if 'None' obj is missing
        - t    - the type of the object (tag/commit/tree/blob)
        - size - the size of the object in bytes
        - data - the object contents

        """
        self.queue.append((obj, callback))
        self._submit_next_object()  # (Re)start queue processing

    def process_next_entry(self):
        """Read the next entry off the queue and invoke callback.

        The reply is either '<obj> missing' or a '<sha1> <type> <size>'
        header line followed by <size> bytes of data and a newline.
        Whatever the outcome, the next queued object (if any) is
        submitted afterwards, so that a missing object does not stall
        the remainder of the queue.

        """
        obj, cb = self.in_transit
        self.in_transit = None
        header = self.f.readline()
        if header == "%s missing\n" % (obj):
            cb(self, obj, None, None, None, None)
        else:
            sha1, t, size = header.split(" ")
            assert len(sha1) == 40
            assert t in ("tag", "commit", "tree", "blob")
            assert size.endswith("\n")
            size = int(size.strip())
            data = self.f.read(size)
            assert self.f.read(1) == "\n"
            cb(self, obj, sha1, t, size, data)
        self._submit_next_object()

    def process(self):
        """Process the current queue until empty."""
        while self.in_transit is not None:
            self.process_next_entry()

    # High-level convenience methods:

    def get_sha1(self, objspec):
        """Return the SHA1 of the object specified by 'objspec'.

        Return None if 'objspec' does not specify an existing object.

        """
        class _ObjHandler(object):
            """Helper class for getting the returned SHA1."""
            def __init__(self, parser):
                self.parser = parser
                self.sha1 = None

            def __call__(self, parser, obj, sha1, t, size, data):
                # FIXME: Many unused arguments. Could this be cheaper?
                assert parser == self.parser
                self.sha1 = sha1

        handler = _ObjHandler(self)
        self.push(objspec, handler)
        self.process()
        return handler.sha1

    def open_obj(self, objspec):
        """Return a file object wrapping the contents of a named object.

        The returned file object is a StringIO positioned at offset 0.

        The caller is responsible for calling .close() on the returned
        file object.

        Raise KeyError if 'objspec' does not exist in the repo.

        """
        class _ObjHandler(object):
            """Helper class for parsing the returned git object."""
            def __init__(self, parser):
                """Set up helper."""
                self.parser = parser
                self.contents = StringIO()
                self.err = None

            def __call__(self, parser, obj, sha1, t, size, data):
                """Git object callback (see GitObjectFetcher documentation)."""
                assert parser == self.parser
                if not sha1:  # Missing object
                    self.err = "Missing object '%s'" % obj
                else:
                    assert size == len(data)
                    self.contents.write(data)

        handler = _ObjHandler(self)
        self.push(objspec, handler)
        self.process()
        if handler.err:
            raise KeyError(handler.err)
        handler.contents.seek(0)
        return handler.contents

    def walk_tree(self, tree_objspec, callback, prefix=""):
        """Recursively walk the given Git tree object.

        Recursively walk all subtrees of the given tree object, and
        invoke the given callback passing three arguments:
        (path, mode, data) with the path, permission bits, and contents
        of all the blobs found in the entire tree structure.

        The given prefix is prepended to all paths passed to callback.

        """
        class _ObjHandler(object):
            """Helper class for walking a git tree structure."""
            def __init__(self, parser, cb, path, mode=None):
                """Set up helper."""
                self.parser = parser
                self.cb = cb
                self.path = path
                self.mode = mode
                self.err = None

            def parse_tree(self, treedata):
                """Parse tree object data, yield tree entries.

                Each tree entry is a 3-tuple (mode, sha1, path)

                self.path is prepended to all paths yielded
                from this method.

                """
                # Each raw entry is "<mode> <name>\0<20-byte SHA1>".
                # The mode has no fixed width ("100644" for a regular
                # file, but "40000" for a subtree), so it is delimited
                # by the first space rather than by position.
                pos = 0
                end = len(treedata)
                while pos < end:
                    space = treedata.find(" ", pos)
                    assert space > pos
                    mode = int(treedata[pos:space], 10)
                    # Turn 100xxx into xxx
                    if mode > 100000:
                        mode -= 100000
                    nul = treedata.find("\0", space + 1)
                    assert nul > space
                    path = treedata[space + 1:nul]
                    sha1 = hexlify(treedata[nul + 1:nul + 21])
                    yield (mode, sha1, self.path + path)
                    pos = nul + 21

            def __call__(self, parser, obj, sha1, t, size, data):
                """Git object callback (see GitObjectFetcher documentation)."""
                assert parser == self.parser
                if not sha1:  # Missing object
                    self.err = "Missing object '%s'" % (obj)
                    return
                assert size == len(data)
                if t == "tree":
                    if self.path:
                        self.path += "/"
                    # Recurse into all blobs and subtrees
                    for m, s, p in self.parse_tree(data):
                        parser.push(s,
                                    self.__class__(self.parser, self.cb, p, m))
                elif t == "blob":
                    self.cb(self.path, self.mode, data)
                else:
                    raise ValueError("Unknown object type '%s'" % (t))

        self.push(tree_objspec, _ObjHandler(self, callback, prefix))
        self.process()


class GitRefMap(object):

    """Map Git ref names to the Git object names they currently point to.

    Behaves like a dictionary of Git ref names -> Git object names.

    Lookups are resolved through the given GitObjectFetcher and cached,
    including lookups of refs that do not exist (cached as None).

    """

    def __init__(self, obj_fetcher):
        """Create a new Git ref -> object map."""
        self.obj_fetcher = obj_fetcher
        self._cache = {}  # dict: refname -> objname

    def _load(self, ref):
        """Retrieve the object currently bound to the given ref.

        The name of the object pointed to by the given ref is stored
        into this mapping, and also returned.

        """
        if ref not in self._cache:
            self._cache[ref] = self.obj_fetcher.get_sha1(ref)
        return self._cache[ref]

    def __contains__(self, refname):
        """Return True if the given refname is present in this cache."""
        return bool(self._load(refname))

    def __getitem__(self, refname):
        """Return the git object name pointed to by the given refname.

        Raise KeyError if the given refname cannot be resolved.

        """
        commit = self._load(refname)
        if commit is None:
            raise KeyError("Unknown ref '%s'" % (refname))
        return commit

    def get(self, refname, default=None):
        """Return the git object name pointed to by the given refname.

        Return the given default if the refname cannot be resolved.

        """
        commit = self._load(refname)
        if commit is None:
            return default
        return commit


class GitFICommit(object):

    """Encapsulate the data in a Git fast-import commit command.

    Path operations are collected in self.pathops as tuples of strings;
    GitFastImport.commit() writes each tuple as one space-separated
    line.

    """

    SHA1RE = re.compile(r'^[0-9a-f]{40}$')

    @classmethod
    def parse_mode(cls, mode):
        """Verify the given git file mode, and return it as a string."""
        assert mode in (644, 755, 100644, 100755, 120000)
        return "%i" % (mode)

    @classmethod
    def parse_objname(cls, objname):
        """Return the given object name (or mark number) as a string."""
        if isinstance(objname, int):  # Object name is a mark number
            assert objname > 0
            return ":%i" % (objname)

        # No existence check is done, only checks for valid format
        assert cls.SHA1RE.match(objname)  # Object name is valid SHA1
        return objname

    @classmethod
    def quote_path(cls, path):
        """Return a quoted version of the given path.

        Backslashes, newlines and double quotes are backslash-escaped,
        and the result is wrapped in double quotes.

        """
        # Backslashes must be escaped first, so that the backslashes
        # introduced by the following replacements are left alone.
        path = path.replace("\\", "\\\\")
        path = path.replace("\n", "\\n")
        path = path.replace('"', '\\"')
        return '"%s"' % (path)

    @classmethod
    def parse_path(cls, path):
        """Verify that the given path is valid, and quote it, if needed."""
        assert not isinstance(path, int)  # Cannot be a mark number

        # These checks verify the rules on the fast-import man page
        assert not path.count("//")
        assert not path.endswith("/")
        assert not path.startswith("/")
        assert not path.count("/./")
        assert not path.count("/../")
        assert not path.endswith("/.")
        assert not path.endswith("/..")
        assert not path.startswith("./")
        assert not path.startswith("../")

        if path.count('"') + path.count('\n') + path.count('\\'):
            return cls.quote_path(path)
        return path

    def __init__(self, name, email, timestamp, timezone, message):
        """Create a new Git fast-import commit, with the given metadata."""
        self.name = name
        self.email = email
        self.timestamp = timestamp
        self.timezone = timezone
        self.message = message
        self.pathops = []  # List of path operations in this commit

    def modify(self, mode, blobname, path):
        """Add a file modification to this Git fast-import commit."""
        self.pathops.append(("M",
                             self.parse_mode(mode),
                             self.parse_objname(blobname),
                             self.parse_path(path)))

    def delete(self, path):
        """Add a file deletion to this Git fast-import commit."""
        self.pathops.append(("D", self.parse_path(path)))

    def copy(self, path, newpath):
        """Add a file copy to this Git fast-import commit."""
        self.pathops.append(("C",
                             self.parse_path(path),
                             self.parse_path(newpath)))

    def rename(self, path, newpath):
        """Add a file rename to this Git fast-import commit."""
        self.pathops.append(("R",
                             self.parse_path(path),
                             self.parse_path(newpath)))

    def note(self, blobname, commit):
        """Add a note object to this Git fast-import commit."""
        self.pathops.append(("N",
                             self.parse_objname(blobname),
                             self.parse_objname(commit)))

    def deleteall(self):
        """Delete all files in this Git fast-import commit."""
        # Must be a tuple like every other path operation: the ops are
        # written with " ".join(op), which would otherwise put a space
        # between every character of the bare string.
        self.pathops.append(("deleteall",))


class TestGitFICommit(unittest.TestCase):

    """GitFICommit selftests."""

    def test_basic(self):
        """GitFICommit basic selftests."""

        def expect_fail(method, data):
            """Verify that the method(data) raises an AssertionError."""
            try:
                method(data)
            except AssertionError:
                return
            raise AssertionError("Failed test for invalid data '%s(%s)'" %
                                 (method.__name__, repr(data)))

    def test_parse_mode(self):
        """GitFICommit.parse_mode() selftests."""
        self.assertEqual(GitFICommit.parse_mode(644), "644")
        self.assertEqual(GitFICommit.parse_mode(755), "755")
        self.assertEqual(GitFICommit.parse_mode(100644), "100644")
        self.assertEqual(GitFICommit.parse_mode(100755), "100755")
        self.assertEqual(GitFICommit.parse_mode(120000), "120000")
        self.assertRaises(AssertionError, GitFICommit.parse_mode, 0)
        self.assertRaises(AssertionError, GitFICommit.parse_mode, 123)
        self.assertRaises(AssertionError, GitFICommit.parse_mode, 600)
        self.assertRaises(AssertionError, GitFICommit.parse_mode, "644")
        self.assertRaises(AssertionError, GitFICommit.parse_mode, "abc")

    def test_parse_objname(self):
        """GitFICommit.parse_objname() selftests."""
        self.assertEqual(GitFICommit.parse_objname(1), ":1")
        self.assertRaises(AssertionError, GitFICommit.parse_objname, 0)
        self.assertRaises(AssertionError, GitFICommit.parse_objname, -1)
        self.assertEqual(GitFICommit.parse_objname("0123456789" * 4),
                         "0123456789" * 4)
        self.assertEqual(GitFICommit.parse_objname("2468abcdef" * 4),
                         "2468abcdef" * 4)
        self.assertRaises(AssertionError, GitFICommit.parse_objname,
                          "abcdefghij" * 4)

    def test_parse_path(self):
        """GitFICommit.parse_path() selftests."""
        self.assertEqual(GitFICommit.parse_path("foo/bar"), "foo/bar")
        self.assertEqual(GitFICommit.parse_path("path/with\n and\" in it"),
                         '"path/with\\n and\\" in it"')
        self.assertRaises(AssertionError, GitFICommit.parse_path, 1)
        self.assertRaises(AssertionError, GitFICommit.parse_path, 0)
        self.assertRaises(AssertionError, GitFICommit.parse_path, -1)
        self.assertRaises(AssertionError, GitFICommit.parse_path, "foo//bar")
        self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/bar/")
        self.assertRaises(AssertionError, GitFICommit.parse_path, "/foo/bar")
        self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/./bar")
        self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/../bar")
        self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/bar/.")
        self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/bar/..")
        self.assertRaises(AssertionError, GitFICommit.parse_path, "./foo/bar")
        self.assertRaises(AssertionError, GitFICommit.parse_path, "../foo/bar")

    def test_deleteall(self):
        """GitFICommit.deleteall() selftests."""
        commit = GitFICommit("name", "email", 0, "+0000", "message")
        commit.deleteall()
        self.assertEqual(len(commit.pathops), 1)
        # This is how GitFastImport.commit() renders a path operation
        self.assertEqual(" ".join(commit.pathops[0]), "deleteall")


class GitFastImport(object):

    """Encapsulate communication with git fast-import."""

    def __init__(self, f, obj_fetcher, last_mark=0):
        """Set up self to communicate with a fast-import process through f."""
        self.f = f  # File object where fast-import stream is written
        self.obj_fetcher = obj_fetcher  # GitObjectFetcher instance
        self.next_mark = last_mark + 1  # Next mark number
        self.refs = set()  # Keep track of the refnames we've seen

    def comment(self, s):
        """Write the given comment in the fast-import stream."""
        assert "\n" not in s, "Malformed comment: '%s'" % (s)
        self.f.write("# %s\n" % (s))

    def commit(self, ref, commitdata):
        """Make a commit on the given ref, with the given GitFICommit.

        The first time a ref is seen by this object, and that ref
        already resolves to an object in the repo, the commit is
        explicitly based on the current tip of that ref.

        Return the mark number identifying this commit.

        """
        self.f.write("""\
commit %(ref)s
mark :%(mark)i
committer %(name)s <%(email)s> %(timestamp)i %(timezone)s
data %(msgLength)i
%(msg)s
""" % {
    'ref': ref,
    'mark': self.next_mark,
    'name': commitdata.name,
    'email': commitdata.email,
    'timestamp': commitdata.timestamp,
    'timezone': commitdata.timezone,
    'msgLength': len(commitdata.message),
    'msg': commitdata.message,
})

        if ref not in self.refs:
            self.refs.add(ref)
            parent = ref + "^0"
            if self.obj_fetcher.get_sha1(parent):
                self.f.write("from %s\n" % (parent))

        # Each path operation is a tuple of strings, written as one line
        for op in commitdata.pathops:
            self.f.write(" ".join(op))
            self.f.write("\n")
        self.f.write("\n")
        retval = self.next_mark
        self.next_mark += 1
        return retval

    def blob(self, data):
        """Import the given blob.

        Return the mark number identifying this blob.

        """
        self.f.write("blob\nmark :%i\ndata %i\n%s\n" %
                     (self.next_mark, len(data), data))
        retval = self.next_mark
        self.next_mark += 1
        return retval

    def reset(self, ref, objname):
        """Reset the given ref to point at the given Git object."""
        self.f.write("reset %s\nfrom %s\n\n" %
                     (ref, GitFICommit.parse_objname(objname)))
        self.refs.add(ref)


class GitNotes(object):

    """Encapsulate access to Git notes.

    Simulates a dictionary of object name (SHA1) -> Git note mappings.

    """

    def __init__(self, notes_ref, obj_fetcher):
        """Create a new Git notes interface, bound to the given notes ref."""
        self.notes_ref = notes_ref
        self.obj_fetcher = obj_fetcher  # Used to get objects from repo
        self.imports = []  # list: (objname, note data blob name) tuples

    def __del__(self):
        """Verify that self.commit_notes() was called before destruction."""
        if self.imports:
            error("Missing call to self.commit_notes().")
            error("%i notes are not committed!", len(self.imports))

    def _load(self, objname):
        """Return the note data associated with the given git object.

        The note data is returned in string form. If no note is found
        for the given object, None is returned.

        """
        try:
            f = self.obj_fetcher.open_obj("%s:%s" % (self.notes_ref, objname))
            ret = f.read()
            f.close()
        except KeyError:
            ret = None
        return ret

    def __getitem__(self, objname):
        """Return the note contents associated with the given object.

        Raise KeyError if given object has no associated note.

        """
        blobdata = self._load(objname)
        if blobdata is None:
            raise KeyError("Object '%s' has no note" % (objname))
        return blobdata

    def get(self, objname, default=None):
        """Return the note contents associated with the given object.

        Return given default if given object has no associated note.

        """
        blobdata = self._load(objname)
        if blobdata is None:
            return default
        return blobdata

    def import_note(self, objname, data, gfi):
        """Tell git fast-import to store data as a note for objname.

        This method uses the given GitFastImport object to create a
        blob containing the given note data.  Also an entry mapping the
        given object name to the created blob is stored until
        commit_notes() is called.

        A trailing newline is appended to the note data if missing.

        Note that this method only works if it is later followed by a
        call to self.commit_notes() (which produces the note commit
        that refers to the blob produced here).

        """
        if not data.endswith("\n"):
            data += "\n"
        gfi.comment("Importing note for object %s" % (objname))
        mark = gfi.blob(data)
        self.imports.append((objname, mark))

    def commit_notes(self, gfi, author, message):
        """Produce a git fast-import note commit for the imported notes.

        This method uses the given GitFastImport object to create a
        commit on the notes ref, introducing the notes previously
        submitted to import_note().

        'author' is indexed as (name, email).  The commit is stamped
        with the current time, in UTC.  Nothing is written if there are
        no pending notes.

        """
        if not self.imports:
            return
        # fast-import's raw date format requires a signed UTC offset
        commitdata = GitFICommit(author[0], author[1],
                                 time.time(), "+0000", message)
        for objname, blobname in self.imports:
            # Both the annotated object and the note blob must be
            # given as fast-import mark numbers here.
            assert isinstance(objname, int) and objname > 0
            assert isinstance(blobname, int) and blobname > 0
            commitdata.note(blobname, objname)
        gfi.commit(self.notes_ref, commitdata)
        self.imports = []


class GitCachedNotes(GitNotes):

    """Encapsulate access to Git notes (cached version).

    Only use this class if no caching is done at a higher level.

    Simulates a dictionary of object name (SHA1) -> Git note mappings.

    """

    def __init__(self, notes_ref, obj_fetcher):
        """Set up a caching wrapper around GitNotes."""
        GitNotes.__init__(self, notes_ref, obj_fetcher)
        self._cache = {}  # Cache: object name -> note data

    def __del__(self):
        """Verify that GitNotes' destructor is called."""
        GitNotes.__del__(self)

    def _load(self, objname):
        """Extend GitNotes._load() with a local objname -> note cache."""
        if objname not in self._cache:
            self._cache[objname] = GitNotes._load(self, objname)
        return self._cache[objname]

    def import_note(self, objname, data, gfi):
        """Extend GitNotes.import_note() with a local objname -> note cache."""
        # Normalize here as well, so that the cached data is identical
        # to what GitNotes.import_note() hands to fast-import.
        if not data.endswith("\n"):
            data += "\n"
        assert objname not in self._cache
        self._cache[objname] = data
        GitNotes.import_note(self, objname, data, gfi)


if __name__ == '__main__':
    unittest.main()