git-merge-fredrik.pyon commit Fix off-by-one error in git-merge (f88ed17)
   1#!/usr/bin/python
   2
   3import sys, math, random, os, re, signal, tempfile, stat, errno, traceback
   4from heapq import heappush, heappop
   5from sets import Set
   6
   7sys.path.append('@@GIT_PYTHON_PATH@@')
   8from gitMergeCommon import *
   9
  10alwaysWriteTree = False
  11
  12# The actual merge code
  13# ---------------------
  14
  15def merge(h1, h2, branch1Name, branch2Name, graph, callDepth=0):
  16    '''Merge the commits h1 and h2, return the resulting virtual
  17    commit object and a flag indicating the cleaness of the merge.'''
  18    assert(isinstance(h1, Commit) and isinstance(h2, Commit))
  19    assert(isinstance(graph, Graph))
  20
  21    def infoMsg(*args):
  22        sys.stdout.write('  '*callDepth)
  23        printList(args)
  24    infoMsg('Merging:')
  25    infoMsg(h1)
  26    infoMsg(h2)
  27    sys.stdout.flush()
  28
  29    ca = getCommonAncestors(graph, h1, h2)
  30    infoMsg('found', len(ca), 'common ancestor(s):')
  31    for x in ca:
  32        infoMsg(x)
  33    sys.stdout.flush()
  34
  35    Ms = ca[0]
  36    for h in ca[1:]:
  37        [Ms, ignore] = merge(Ms, h,
  38                             'Temporary shared merge branch 1',
  39                             'Temporary shared merge branch 2',
  40                             graph, callDepth+1)
  41        assert(isinstance(Ms, Commit))
  42
  43    if callDepth == 0:
  44        if len(ca) > 1:
  45            runProgram(['git-read-tree', h1.tree()])
  46            runProgram(['git-update-cache', '-q', '--refresh'])
  47        # Use the original index if we only have one common ancestor
  48        
  49        updateWd = True
  50        if alwaysWriteTree:
  51            cleanCache = True
  52        else:
  53            cleanCache = False
  54    else:
  55        runProgram(['git-read-tree', h1.tree()])
  56        updateWd = False
  57        cleanCache = True
  58
  59    [shaRes, clean] = mergeTrees(h1.tree(), h2.tree(), Ms.tree(),
  60                                 branch1Name, branch2Name,
  61                                 cleanCache, updateWd)
  62
  63    if clean or cleanCache:
  64        res = Commit(None, [h1, h2], tree=shaRes)
  65        graph.addNode(res)
  66    else:
  67        res = None
  68
  69    return [res, clean]
  70
  71getFilesRE = re.compile('([0-9]+) ([a-z0-9]+) ([0-9a-f]{40})\t(.*)')
  72def getFilesAndDirs(tree):
  73    files = Set()
  74    dirs = Set()
  75    out = runProgram(['git-ls-tree', '-r', '-z', tree])
  76    for l in out.split('\0'):
  77        m = getFilesRE.match(l)
  78        if m:
  79            if m.group(2) == 'tree':
  80                dirs.add(m.group(4))
  81            elif m.group(2) == 'blob':
  82                files.add(m.group(4))
  83
  84    return [files, dirs]
  85
  86class CacheEntry:
  87    def __init__(self, path):
  88        class Stage:
  89            def __init__(self):
  90                self.sha1 = None
  91                self.mode = None
  92        
  93        self.stages = [Stage(), Stage(), Stage()]
  94        self.path = path
  95
  96unmergedRE = re.compile('^([0-9]+) ([0-9a-f]{40}) ([1-3])\t(.*)$')
  97def unmergedCacheEntries():
  98    '''Create a dictionary mapping file names to CacheEntry
  99    objects. The dictionary contains one entry for every path with a
 100    non-zero stage entry.'''
 101
 102    lines = runProgram(['git-ls-files', '-z', '--unmerged']).split('\0')
 103    lines.pop()
 104
 105    res = {}
 106    for l in lines:
 107        m = unmergedRE.match(l)
 108        if m:
 109            mode = int(m.group(1), 8)
 110            sha1 = m.group(2)
 111            stage = int(m.group(3)) - 1
 112            path = m.group(4)
 113
 114            if res.has_key(path):
 115                e = res[path]
 116            else:
 117                e = CacheEntry(path)
 118                res[path] = e
 119                
 120            e.stages[stage].mode = mode
 121            e.stages[stage].sha1 = sha1
 122        else:
 123            die('Error: Merge program failed: Unexpected output from', \
 124                'git-ls-files:', l)
 125    return res
 126
 127def mergeTrees(head, merge, common, branch1Name, branch2Name,
 128               cleanCache, updateWd):
 129    '''Merge the trees 'head' and 'merge' with the common ancestor
 130    'common'. The name of the head branch is 'branch1Name' and the name of
 131    the merge branch is 'branch2Name'. Return a tuple (tree, cleanMerge)
 132    where tree is the resulting tree and cleanMerge is True iff the
 133    merge was clean.'''
 134    
 135    assert(isSha(head) and isSha(merge) and isSha(common))
 136
 137    if common == merge:
 138        print 'Already uptodate!'
 139        return [head, True]
 140
 141    if updateWd:
 142        updateArg = '-u'
 143    else:
 144        updateArg = '-i'
 145    runProgram(['git-read-tree', updateArg, '-m', common, head, merge])
 146    cleanMerge = True
 147
 148    [tree, code] = runProgram('git-write-tree', returnCode=True)
 149    tree = tree.rstrip()
 150    if code != 0:
 151        [files, dirs] = getFilesAndDirs(head)
 152        [filesM, dirsM] = getFilesAndDirs(merge)
 153        files.union_update(filesM)
 154        dirs.union_update(dirsM)
 155        
 156        cleanMerge = True
 157        entries = unmergedCacheEntries()
 158        for name in entries:
 159            if not processEntry(entries[name], branch1Name, branch2Name,
 160                                files, dirs, cleanCache, updateWd):
 161                cleanMerge = False
 162                
 163        if cleanMerge or cleanCache:
 164            tree = runProgram('git-write-tree').rstrip()
 165        else:
 166            tree = None
 167    else:
 168        cleanMerge = True
 169
 170    return [tree, cleanMerge]
 171
 172def processEntry(entry, branch1Name, branch2Name, files, dirs,
 173                 cleanCache, updateWd):
 174    '''Merge one cache entry. 'files' is a Set with the files in both of
 175    the heads that we are going to merge. 'dirs' contains the
 176    corresponding data for directories. If 'cleanCache' is True no
 177    non-zero stages will be left in the cache for the path
 178    corresponding to the entry 'entry'.'''
 179
 180# cleanCache == True  => Don't leave any non-stage 0 entries in the cache.
 181#               False => Leave unmerged entries
 182
 183# updateWd  == True  => Update the working directory to correspond to the cache
 184#              False => Leave the working directory unchanged
 185
 186# clean     == True  => non-conflict case
 187#              False => conflict case
 188
 189# If cleanCache == False then the cache shouldn't be updated if clean == False
 190
 191    def updateFile(clean, sha, mode, path):
 192        if cleanCache or (not cleanCache and clean):
 193            runProgram(['git-update-cache', '--add', '--cacheinfo',
 194                        '0%o' % mode, sha, path])
 195
 196        if updateWd:
 197            prog = ['git-cat-file', 'blob', sha]
 198            if stat.S_ISREG(mode):
 199                try:
 200                    os.unlink(path)
 201                except OSError:
 202                    pass
 203                if mode & 0100:
 204                    mode = 0777
 205                else:
 206                    mode = 0666
 207                fd = os.open(path, os.O_WRONLY | os.O_TRUNC | os.O_CREAT, mode)
 208                proc = subprocess.Popen(prog, stdout=fd)
 209                proc.wait()
 210                os.close(fd)
 211            elif stat.S_ISLNK(mode):
 212                linkTarget = runProgram(prog)
 213                os.symlink(linkTarget, path)
 214            else:
 215                assert(False)
 216            runProgram(['git-update-cache', '--', path])
 217
 218    def removeFile(clean, path):
 219        if cleanCache or (not cleanCache and clean):
 220            runProgram(['git-update-cache', '--force-remove', '--', path])
 221
 222        if updateWd:
 223            try:
 224                os.unlink(path)
 225            except OSError, e:
 226                if e.errno != errno.ENOENT and e.errno != errno.EISDIR:
 227                    raise
 228
 229    def uniquePath(path, branch):
 230        newPath = path + '_' + branch
 231        suffix = 0
 232        while newPath in files or newPath in dirs:
 233            suffix += 1
 234            newPath = path + '_' + branch + '_' + str(suffix)
 235        files.add(newPath)
 236        return newPath
 237
 238    debug('processing', entry.path, 'clean cache:', cleanCache,
 239          'wd:', updateWd)
 240
 241    cleanMerge = True
 242
 243    path = entry.path
 244    oSha = entry.stages[0].sha1
 245    oMode = entry.stages[0].mode
 246    aSha = entry.stages[1].sha1
 247    aMode = entry.stages[1].mode
 248    bSha = entry.stages[2].sha1
 249    bMode = entry.stages[2].mode
 250
 251    assert(oSha == None or isSha(oSha))
 252    assert(aSha == None or isSha(aSha))
 253    assert(bSha == None or isSha(bSha))
 254
 255    assert(oMode == None or type(oMode) is int)
 256    assert(aMode == None or type(aMode) is int)
 257    assert(bMode == None or type(bMode) is int)
 258
 259    if (oSha and (not aSha or not bSha)):
 260    #
 261    # Case A: Deleted in one
 262    #
 263        if (not aSha     and not bSha) or \
 264           (aSha == oSha and not bSha) or \
 265           (not aSha     and bSha == oSha):
 266    # Deleted in both or deleted in one and unchanged in the other
 267            if aSha:
 268                print 'Removing ' + path
 269            removeFile(True, path)
 270        else:
 271    # Deleted in one and changed in the other
 272            cleanMerge = False
 273            if not aSha:
 274                print 'CONFLICT (del/mod): "' + path + '" deleted in', \
 275                      branch1Name, 'and modified in', branch2Name, \
 276                      '. Version', branch2Name, ' of "' + path + \
 277                      '" left in tree'
 278                mode = bMode
 279                sha = bSha
 280            else:
 281                print 'CONFLICT (mod/del): "' + path + '" deleted in', \
 282                      branch2Name, 'and modified in', branch1Name + \
 283                      '. Version', branch1Name, 'of "' + path + \
 284                      '" left in tree'
 285                mode = aMode
 286                sha = aSha
 287
 288            updateFile(False, sha, mode, path)
 289    
 290    elif (not oSha and aSha     and not bSha) or \
 291         (not oSha and not aSha and bSha):
 292    #
 293    # Case B: Added in one.
 294    #
 295        if aSha:
 296            addBranch = branch1Name
 297            otherBranch = branch2Name
 298            mode = aMode
 299            sha = aSha
 300            conf = 'file/dir'
 301        else:
 302            addBranch = branch2Name
 303            otherBranch = branch1Name
 304            mode = bMode
 305            sha = bSha
 306            conf = 'dir/file'
 307    
 308        if path in dirs:
 309            cleanMerge = False
 310            newPath = uniquePath(path, addBranch)
 311            print 'CONFLICT (' + conf + \
 312                  '): There is a directory with name "' + path + '" in', \
 313                  otherBranch + '. Adding "' + path + '" as "' + newPath + '"'
 314
 315            removeFile(False, path)
 316            path = newPath
 317        else:
 318            print 'Adding "' + path + '"'
 319
 320        updateFile(True, sha, mode, path)
 321    
 322    elif not oSha and aSha and bSha:
 323    #
 324    # Case C: Added in both (check for same permissions).
 325    #
 326        if aSha == bSha:
 327            if aMode != bMode:
 328                cleanMerge = False
 329                print 'CONFLICT: File "' + path + \
 330                      '" added identically in both branches,'
 331                print 'CONFLICT: but permissions conflict', '0%o' % aMode, \
 332                      '->', '0%o' % bMode
 333                print 'CONFLICT: adding with permission:', '0%o' % aMode
 334
 335                updateFile(False, aSha, aMode, path)
 336            else:
 337                # This case is handled by git-read-tree
 338                assert(False)
 339        else:
 340            cleanMerge = False
 341            newPath1 = uniquePath(path, branch1Name)
 342            newPath2 = uniquePath(path, branch2Name)
 343            print 'CONFLICT (add/add): File "' + path + \
 344                  '" added non-identically in both branches.', \
 345                  'Adding "' + newPath1 + '" and "' + newPath2 + '" instead.'
 346            removeFile(False, path)
 347            updateFile(False, aSha, aMode, newPath1)
 348            updateFile(False, bSha, bMode, newPath2)
 349
 350    elif oSha and aSha and bSha:
 351    #
 352    # case D: Modified in both, but differently.
 353    #
 354        print 'Auto-merging', path 
 355        orig = runProgram(['git-unpack-file', oSha]).rstrip()
 356        src1 = runProgram(['git-unpack-file', aSha]).rstrip()
 357        src2 = runProgram(['git-unpack-file', bSha]).rstrip()
 358        [out, ret] = runProgram(['merge',
 359                                 '-L', branch1Name + '/' + path,
 360                                 '-L', 'orig/' + path,
 361                                 '-L', branch2Name + '/' + path,
 362                                 src1, orig, src2], returnCode=True)
 363
 364        if aMode == oMode:
 365            mode = bMode
 366        else:
 367            mode = aMode
 368
 369        sha = runProgram(['git-hash-object', '-t', 'blob', '-w',
 370                          src1]).rstrip()
 371
 372        if ret != 0:
 373            cleanMerge = False
 374            print 'CONFLICT (content): Merge conflict in "' + path + '".'
 375            updateFile(False, sha, mode, path)
 376        else:
 377            updateFile(True, sha, mode, path)
 378
 379        os.unlink(orig)
 380        os.unlink(src1)
 381        os.unlink(src2)
 382    else:
 383        die("ERROR: Fatal merge failure, shouldn't happen.")
 384
 385    return cleanMerge
 386
 387def usage():
 388    die('Usage:', sys.argv[0], ' <base>... -- <head> <remote>..')
 389
 390# main entry point as merge strategy module
 391# The first parameters up to -- are merge bases, and the rest are heads.
 392# This strategy module figures out merge bases itself, so we only
 393# get heads.
 394
 395if len(sys.argv) < 4:
 396    usage()
 397
 398for nextArg in xrange(1, len(sys.argv)):
 399    if sys.argv[nextArg] == '--':
 400        if len(sys.argv) != nextArg + 3:
 401            die('Not handling anything other than two heads merge.')
 402        try:
 403            h1 = firstBranch = sys.argv[nextArg + 1]
 404            h2 = secondBranch = sys.argv[nextArg + 2]
 405        except IndexError:
 406            usage()
 407        break
 408
 409print 'Merging', h1, 'with', h2
 410
 411try:
 412    h1 = runProgram(['git-rev-parse', '--verify', h1 + '^0']).rstrip()
 413    h2 = runProgram(['git-rev-parse', '--verify', h2 + '^0']).rstrip()
 414
 415    graph = buildGraph([h1, h2])
 416
 417    [res, clean] = merge(graph.shaMap[h1], graph.shaMap[h2],
 418                         firstBranch, secondBranch, graph)
 419
 420    print ''
 421except:
 422    traceback.print_exc(None, sys.stderr)
 423    sys.exit(2)
 424
 425if clean:
 426    sys.exit(0)
 427else:
 428    print 'Automatic merge failed, fix up by hand'
 429    sys.exit(1)