match-trees.con commit column: add columnar layout (077539d)
   1#include "cache.h"
   2#include "tree.h"
   3#include "tree-walk.h"
   4
   5static int score_missing(unsigned mode, const char *path)
   6{
   7        int score;
   8
   9        if (S_ISDIR(mode))
  10                score = -1000;
  11        else if (S_ISLNK(mode))
  12                score = -500;
  13        else
  14                score = -50;
  15        return score;
  16}
  17
  18static int score_differs(unsigned mode1, unsigned mode2, const char *path)
  19{
  20        int score;
  21
  22        if (S_ISDIR(mode1) != S_ISDIR(mode2))
  23                score = -100;
  24        else if (S_ISLNK(mode1) != S_ISLNK(mode2))
  25                score = -50;
  26        else
  27                score = -5;
  28        return score;
  29}
  30
  31static int score_matches(unsigned mode1, unsigned mode2, const char *path)
  32{
  33        int score;
  34
  35        /* Heh, we found SHA-1 collisions between different kind of objects */
  36        if (S_ISDIR(mode1) != S_ISDIR(mode2))
  37                score = -100;
  38        else if (S_ISLNK(mode1) != S_ISLNK(mode2))
  39                score = -50;
  40
  41        else if (S_ISDIR(mode1))
  42                score = 1000;
  43        else if (S_ISLNK(mode1))
  44                score = 500;
  45        else
  46                score = 250;
  47        return score;
  48}
  49
  50/*
  51 * Inspect two trees, and give a score that tells how similar they are.
  52 */
  53static int score_trees(const unsigned char *hash1, const unsigned char *hash2)
  54{
  55        struct tree_desc one;
  56        struct tree_desc two;
  57        void *one_buf, *two_buf;
  58        int score = 0;
  59        enum object_type type;
  60        unsigned long size;
  61
  62        one_buf = read_sha1_file(hash1, &type, &size);
  63        if (!one_buf)
  64                die("unable to read tree (%s)", sha1_to_hex(hash1));
  65        if (type != OBJ_TREE)
  66                die("%s is not a tree", sha1_to_hex(hash1));
  67        init_tree_desc(&one, one_buf, size);
  68        two_buf = read_sha1_file(hash2, &type, &size);
  69        if (!two_buf)
  70                die("unable to read tree (%s)", sha1_to_hex(hash2));
  71        if (type != OBJ_TREE)
  72                die("%s is not a tree", sha1_to_hex(hash2));
  73        init_tree_desc(&two, two_buf, size);
  74        while (one.size | two.size) {
  75                const unsigned char *elem1 = elem1;
  76                const unsigned char *elem2 = elem2;
  77                const char *path1 = path1;
  78                const char *path2 = path2;
  79                unsigned mode1 = mode1;
  80                unsigned mode2 = mode2;
  81                int cmp;
  82
  83                if (one.size)
  84                        elem1 = tree_entry_extract(&one, &path1, &mode1);
  85                if (two.size)
  86                        elem2 = tree_entry_extract(&two, &path2, &mode2);
  87
  88                if (!one.size) {
  89                        /* two has more entries */
  90                        score += score_missing(mode2, path2);
  91                        update_tree_entry(&two);
  92                        continue;
  93                }
  94                if (!two.size) {
  95                        /* two lacks this entry */
  96                        score += score_missing(mode1, path1);
  97                        update_tree_entry(&one);
  98                        continue;
  99                }
 100                cmp = base_name_compare(path1, strlen(path1), mode1,
 101                                        path2, strlen(path2), mode2);
 102                if (cmp < 0) {
 103                        /* path1 does not appear in two */
 104                        score += score_missing(mode1, path1);
 105                        update_tree_entry(&one);
 106                        continue;
 107                }
 108                else if (cmp > 0) {
 109                        /* path2 does not appear in one */
 110                        score += score_missing(mode2, path2);
 111                        update_tree_entry(&two);
 112                        continue;
 113                }
 114                else if (hashcmp(elem1, elem2))
 115                        /* they are different */
 116                        score += score_differs(mode1, mode2, path1);
 117                else
 118                        /* same subtree or blob */
 119                        score += score_matches(mode1, mode2, path1);
 120                update_tree_entry(&one);
 121                update_tree_entry(&two);
 122        }
 123        free(one_buf);
 124        free(two_buf);
 125        return score;
 126}
 127
 128/*
 129 * Match one itself and its subtrees with two and pick the best match.
 130 */
 131static void match_trees(const unsigned char *hash1,
 132                        const unsigned char *hash2,
 133                        int *best_score,
 134                        char **best_match,
 135                        const char *base,
 136                        int recurse_limit)
 137{
 138        struct tree_desc one;
 139        void *one_buf;
 140        enum object_type type;
 141        unsigned long size;
 142
 143        one_buf = read_sha1_file(hash1, &type, &size);
 144        if (!one_buf)
 145                die("unable to read tree (%s)", sha1_to_hex(hash1));
 146        if (type != OBJ_TREE)
 147                die("%s is not a tree", sha1_to_hex(hash1));
 148        init_tree_desc(&one, one_buf, size);
 149
 150        while (one.size) {
 151                const char *path;
 152                const unsigned char *elem;
 153                unsigned mode;
 154                int score;
 155
 156                elem = tree_entry_extract(&one, &path, &mode);
 157                if (!S_ISDIR(mode))
 158                        goto next;
 159                score = score_trees(elem, hash2);
 160                if (*best_score < score) {
 161                        char *newpath;
 162                        newpath = xmalloc(strlen(base) + strlen(path) + 1);
 163                        sprintf(newpath, "%s%s", base, path);
 164                        free(*best_match);
 165                        *best_match = newpath;
 166                        *best_score = score;
 167                }
 168                if (recurse_limit) {
 169                        char *newbase;
 170                        newbase = xmalloc(strlen(base) + strlen(path) + 2);
 171                        sprintf(newbase, "%s%s/", base, path);
 172                        match_trees(elem, hash2, best_score, best_match,
 173                                    newbase, recurse_limit - 1);
 174                        free(newbase);
 175                }
 176
 177        next:
 178                update_tree_entry(&one);
 179        }
 180        free(one_buf);
 181}
 182
 183/*
 184 * A tree "hash1" has a subdirectory at "prefix".  Come up with a
 185 * tree object by replacing it with another tree "hash2".
 186 */
 187static int splice_tree(const unsigned char *hash1,
 188                       const char *prefix,
 189                       const unsigned char *hash2,
 190                       unsigned char *result)
 191{
 192        char *subpath;
 193        int toplen;
 194        char *buf;
 195        unsigned long sz;
 196        struct tree_desc desc;
 197        unsigned char *rewrite_here;
 198        const unsigned char *rewrite_with;
 199        unsigned char subtree[20];
 200        enum object_type type;
 201        int status;
 202
 203        subpath = strchr(prefix, '/');
 204        if (!subpath)
 205                toplen = strlen(prefix);
 206        else {
 207                toplen = subpath - prefix;
 208                subpath++;
 209        }
 210
 211        buf = read_sha1_file(hash1, &type, &sz);
 212        if (!buf)
 213                die("cannot read tree %s", sha1_to_hex(hash1));
 214        init_tree_desc(&desc, buf, sz);
 215
 216        rewrite_here = NULL;
 217        while (desc.size) {
 218                const char *name;
 219                unsigned mode;
 220                const unsigned char *sha1;
 221
 222                sha1 = tree_entry_extract(&desc, &name, &mode);
 223                if (strlen(name) == toplen &&
 224                    !memcmp(name, prefix, toplen)) {
 225                        if (!S_ISDIR(mode))
 226                                die("entry %s in tree %s is not a tree",
 227                                    name, sha1_to_hex(hash1));
 228                        rewrite_here = (unsigned char *) sha1;
 229                        break;
 230                }
 231                update_tree_entry(&desc);
 232        }
 233        if (!rewrite_here)
 234                die("entry %.*s not found in tree %s",
 235                    toplen, prefix, sha1_to_hex(hash1));
 236        if (subpath) {
 237                status = splice_tree(rewrite_here, subpath, hash2, subtree);
 238                if (status)
 239                        return status;
 240                rewrite_with = subtree;
 241        }
 242        else
 243                rewrite_with = hash2;
 244        hashcpy(rewrite_here, rewrite_with);
 245        status = write_sha1_file(buf, sz, tree_type, result);
 246        free(buf);
 247        return status;
 248}
 249
 250/*
 251 * We are trying to come up with a merge between one and two that
 252 * results in a tree shape similar to one.  The tree two might
 253 * correspond to a subtree of one, in which case it needs to be
 254 * shifted down by prefixing otherwise empty directories.  On the
 255 * other hand, it could cover tree one and we might need to pick a
 256 * subtree of it.
 257 */
 258void shift_tree(const unsigned char *hash1,
 259                const unsigned char *hash2,
 260                unsigned char *shifted,
 261                int depth_limit)
 262{
 263        char *add_prefix;
 264        char *del_prefix;
 265        int add_score, del_score;
 266
 267        /*
 268         * NEEDSWORK: this limits the recursion depth to hardcoded
 269         * value '2' to avoid excessive overhead.
 270         */
 271        if (!depth_limit)
 272                depth_limit = 2;
 273
 274        add_score = del_score = score_trees(hash1, hash2);
 275        add_prefix = xcalloc(1, 1);
 276        del_prefix = xcalloc(1, 1);
 277
 278        /*
 279         * See if one's subtree resembles two; if so we need to prefix
 280         * two with a few fake trees to match the prefix.
 281         */
 282        match_trees(hash1, hash2, &add_score, &add_prefix, "", depth_limit);
 283
 284        /*
 285         * See if two's subtree resembles one; if so we need to
 286         * pick only subtree of two.
 287         */
 288        match_trees(hash2, hash1, &del_score, &del_prefix, "", depth_limit);
 289
 290        /* Assume we do not have to do any shifting */
 291        hashcpy(shifted, hash2);
 292
 293        if (add_score < del_score) {
 294                /* We need to pick a subtree of two */
 295                unsigned mode;
 296
 297                if (!*del_prefix)
 298                        return;
 299
 300                if (get_tree_entry(hash2, del_prefix, shifted, &mode))
 301                        die("cannot find path %s in tree %s",
 302                            del_prefix, sha1_to_hex(hash2));
 303                return;
 304        }
 305
 306        if (!*add_prefix)
 307                return;
 308
 309        splice_tree(hash1, add_prefix, hash2, shifted);
 310}
 311
 312/*
 313 * The user says the trees will be shifted by this much.
 314 * Unfortunately we cannot fundamentally tell which one to
 315 * be prefixed, as recursive merge can work in either direction.
 316 */
 317void shift_tree_by(const unsigned char *hash1,
 318                   const unsigned char *hash2,
 319                   unsigned char *shifted,
 320                   const char *shift_prefix)
 321{
 322        unsigned char sub1[20], sub2[20];
 323        unsigned mode1, mode2;
 324        unsigned candidate = 0;
 325
 326        /* Can hash2 be a tree at shift_prefix in tree hash1? */
 327        if (!get_tree_entry(hash1, shift_prefix, sub1, &mode1) &&
 328            S_ISDIR(mode1))
 329                candidate |= 1;
 330
 331        /* Can hash1 be a tree at shift_prefix in tree hash2? */
 332        if (!get_tree_entry(hash2, shift_prefix, sub2, &mode2) &&
 333            S_ISDIR(mode2))
 334                candidate |= 2;
 335
 336        if (candidate == 3) {
 337                /* Both are plausible -- we need to evaluate the score */
 338                int best_score = score_trees(hash1, hash2);
 339                int score;
 340
 341                candidate = 0;
 342                score = score_trees(sub1, hash2);
 343                if (score > best_score) {
 344                        candidate = 1;
 345                        best_score = score;
 346                }
 347                score = score_trees(sub2, hash1);
 348                if (score > best_score)
 349                        candidate = 2;
 350        }
 351
 352        if (!candidate) {
 353                /* Neither is plausible -- do not shift */
 354                hashcpy(shifted, hash2);
 355                return;
 356        }
 357
 358        if (candidate == 1)
 359                /*
 360                 * shift tree2 down by adding shift_prefix above it
 361                 * to match tree1.
 362                 */
 363                splice_tree(hash1, shift_prefix, hash2, shifted);
 364        else
 365                /*
 366                 * shift tree2 up by removing shift_prefix from it
 367                 * to match tree1.
 368                 */
 369                hashcpy(shifted, sub2);
 370}