match-trees.con commit git.1: switch homepage for stats (405869d)
   1#include "cache.h"
   2#include "tree.h"
   3#include "tree-walk.h"
   4
   5static int score_missing(unsigned mode, const char *path)
   6{
   7        int score;
   8
   9        if (S_ISDIR(mode))
  10                score = -1000;
  11        else if (S_ISLNK(mode))
  12                score = -500;
  13        else
  14                score = -50;
  15        return score;
  16}
  17
  18static int score_differs(unsigned mode1, unsigned mode2, const char *path)
  19{
  20        int score;
  21
  22        if (S_ISDIR(mode1) != S_ISDIR(mode2))
  23                score = -100;
  24        else if (S_ISLNK(mode1) != S_ISLNK(mode2))
  25                score = -50;
  26        else
  27                score = -5;
  28        return score;
  29}
  30
  31static int score_matches(unsigned mode1, unsigned mode2, const char *path)
  32{
  33        int score;
  34
  35        /* Heh, we found SHA-1 collisions between different kind of objects */
  36        if (S_ISDIR(mode1) != S_ISDIR(mode2))
  37                score = -100;
  38        else if (S_ISLNK(mode1) != S_ISLNK(mode2))
  39                score = -50;
  40
  41        else if (S_ISDIR(mode1))
  42                score = 1000;
  43        else if (S_ISLNK(mode1))
  44                score = 500;
  45        else
  46                score = 250;
  47        return score;
  48}
  49
  50static void *fill_tree_desc_strict(struct tree_desc *desc,
  51                                   const unsigned char *hash)
  52{
  53        void *buffer;
  54        enum object_type type;
  55        unsigned long size;
  56
  57        buffer = read_sha1_file(hash, &type, &size);
  58        if (!buffer)
  59                die("unable to read tree (%s)", sha1_to_hex(hash));
  60        if (type != OBJ_TREE)
  61                die("%s is not a tree", sha1_to_hex(hash));
  62        init_tree_desc(desc, buffer, size);
  63        return buffer;
  64}
  65
  66static int base_name_entries_compare(const struct name_entry *a,
  67                                     const struct name_entry *b)
  68{
  69        return base_name_compare(a->path, tree_entry_len(a), a->mode,
  70                                 b->path, tree_entry_len(b), b->mode);
  71}
  72
  73/*
  74 * Inspect two trees, and give a score that tells how similar they are.
  75 */
  76static int score_trees(const unsigned char *hash1, const unsigned char *hash2)
  77{
  78        struct tree_desc one;
  79        struct tree_desc two;
  80        void *one_buf = fill_tree_desc_strict(&one, hash1);
  81        void *two_buf = fill_tree_desc_strict(&two, hash2);
  82        int score = 0;
  83
  84        for (;;) {
  85                struct name_entry e1, e2;
  86                int got_entry_from_one = tree_entry(&one, &e1);
  87                int got_entry_from_two = tree_entry(&two, &e2);
  88                int cmp;
  89
  90                if (got_entry_from_one && got_entry_from_two)
  91                        cmp = base_name_entries_compare(&e1, &e2);
  92                else if (got_entry_from_one)
  93                        /* two lacks this entry */
  94                        cmp = -1;
  95                else if (got_entry_from_two)
  96                        /* two has more entries */
  97                        cmp = 1;
  98                else
  99                        break;
 100
 101                if (cmp < 0)
 102                        /* path1 does not appear in two */
 103                        score += score_missing(e1.mode, e1.path);
 104                else if (cmp > 0)
 105                        /* path2 does not appear in one */
 106                        score += score_missing(e2.mode, e2.path);
 107                else if (hashcmp(e1.sha1, e2.sha1))
 108                        /* they are different */
 109                        score += score_differs(e1.mode, e2.mode, e1.path);
 110                else
 111                        /* same subtree or blob */
 112                        score += score_matches(e1.mode, e2.mode, e1.path);
 113        }
 114        free(one_buf);
 115        free(two_buf);
 116        return score;
 117}
 118
 119/*
 120 * Match one itself and its subtrees with two and pick the best match.
 121 */
 122static void match_trees(const unsigned char *hash1,
 123                        const unsigned char *hash2,
 124                        int *best_score,
 125                        char **best_match,
 126                        const char *base,
 127                        int recurse_limit)
 128{
 129        struct tree_desc one;
 130        void *one_buf = fill_tree_desc_strict(&one, hash1);
 131
 132        while (one.size) {
 133                const char *path;
 134                const unsigned char *elem;
 135                unsigned mode;
 136                int score;
 137
 138                elem = tree_entry_extract(&one, &path, &mode);
 139                if (!S_ISDIR(mode))
 140                        goto next;
 141                score = score_trees(elem, hash2);
 142                if (*best_score < score) {
 143                        char *newpath;
 144                        newpath = xmalloc(strlen(base) + strlen(path) + 1);
 145                        sprintf(newpath, "%s%s", base, path);
 146                        free(*best_match);
 147                        *best_match = newpath;
 148                        *best_score = score;
 149                }
 150                if (recurse_limit) {
 151                        char *newbase;
 152                        newbase = xmalloc(strlen(base) + strlen(path) + 2);
 153                        sprintf(newbase, "%s%s/", base, path);
 154                        match_trees(elem, hash2, best_score, best_match,
 155                                    newbase, recurse_limit - 1);
 156                        free(newbase);
 157                }
 158
 159        next:
 160                update_tree_entry(&one);
 161        }
 162        free(one_buf);
 163}
 164
 165/*
 166 * A tree "hash1" has a subdirectory at "prefix".  Come up with a
 167 * tree object by replacing it with another tree "hash2".
 168 */
 169static int splice_tree(const unsigned char *hash1,
 170                       const char *prefix,
 171                       const unsigned char *hash2,
 172                       unsigned char *result)
 173{
 174        char *subpath;
 175        int toplen;
 176        char *buf;
 177        unsigned long sz;
 178        struct tree_desc desc;
 179        unsigned char *rewrite_here;
 180        const unsigned char *rewrite_with;
 181        unsigned char subtree[20];
 182        enum object_type type;
 183        int status;
 184
 185        subpath = strchrnul(prefix, '/');
 186        toplen = subpath - prefix;
 187        if (*subpath)
 188                subpath++;
 189
 190        buf = read_sha1_file(hash1, &type, &sz);
 191        if (!buf)
 192                die("cannot read tree %s", sha1_to_hex(hash1));
 193        init_tree_desc(&desc, buf, sz);
 194
 195        rewrite_here = NULL;
 196        while (desc.size) {
 197                const char *name;
 198                unsigned mode;
 199                const unsigned char *sha1;
 200
 201                sha1 = tree_entry_extract(&desc, &name, &mode);
 202                if (strlen(name) == toplen &&
 203                    !memcmp(name, prefix, toplen)) {
 204                        if (!S_ISDIR(mode))
 205                                die("entry %s in tree %s is not a tree",
 206                                    name, sha1_to_hex(hash1));
 207                        rewrite_here = (unsigned char *) sha1;
 208                        break;
 209                }
 210                update_tree_entry(&desc);
 211        }
 212        if (!rewrite_here)
 213                die("entry %.*s not found in tree %s",
 214                    toplen, prefix, sha1_to_hex(hash1));
 215        if (*subpath) {
 216                status = splice_tree(rewrite_here, subpath, hash2, subtree);
 217                if (status)
 218                        return status;
 219                rewrite_with = subtree;
 220        }
 221        else
 222                rewrite_with = hash2;
 223        hashcpy(rewrite_here, rewrite_with);
 224        status = write_sha1_file(buf, sz, tree_type, result);
 225        free(buf);
 226        return status;
 227}
 228
 229/*
 230 * We are trying to come up with a merge between one and two that
 231 * results in a tree shape similar to one.  The tree two might
 232 * correspond to a subtree of one, in which case it needs to be
 233 * shifted down by prefixing otherwise empty directories.  On the
 234 * other hand, it could cover tree one and we might need to pick a
 235 * subtree of it.
 236 */
 237void shift_tree(const unsigned char *hash1,
 238                const unsigned char *hash2,
 239                unsigned char *shifted,
 240                int depth_limit)
 241{
 242        char *add_prefix;
 243        char *del_prefix;
 244        int add_score, del_score;
 245
 246        /*
 247         * NEEDSWORK: this limits the recursion depth to hardcoded
 248         * value '2' to avoid excessive overhead.
 249         */
 250        if (!depth_limit)
 251                depth_limit = 2;
 252
 253        add_score = del_score = score_trees(hash1, hash2);
 254        add_prefix = xcalloc(1, 1);
 255        del_prefix = xcalloc(1, 1);
 256
 257        /*
 258         * See if one's subtree resembles two; if so we need to prefix
 259         * two with a few fake trees to match the prefix.
 260         */
 261        match_trees(hash1, hash2, &add_score, &add_prefix, "", depth_limit);
 262
 263        /*
 264         * See if two's subtree resembles one; if so we need to
 265         * pick only subtree of two.
 266         */
 267        match_trees(hash2, hash1, &del_score, &del_prefix, "", depth_limit);
 268
 269        /* Assume we do not have to do any shifting */
 270        hashcpy(shifted, hash2);
 271
 272        if (add_score < del_score) {
 273                /* We need to pick a subtree of two */
 274                unsigned mode;
 275
 276                if (!*del_prefix)
 277                        return;
 278
 279                if (get_tree_entry(hash2, del_prefix, shifted, &mode))
 280                        die("cannot find path %s in tree %s",
 281                            del_prefix, sha1_to_hex(hash2));
 282                return;
 283        }
 284
 285        if (!*add_prefix)
 286                return;
 287
 288        splice_tree(hash1, add_prefix, hash2, shifted);
 289}
 290
 291/*
 292 * The user says the trees will be shifted by this much.
 293 * Unfortunately we cannot fundamentally tell which one to
 294 * be prefixed, as recursive merge can work in either direction.
 295 */
 296void shift_tree_by(const unsigned char *hash1,
 297                   const unsigned char *hash2,
 298                   unsigned char *shifted,
 299                   const char *shift_prefix)
 300{
 301        unsigned char sub1[20], sub2[20];
 302        unsigned mode1, mode2;
 303        unsigned candidate = 0;
 304
 305        /* Can hash2 be a tree at shift_prefix in tree hash1? */
 306        if (!get_tree_entry(hash1, shift_prefix, sub1, &mode1) &&
 307            S_ISDIR(mode1))
 308                candidate |= 1;
 309
 310        /* Can hash1 be a tree at shift_prefix in tree hash2? */
 311        if (!get_tree_entry(hash2, shift_prefix, sub2, &mode2) &&
 312            S_ISDIR(mode2))
 313                candidate |= 2;
 314
 315        if (candidate == 3) {
 316                /* Both are plausible -- we need to evaluate the score */
 317                int best_score = score_trees(hash1, hash2);
 318                int score;
 319
 320                candidate = 0;
 321                score = score_trees(sub1, hash2);
 322                if (score > best_score) {
 323                        candidate = 1;
 324                        best_score = score;
 325                }
 326                score = score_trees(sub2, hash1);
 327                if (score > best_score)
 328                        candidate = 2;
 329        }
 330
 331        if (!candidate) {
 332                /* Neither is plausible -- do not shift */
 333                hashcpy(shifted, hash2);
 334                return;
 335        }
 336
 337        if (candidate == 1)
 338                /*
 339                 * shift tree2 down by adding shift_prefix above it
 340                 * to match tree1.
 341                 */
 342                splice_tree(hash1, shift_prefix, hash2, shifted);
 343        else
 344                /*
 345                 * shift tree2 up by removing shift_prefix from it
 346                 * to match tree1.
 347                 */
 348                hashcpy(shifted, sub2);
 349}