match-trees.con commit score_trees(): fix iteration over trees with missing entries (2ec4150)
   1#include "cache.h"
   2#include "tree.h"
   3#include "tree-walk.h"
   4
   5static int score_missing(unsigned mode, const char *path)
   6{
   7        int score;
   8
   9        if (S_ISDIR(mode))
  10                score = -1000;
  11        else if (S_ISLNK(mode))
  12                score = -500;
  13        else
  14                score = -50;
  15        return score;
  16}
  17
  18static int score_differs(unsigned mode1, unsigned mode2, const char *path)
  19{
  20        int score;
  21
  22        if (S_ISDIR(mode1) != S_ISDIR(mode2))
  23                score = -100;
  24        else if (S_ISLNK(mode1) != S_ISLNK(mode2))
  25                score = -50;
  26        else
  27                score = -5;
  28        return score;
  29}
  30
  31static int score_matches(unsigned mode1, unsigned mode2, const char *path)
  32{
  33        int score;
  34
  35        /* Heh, we found SHA-1 collisions between different kind of objects */
  36        if (S_ISDIR(mode1) != S_ISDIR(mode2))
  37                score = -100;
  38        else if (S_ISLNK(mode1) != S_ISLNK(mode2))
  39                score = -50;
  40
  41        else if (S_ISDIR(mode1))
  42                score = 1000;
  43        else if (S_ISLNK(mode1))
  44                score = 500;
  45        else
  46                score = 250;
  47        return score;
  48}
  49
  50static void *fill_tree_desc_strict(struct tree_desc *desc,
  51                                   const struct object_id *hash)
  52{
  53        void *buffer;
  54        enum object_type type;
  55        unsigned long size;
  56
  57        buffer = read_sha1_file(hash->hash, &type, &size);
  58        if (!buffer)
  59                die("unable to read tree (%s)", oid_to_hex(hash));
  60        if (type != OBJ_TREE)
  61                die("%s is not a tree", oid_to_hex(hash));
  62        init_tree_desc(desc, buffer, size);
  63        return buffer;
  64}
  65
  66static int base_name_entries_compare(const struct name_entry *a,
  67                                     const struct name_entry *b)
  68{
  69        return base_name_compare(a->path, tree_entry_len(a), a->mode,
  70                                 b->path, tree_entry_len(b), b->mode);
  71}
  72
  73/*
  74 * Inspect two trees, and give a score that tells how similar they are.
  75 */
  76static int score_trees(const struct object_id *hash1, const struct object_id *hash2)
  77{
  78        struct tree_desc one;
  79        struct tree_desc two;
  80        void *one_buf = fill_tree_desc_strict(&one, hash1);
  81        void *two_buf = fill_tree_desc_strict(&two, hash2);
  82        int score = 0;
  83
  84        for (;;) {
  85                int cmp;
  86
  87                if (one.size && two.size)
  88                        cmp = base_name_entries_compare(&one.entry, &two.entry);
  89                else if (one.size)
  90                        /* two lacks this entry */
  91                        cmp = -1;
  92                else if (two.size)
  93                        /* two has more entries */
  94                        cmp = 1;
  95                else
  96                        break;
  97
  98                if (cmp < 0) {
  99                        /* path1 does not appear in two */
 100                        score += score_missing(one.entry.mode, one.entry.path);
 101                        update_tree_entry(&one);
 102                } else if (cmp > 0) {
 103                        /* path2 does not appear in one */
 104                        score += score_missing(two.entry.mode, two.entry.path);
 105                        update_tree_entry(&two);
 106                } else {
 107                        /* path appears in both */
 108                        if (oidcmp(one.entry.oid, two.entry.oid)) {
 109                                /* they are different */
 110                                score += score_differs(one.entry.mode,
 111                                                       two.entry.mode,
 112                                                       one.entry.path);
 113                        } else {
 114                                /* same subtree or blob */
 115                                score += score_matches(one.entry.mode,
 116                                                       two.entry.mode,
 117                                                       one.entry.path);
 118                        }
 119                        update_tree_entry(&one);
 120                        update_tree_entry(&two);
 121                }
 122        }
 123        free(one_buf);
 124        free(two_buf);
 125        return score;
 126}
 127
 128/*
 129 * Match one itself and its subtrees with two and pick the best match.
 130 */
 131static void match_trees(const struct object_id *hash1,
 132                        const struct object_id *hash2,
 133                        int *best_score,
 134                        char **best_match,
 135                        const char *base,
 136                        int recurse_limit)
 137{
 138        struct tree_desc one;
 139        void *one_buf = fill_tree_desc_strict(&one, hash1);
 140
 141        while (one.size) {
 142                const char *path;
 143                const struct object_id *elem;
 144                unsigned mode;
 145                int score;
 146
 147                elem = tree_entry_extract(&one, &path, &mode);
 148                if (!S_ISDIR(mode))
 149                        goto next;
 150                score = score_trees(elem, hash2);
 151                if (*best_score < score) {
 152                        free(*best_match);
 153                        *best_match = xstrfmt("%s%s", base, path);
 154                        *best_score = score;
 155                }
 156                if (recurse_limit) {
 157                        char *newbase = xstrfmt("%s%s/", base, path);
 158                        match_trees(elem, hash2, best_score, best_match,
 159                                    newbase, recurse_limit - 1);
 160                        free(newbase);
 161                }
 162
 163        next:
 164                update_tree_entry(&one);
 165        }
 166        free(one_buf);
 167}
 168
 169/*
 170 * A tree "hash1" has a subdirectory at "prefix".  Come up with a
 171 * tree object by replacing it with another tree "hash2".
 172 */
 173static int splice_tree(const unsigned char *hash1,
 174                       const char *prefix,
 175                       const unsigned char *hash2,
 176                       unsigned char *result)
 177{
 178        char *subpath;
 179        int toplen;
 180        char *buf;
 181        unsigned long sz;
 182        struct tree_desc desc;
 183        unsigned char *rewrite_here;
 184        const unsigned char *rewrite_with;
 185        unsigned char subtree[20];
 186        enum object_type type;
 187        int status;
 188
 189        subpath = strchrnul(prefix, '/');
 190        toplen = subpath - prefix;
 191        if (*subpath)
 192                subpath++;
 193
 194        buf = read_sha1_file(hash1, &type, &sz);
 195        if (!buf)
 196                die("cannot read tree %s", sha1_to_hex(hash1));
 197        init_tree_desc(&desc, buf, sz);
 198
 199        rewrite_here = NULL;
 200        while (desc.size) {
 201                const char *name;
 202                unsigned mode;
 203                const struct object_id *oid;
 204
 205                oid = tree_entry_extract(&desc, &name, &mode);
 206                if (strlen(name) == toplen &&
 207                    !memcmp(name, prefix, toplen)) {
 208                        if (!S_ISDIR(mode))
 209                                die("entry %s in tree %s is not a tree",
 210                                    name, sha1_to_hex(hash1));
 211                        rewrite_here = (unsigned char *) oid->hash;
 212                        break;
 213                }
 214                update_tree_entry(&desc);
 215        }
 216        if (!rewrite_here)
 217                die("entry %.*s not found in tree %s",
 218                    toplen, prefix, sha1_to_hex(hash1));
 219        if (*subpath) {
 220                status = splice_tree(rewrite_here, subpath, hash2, subtree);
 221                if (status)
 222                        return status;
 223                rewrite_with = subtree;
 224        }
 225        else
 226                rewrite_with = hash2;
 227        hashcpy(rewrite_here, rewrite_with);
 228        status = write_sha1_file(buf, sz, tree_type, result);
 229        free(buf);
 230        return status;
 231}
 232
 233/*
 234 * We are trying to come up with a merge between one and two that
 235 * results in a tree shape similar to one.  The tree two might
 236 * correspond to a subtree of one, in which case it needs to be
 237 * shifted down by prefixing otherwise empty directories.  On the
 238 * other hand, it could cover tree one and we might need to pick a
 239 * subtree of it.
 240 */
 241void shift_tree(const struct object_id *hash1,
 242                const struct object_id *hash2,
 243                struct object_id *shifted,
 244                int depth_limit)
 245{
 246        char *add_prefix;
 247        char *del_prefix;
 248        int add_score, del_score;
 249
 250        /*
 251         * NEEDSWORK: this limits the recursion depth to hardcoded
 252         * value '2' to avoid excessive overhead.
 253         */
 254        if (!depth_limit)
 255                depth_limit = 2;
 256
 257        add_score = del_score = score_trees(hash1, hash2);
 258        add_prefix = xcalloc(1, 1);
 259        del_prefix = xcalloc(1, 1);
 260
 261        /*
 262         * See if one's subtree resembles two; if so we need to prefix
 263         * two with a few fake trees to match the prefix.
 264         */
 265        match_trees(hash1, hash2, &add_score, &add_prefix, "", depth_limit);
 266
 267        /*
 268         * See if two's subtree resembles one; if so we need to
 269         * pick only subtree of two.
 270         */
 271        match_trees(hash2, hash1, &del_score, &del_prefix, "", depth_limit);
 272
 273        /* Assume we do not have to do any shifting */
 274        oidcpy(shifted, hash2);
 275
 276        if (add_score < del_score) {
 277                /* We need to pick a subtree of two */
 278                unsigned mode;
 279
 280                if (!*del_prefix)
 281                        return;
 282
 283                if (get_tree_entry(hash2->hash, del_prefix, shifted->hash, &mode))
 284                        die("cannot find path %s in tree %s",
 285                            del_prefix, oid_to_hex(hash2));
 286                return;
 287        }
 288
 289        if (!*add_prefix)
 290                return;
 291
 292        splice_tree(hash1->hash, add_prefix, hash2->hash, shifted->hash);
 293}
 294
 295/*
 296 * The user says the trees will be shifted by this much.
 297 * Unfortunately we cannot fundamentally tell which one to
 298 * be prefixed, as recursive merge can work in either direction.
 299 */
 300void shift_tree_by(const struct object_id *hash1,
 301                   const struct object_id *hash2,
 302                   struct object_id *shifted,
 303                   const char *shift_prefix)
 304{
 305        struct object_id sub1, sub2;
 306        unsigned mode1, mode2;
 307        unsigned candidate = 0;
 308
 309        /* Can hash2 be a tree at shift_prefix in tree hash1? */
 310        if (!get_tree_entry(hash1->hash, shift_prefix, sub1.hash, &mode1) &&
 311            S_ISDIR(mode1))
 312                candidate |= 1;
 313
 314        /* Can hash1 be a tree at shift_prefix in tree hash2? */
 315        if (!get_tree_entry(hash2->hash, shift_prefix, sub2.hash, &mode2) &&
 316            S_ISDIR(mode2))
 317                candidate |= 2;
 318
 319        if (candidate == 3) {
 320                /* Both are plausible -- we need to evaluate the score */
 321                int best_score = score_trees(hash1, hash2);
 322                int score;
 323
 324                candidate = 0;
 325                score = score_trees(&sub1, hash2);
 326                if (score > best_score) {
 327                        candidate = 1;
 328                        best_score = score;
 329                }
 330                score = score_trees(&sub2, hash1);
 331                if (score > best_score)
 332                        candidate = 2;
 333        }
 334
 335        if (!candidate) {
 336                /* Neither is plausible -- do not shift */
 337                oidcpy(shifted, hash2);
 338                return;
 339        }
 340
 341        if (candidate == 1)
 342                /*
 343                 * shift tree2 down by adding shift_prefix above it
 344                 * to match tree1.
 345                 */
 346                splice_tree(hash1->hash, shift_prefix, hash2->hash, shifted->hash);
 347        else
 348                /*
 349                 * shift tree2 up by removing shift_prefix from it
 350                 * to match tree1.
 351                 */
 352                oidcpy(shifted, &sub2);
 353}