match-trees.con commit Start preparing the API documents. (530e741)
   1#include "cache.h"
   2#include "tree.h"
   3#include "tree-walk.h"
   4
   5static int score_missing(unsigned mode, const char *path)
   6{
   7        int score;
   8
   9        if (S_ISDIR(mode))
  10                score = -1000;
  11        else if (S_ISLNK(mode))
  12                score = -500;
  13        else
  14                score = -50;
  15        return score;
  16}
  17
  18static int score_differs(unsigned mode1, unsigned mode2, const char *path)
  19{
  20        int score;
  21
  22        if (S_ISDIR(mode1) != S_ISDIR(mode2))
  23                score = -100;
  24        else if (S_ISLNK(mode1) != S_ISLNK(mode2))
  25                score = -50;
  26        else
  27                score = -5;
  28        return score;
  29}
  30
  31static int score_matches(unsigned mode1, unsigned mode2, const char *path)
  32{
  33        int score;
  34
  35        /* Heh, we found SHA-1 collisions between different kind of objects */
  36        if (S_ISDIR(mode1) != S_ISDIR(mode2))
  37                score = -100;
  38        else if (S_ISLNK(mode1) != S_ISLNK(mode2))
  39                score = -50;
  40
  41        else if (S_ISDIR(mode1))
  42                score = 1000;
  43        else if (S_ISLNK(mode1))
  44                score = 500;
  45        else
  46                score = 250;
  47        return score;
  48}
  49
  50/*
  51 * Inspect two trees, and give a score that tells how similar they are.
  52 */
  53static int score_trees(const unsigned char *hash1, const unsigned char *hash2)
  54{
  55        struct tree_desc one;
  56        struct tree_desc two;
  57        void *one_buf, *two_buf;
  58        int score = 0;
  59        enum object_type type;
  60        unsigned long size;
  61
  62        one_buf = read_sha1_file(hash1, &type, &size);
  63        if (!one_buf)
  64                die("unable to read tree (%s)", sha1_to_hex(hash1));
  65        if (type != OBJ_TREE)
  66                die("%s is not a tree", sha1_to_hex(hash1));
  67        init_tree_desc(&one, one_buf, size);
  68        two_buf = read_sha1_file(hash2, &type, &size);
  69        if (!two_buf)
  70                die("unable to read tree (%s)", sha1_to_hex(hash2));
  71        if (type != OBJ_TREE)
  72                die("%s is not a tree", sha1_to_hex(hash2));
  73        init_tree_desc(&two, two_buf, size);
  74        while (one.size | two.size) {
  75                const unsigned char *elem1 = elem1;
  76                const unsigned char *elem2 = elem2;
  77                const char *path1 = path1;
  78                const char *path2 = path2;
  79                unsigned mode1 = mode1;
  80                unsigned mode2 = mode2;
  81                int cmp;
  82
  83                if (one.size)
  84                        elem1 = tree_entry_extract(&one, &path1, &mode1);
  85                if (two.size)
  86                        elem2 = tree_entry_extract(&two, &path2, &mode2);
  87
  88                if (!one.size) {
  89                        /* two has more entries */
  90                        score += score_missing(mode2, path2);
  91                        update_tree_entry(&two);
  92                        continue;
  93                }
  94                if (!two.size) {
  95                        /* two lacks this entry */
  96                        score += score_missing(mode1, path1);
  97                        update_tree_entry(&one);
  98                        continue;
  99                }
 100                cmp = base_name_compare(path1, strlen(path1), mode1,
 101                                        path2, strlen(path2), mode2);
 102                if (cmp < 0) {
 103                        /* path1 does not appear in two */
 104                        score += score_missing(mode1, path1);
 105                        update_tree_entry(&one);
 106                        continue;
 107                }
 108                else if (cmp > 0) {
 109                        /* path2 does not appear in one */
 110                        score += score_missing(mode2, path2);
 111                        update_tree_entry(&two);
 112                        continue;
 113                }
 114                else if (hashcmp(elem1, elem2))
 115                        /* they are different */
 116                        score += score_differs(mode1, mode2, path1);
 117                else
 118                        /* same subtree or blob */
 119                        score += score_matches(mode1, mode2, path1);
 120                update_tree_entry(&one);
 121                update_tree_entry(&two);
 122        }
 123        free(one_buf);
 124        free(two_buf);
 125        return score;
 126}
 127
 128/*
 129 * Match one itself and its subtrees with two and pick the best match.
 130 */
 131static void match_trees(const unsigned char *hash1,
 132                        const unsigned char *hash2,
 133                        int *best_score,
 134                        char **best_match,
 135                        const char *base,
 136                        int recurse_limit)
 137{
 138        struct tree_desc one;
 139        void *one_buf;
 140        enum object_type type;
 141        unsigned long size;
 142
 143        one_buf = read_sha1_file(hash1, &type, &size);
 144        if (!one_buf)
 145                die("unable to read tree (%s)", sha1_to_hex(hash1));
 146        if (type != OBJ_TREE)
 147                die("%s is not a tree", sha1_to_hex(hash1));
 148        init_tree_desc(&one, one_buf, size);
 149
 150        while (one.size) {
 151                const char *path;
 152                const unsigned char *elem;
 153                unsigned mode;
 154                int score;
 155
 156                elem = tree_entry_extract(&one, &path, &mode);
 157                if (!S_ISDIR(mode))
 158                        goto next;
 159                score = score_trees(elem, hash2);
 160                if (*best_score < score) {
 161                        char *newpath;
 162                        newpath = xmalloc(strlen(base) + strlen(path) + 1);
 163                        sprintf(newpath, "%s%s", base, path);
 164                        free(*best_match);
 165                        *best_match = newpath;
 166                        *best_score = score;
 167                }
 168                if (recurse_limit) {
 169                        char *newbase;
 170                        newbase = xmalloc(strlen(base) + strlen(path) + 2);
 171                        sprintf(newbase, "%s%s/", base, path);
 172                        match_trees(elem, hash2, best_score, best_match,
 173                                    newbase, recurse_limit - 1);
 174                        free(newbase);
 175                }
 176
 177        next:
 178                update_tree_entry(&one);
 179        }
 180        free(one_buf);
 181}
 182
 183/*
 184 * A tree "hash1" has a subdirectory at "prefix".  Come up with a
 185 * tree object by replacing it with another tree "hash2".
 186 */
 187static int splice_tree(const unsigned char *hash1,
 188                       char *prefix,
 189                       const unsigned char *hash2,
 190                       unsigned char *result)
 191{
 192        char *subpath;
 193        int toplen;
 194        char *buf;
 195        unsigned long sz;
 196        struct tree_desc desc;
 197        unsigned char *rewrite_here;
 198        const unsigned char *rewrite_with;
 199        unsigned char subtree[20];
 200        enum object_type type;
 201        int status;
 202
 203        subpath = strchr(prefix, '/');
 204        if (!subpath)
 205                toplen = strlen(prefix);
 206        else {
 207                toplen = subpath - prefix;
 208                subpath++;
 209        }
 210
 211        buf = read_sha1_file(hash1, &type, &sz);
 212        if (!buf)
 213                die("cannot read tree %s", sha1_to_hex(hash1));
 214        init_tree_desc(&desc, buf, sz);
 215
 216        rewrite_here = NULL;
 217        while (desc.size) {
 218                const char *name;
 219                unsigned mode;
 220                const unsigned char *sha1;
 221
 222                sha1 = tree_entry_extract(&desc, &name, &mode);
 223                if (strlen(name) == toplen &&
 224                    !memcmp(name, prefix, toplen)) {
 225                        if (!S_ISDIR(mode))
 226                                die("entry %s in tree %s is not a tree",
 227                                    name, sha1_to_hex(hash1));
 228                        rewrite_here = (unsigned char *) sha1;
 229                        break;
 230                }
 231                update_tree_entry(&desc);
 232        }
 233        if (!rewrite_here)
 234                die("entry %.*s not found in tree %s",
 235                    toplen, prefix, sha1_to_hex(hash1));
 236        if (subpath) {
 237                status = splice_tree(rewrite_here, subpath, hash2, subtree);
 238                if (status)
 239                        return status;
 240                rewrite_with = subtree;
 241        }
 242        else
 243                rewrite_with = hash2;
 244        hashcpy(rewrite_here, rewrite_with);
 245        status = write_sha1_file(buf, sz, tree_type, result);
 246        free(buf);
 247        return status;
 248}
 249
 250/*
 251 * We are trying to come up with a merge between one and two that
 252 * results in a tree shape similar to one.  The tree two might
 253 * correspond to a subtree of one, in which case it needs to be
 254 * shifted down by prefixing otherwise empty directories.  On the
 255 * other hand, it could cover tree one and we might need to pick a
 256 * subtree of it.
 257 */
 258void shift_tree(const unsigned char *hash1,
 259                const unsigned char *hash2,
 260                unsigned char *shifted,
 261                int depth_limit)
 262{
 263        char *add_prefix;
 264        char *del_prefix;
 265        int add_score, del_score;
 266
 267        add_score = del_score = score_trees(hash1, hash2);
 268        add_prefix = xcalloc(1, 1);
 269        del_prefix = xcalloc(1, 1);
 270
 271        /*
 272         * See if one's subtree resembles two; if so we need to prefix
 273         * two with a few fake trees to match the prefix.
 274         */
 275        match_trees(hash1, hash2, &add_score, &add_prefix, "", depth_limit);
 276
 277        /*
 278         * See if two's subtree resembles one; if so we need to
 279         * pick only subtree of two.
 280         */
 281        match_trees(hash2, hash1, &del_score, &del_prefix, "", depth_limit);
 282
 283        /* Assume we do not have to do any shifting */
 284        hashcpy(shifted, hash2);
 285
 286        if (add_score < del_score) {
 287                /* We need to pick a subtree of two */
 288                unsigned mode;
 289
 290                if (!*del_prefix)
 291                        return;
 292
 293                if (get_tree_entry(hash2, del_prefix, shifted, &mode))
 294                        die("cannot find path %s in tree %s",
 295                            del_prefix, sha1_to_hex(hash2));
 296                return;
 297        }
 298
 299        if (!*add_prefix)
 300                return;
 301
 302        splice_tree(hash1, add_prefix, hash2, shifted);
 303}