1#include "cache.h" 2#include "tree.h" 3#include "tree-walk.h" 4#include "object-store.h" 5 6static int score_missing(unsigned mode) 7{ 8 int score; 9 10 if (S_ISDIR(mode)) 11 score = -1000; 12 else if (S_ISLNK(mode)) 13 score = -500; 14 else 15 score = -50; 16 return score; 17} 18 19static int score_differs(unsigned mode1, unsigned mode2) 20{ 21 int score; 22 23 if (S_ISDIR(mode1) != S_ISDIR(mode2)) 24 score = -100; 25 else if (S_ISLNK(mode1) != S_ISLNK(mode2)) 26 score = -50; 27 else 28 score = -5; 29 return score; 30} 31 32static int score_matches(unsigned mode1, unsigned mode2) 33{ 34 int score; 35 36 /* Heh, we found SHA-1 collisions between different kind of objects */ 37 if (S_ISDIR(mode1) != S_ISDIR(mode2)) 38 score = -100; 39 else if (S_ISLNK(mode1) != S_ISLNK(mode2)) 40 score = -50; 41 42 else if (S_ISDIR(mode1)) 43 score = 1000; 44 else if (S_ISLNK(mode1)) 45 score = 500; 46 else 47 score = 250; 48 return score; 49} 50 51static void *fill_tree_desc_strict(struct tree_desc *desc, 52 const struct object_id *hash) 53{ 54 void *buffer; 55 enum object_type type; 56 unsigned long size; 57 58 buffer = read_object_file(hash, &type, &size); 59 if (!buffer) 60 die("unable to read tree (%s)", oid_to_hex(hash)); 61 if (type != OBJ_TREE) 62 die("%s is not a tree", oid_to_hex(hash)); 63 init_tree_desc(desc, buffer, size); 64 return buffer; 65} 66 67static int base_name_entries_compare(const struct name_entry *a, 68 const struct name_entry *b) 69{ 70 return base_name_compare(a->path, tree_entry_len(a), a->mode, 71 b->path, tree_entry_len(b), b->mode); 72} 73 74/* 75 * Inspect two trees, and give a score that tells how similar they are. 76 */ 77static int score_trees(const struct object_id *hash1, const struct object_id *hash2) 78{ 79 struct tree_desc one; 80 struct tree_desc two; 81 void *one_buf = fill_tree_desc_strict(&one, hash1); 82 void *two_buf = fill_tree_desc_strict(&two, hash2); 83 int score = 0; 84 85 for (;;) { 86 int cmp; 87 88 if (one.size && two.size) 89 cmp = base_name_entries_compare(&one.entry, &two.entry); 90 else if (one.size) 91 /* two lacks this entry */ 92 cmp = -1; 93 else if (two.size) 94 /* two has more entries */ 95 cmp = 1; 96 else 97 break; 98 99 if (cmp < 0) { 100 /* path1 does not appear in two */ 101 score += score_missing(one.entry.mode); 102 update_tree_entry(&one); 103 } else if (cmp > 0) { 104 /* path2 does not appear in one */ 105 score += score_missing(two.entry.mode); 106 update_tree_entry(&two); 107 } else { 108 /* path appears in both */ 109 if (!oideq(&one.entry.oid, &two.entry.oid)) { 110 /* they are different */ 111 score += score_differs(one.entry.mode, 112 two.entry.mode); 113 } else { 114 /* same subtree or blob */ 115 score += score_matches(one.entry.mode, 116 two.entry.mode); 117 } 118 update_tree_entry(&one); 119 update_tree_entry(&two); 120 } 121 } 122 free(one_buf); 123 free(two_buf); 124 return score; 125} 126 127/* 128 * Match one itself and its subtrees with two and pick the best match. 129 */ 130static void match_trees(const struct object_id *hash1, 131 const struct object_id *hash2, 132 int *best_score, 133 char **best_match, 134 const char *base, 135 int recurse_limit) 136{ 137 struct tree_desc one; 138 void *one_buf = fill_tree_desc_strict(&one, hash1); 139 140 while (one.size) { 141 const char *path; 142 const struct object_id *elem; 143 unsigned short mode; 144 int score; 145 146 elem = tree_entry_extract(&one, &path, &mode); 147 if (!S_ISDIR(mode)) 148 goto next; 149 score = score_trees(elem, hash2); 150 if (*best_score < score) { 151 free(*best_match); 152 *best_match = xstrfmt("%s%s", base, path); 153 *best_score = score; 154 } 155 if (recurse_limit) { 156 char *newbase = xstrfmt("%s%s/", base, path); 157 match_trees(elem, hash2, best_score, best_match, 158 newbase, recurse_limit - 1); 159 free(newbase); 160 } 161 162 next: 163 update_tree_entry(&one); 164 } 165 free(one_buf); 166} 167 168/* 169 * A tree "oid1" has a subdirectory at "prefix". Come up with a tree object by 170 * replacing it with another tree "oid2". 171 */ 172static int splice_tree(const struct object_id *oid1, const char *prefix, 173 const struct object_id *oid2, struct object_id *result) 174{ 175 char *subpath; 176 int toplen; 177 char *buf; 178 unsigned long sz; 179 struct tree_desc desc; 180 unsigned char *rewrite_here; 181 const struct object_id *rewrite_with; 182 struct object_id subtree; 183 enum object_type type; 184 int status; 185 186 subpath = strchrnul(prefix, '/'); 187 toplen = subpath - prefix; 188 if (*subpath) 189 subpath++; 190 191 buf = read_object_file(oid1, &type, &sz); 192 if (!buf) 193 die("cannot read tree %s", oid_to_hex(oid1)); 194 init_tree_desc(&desc, buf, sz); 195 196 rewrite_here = NULL; 197 while (desc.size) { 198 const char *name; 199 unsigned short mode; 200 201 tree_entry_extract(&desc, &name, &mode); 202 if (strlen(name) == toplen && 203 !memcmp(name, prefix, toplen)) { 204 if (!S_ISDIR(mode)) 205 die("entry %s in tree %s is not a tree", name, 206 oid_to_hex(oid1)); 207 208 /* 209 * We cast here for two reasons: 210 * 211 * - to flip the "char *" (for the path) to "unsigned 212 * char *" (for the hash stored after it) 213 * 214 * - to discard the "const"; this is OK because we 215 * know it points into our non-const "buf" 216 */ 217 rewrite_here = (unsigned char *)(desc.entry.path + 218 strlen(desc.entry.path) + 219 1); 220 break; 221 } 222 update_tree_entry(&desc); 223 } 224 if (!rewrite_here) 225 die("entry %.*s not found in tree %s", toplen, prefix, 226 oid_to_hex(oid1)); 227 if (*subpath) { 228 struct object_id tree_oid; 229 hashcpy(tree_oid.hash, rewrite_here); 230 status = splice_tree(&tree_oid, subpath, oid2, &subtree); 231 if (status) 232 return status; 233 rewrite_with = &subtree; 234 } else { 235 rewrite_with = oid2; 236 } 237 hashcpy(rewrite_here, rewrite_with->hash); 238 status = write_object_file(buf, sz, tree_type, result); 239 free(buf); 240 return status; 241} 242 243/* 244 * We are trying to come up with a merge between one and two that 245 * results in a tree shape similar to one. The tree two might 246 * correspond to a subtree of one, in which case it needs to be 247 * shifted down by prefixing otherwise empty directories. On the 248 * other hand, it could cover tree one and we might need to pick a 249 * subtree of it. 250 */ 251void shift_tree(const struct object_id *hash1, 252 const struct object_id *hash2, 253 struct object_id *shifted, 254 int depth_limit) 255{ 256 char *add_prefix; 257 char *del_prefix; 258 int add_score, del_score; 259 260 /* 261 * NEEDSWORK: this limits the recursion depth to hardcoded 262 * value '2' to avoid excessive overhead. 263 */ 264 if (!depth_limit) 265 depth_limit = 2; 266 267 add_score = del_score = score_trees(hash1, hash2); 268 add_prefix = xcalloc(1, 1); 269 del_prefix = xcalloc(1, 1); 270 271 /* 272 * See if one's subtree resembles two; if so we need to prefix 273 * two with a few fake trees to match the prefix. 274 */ 275 match_trees(hash1, hash2, &add_score, &add_prefix, "", depth_limit); 276 277 /* 278 * See if two's subtree resembles one; if so we need to 279 * pick only subtree of two. 280 */ 281 match_trees(hash2, hash1, &del_score, &del_prefix, "", depth_limit); 282 283 /* Assume we do not have to do any shifting */ 284 oidcpy(shifted, hash2); 285 286 if (add_score < del_score) { 287 /* We need to pick a subtree of two */ 288 unsigned short mode; 289 290 if (!*del_prefix) 291 return; 292 293 if (get_tree_entry(hash2, del_prefix, shifted, &mode)) 294 die("cannot find path %s in tree %s", 295 del_prefix, oid_to_hex(hash2)); 296 return; 297 } 298 299 if (!*add_prefix) 300 return; 301 302 splice_tree(hash1, add_prefix, hash2, shifted); 303} 304 305/* 306 * The user says the trees will be shifted by this much. 307 * Unfortunately we cannot fundamentally tell which one to 308 * be prefixed, as recursive merge can work in either direction. 309 */ 310void shift_tree_by(const struct object_id *hash1, 311 const struct object_id *hash2, 312 struct object_id *shifted, 313 const char *shift_prefix) 314{ 315 struct object_id sub1, sub2; 316 unsigned short mode1, mode2; 317 unsigned candidate = 0; 318 319 /* Can hash2 be a tree at shift_prefix in tree hash1? */ 320 if (!get_tree_entry(hash1, shift_prefix, &sub1, &mode1) && 321 S_ISDIR(mode1)) 322 candidate |= 1; 323 324 /* Can hash1 be a tree at shift_prefix in tree hash2? */ 325 if (!get_tree_entry(hash2, shift_prefix, &sub2, &mode2) && 326 S_ISDIR(mode2)) 327 candidate |= 2; 328 329 if (candidate == 3) { 330 /* Both are plausible -- we need to evaluate the score */ 331 int best_score = score_trees(hash1, hash2); 332 int score; 333 334 candidate = 0; 335 score = score_trees(&sub1, hash2); 336 if (score > best_score) { 337 candidate = 1; 338 best_score = score; 339 } 340 score = score_trees(&sub2, hash1); 341 if (score > best_score) 342 candidate = 2; 343 } 344 345 if (!candidate) { 346 /* Neither is plausible -- do not shift */ 347 oidcpy(shifted, hash2); 348 return; 349 } 350 351 if (candidate == 1) 352 /* 353 * shift tree2 down by adding shift_prefix above it 354 * to match tree1. 355 */ 356 splice_tree(hash1, shift_prefix, hash2, shifted); 357 else 358 /* 359 * shift tree2 up by removing shift_prefix from it 360 * to match tree1. 361 */ 362 oidcpy(shifted, &sub2); 363}