Merge branch 'jc/merge-subtree'
author Junio C Hamano <junkio@cox.net>
Mon, 9 Apr 2007 06:51:14 +0000 (23:51 -0700)
committer Junio C Hamano <junkio@cox.net>
Mon, 9 Apr 2007 06:54:17 +0000 (23:54 -0700)
* jc/merge-subtree:
A new merge strategy 'subtree'.

It is safe to merge this early, as this is a feature that the user
explicitly needs to ask for and that would not trigger otherwise. A
known issue with the current implementation is that the subtree
matching heuristic is very stupid. It could run ls-tree twice
and try to count the intersection.

Giving it a wider audience would help it get improved by
motivated volunteers.

Signed-off-by: Junio C Hamano <junkio@cox.net>
.gitignore
Makefile
cache.h
git-merge.sh
match-trees.c [new file with mode: 0644]
merge-recursive.c
test-match-trees.c [new file with mode: 0644]
index b39f78fcdf18c201d89a4e69fa24a853185e97e5..9229e918cd1c8a43b6c73b0083ddfe973b1e3b88 100644 (file)
@@ -77,6 +77,7 @@ git-merge-ours
 git-merge-recursive
 git-merge-resolve
 git-merge-stupid
+git-merge-subtree
 git-mergetool
 git-mktag
 git-mktree
@@ -148,6 +149,7 @@ test-chmtime
 test-date
 test-delta
 test-dump-cache-tree
+test-match-trees
 common-cmds.h
 *.tar.gz
 *.dsc
index ac29c629e3927ad59142cf73d03eea501fc30962..a77d31de989f6de63bb2fbbd5ccef3c1c83352a8 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -251,6 +251,8 @@ BUILT_INS = \
 # what 'all' will build and 'install' will install, in gitexecdir
 ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
 
+ALL_PROGRAMS += git-merge-subtree$X
+
 # what 'all' will build but not install in gitexecdir
 OTHER_PROGRAMS = git$X gitweb/gitweb.cgi
 ifndef NO_TCLTK
@@ -299,7 +301,7 @@ LIB_OBJS = \
        server-info.o setup.o sha1_file.o sha1_name.o strbuf.o \
        tag.o tree.o usage.o config.o environment.o ctype.o copy.o \
        revision.o pager.o tree-walk.o xdiff-interface.o \
-       write_or_die.o trace.o list-objects.o grep.o \
+       write_or_die.o trace.o list-objects.o grep.o match-trees.o \
        alloc.o merge-file.o path-list.o help.o unpack-trees.o $(DIFF_OBJS) \
        color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \
        convert.o
@@ -725,6 +727,9 @@ git$X: git.c common-cmds.h $(BUILTIN_OBJS) $(GITLIBS) GIT-CFLAGS
 
 help.o: common-cmds.h
 
+git-merge-subtree$X: git-merge-recursive$X
+       rm -f $@ && ln git-merge-recursive$X $@
+
 $(BUILT_INS): git$X
        $(QUIET_BUILT_IN)rm -f $@ && ln git$X $@
 
@@ -942,6 +947,9 @@ test-dump-cache-tree$X: dump-cache-tree.o $(GITLIBS)
 test-sha1$X: test-sha1.o $(GITLIBS)
        $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
 
+test-match-trees$X: test-match-trees.o $(GITLIBS)
+       $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
+
 test-chmtime$X: test-chmtime.c
        $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $<
 
diff --git a/cache.h b/cache.h
index 1b50c32b139256c5d8be96a85b02f1ce9855f15a..eb57507b804019f7d6b27a27a2b262d4b051e43b 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -496,4 +496,7 @@ extern void trace_argv_printf(const char **argv, int count, const char *format,
 extern int convert_to_git(const char *path, char **bufp, unsigned long *sizep);
 extern int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep);
 
+/* match-trees.c */
+void shift_tree(const unsigned char *, const unsigned char *, unsigned char *, int);
+
 #endif /* CACHE_H */
index fa4589173f426d6172883c47479c52b8700cafa8..7ebbce4bdbaf243a7a5612c024216b8ccf8eae44 100755 (executable)
@@ -16,10 +16,10 @@ test -z "$(git ls-files -u)" ||
 LF='
 '
 
-all_strategies='recur recursive octopus resolve stupid ours'
+all_strategies='recur recursive octopus resolve stupid ours subtree'
 default_twohead_strategies='recursive'
 default_octopus_strategies='octopus'
-no_trivial_merge_strategies='ours'
+no_trivial_merge_strategies='ours subtree'
 use_strategies=
 
 index_merge=t
diff --git a/match-trees.c b/match-trees.c
new file mode 100644 (file)
index 0000000..23cafe4
--- /dev/null
@@ -0,0 +1,304 @@
+#include "cache.h"
+#include "tree.h"
+#include "tree-walk.h"
+
+/*
+ * Penalty for an entry that is present in only one of the two trees
+ * being compared.  A missing directory costs the most, a missing
+ * symbolic link less, and anything else the least.  The path
+ * argument is accepted but not consulted.
+ */
+static int score_missing(unsigned mode, const char *path)
+{
+       if (S_ISDIR(mode))
+               return -1000;
+       if (S_ISLNK(mode))
+               return -500;
+       return -50;
+}
+
+/*
+ * Penalty for a pair of entries that share a name but whose object
+ * names differ.  A directory vs. non-directory mismatch is penalized
+ * most, a symlink vs. non-symlink mismatch less, and a plain content
+ * difference between entries of the same kind least.  The path
+ * argument is accepted but not consulted.
+ */
+static int score_differs(unsigned mode1, unsigned mode2, const char *path)
+{
+       if (S_ISDIR(mode1) != S_ISDIR(mode2))
+               return -100;
+       if (S_ISLNK(mode1) != S_ISLNK(mode2))
+               return -50;
+       return -5;
+}
+
+/*
+ * Reward for a pair of entries that share both a name and an object
+ * name.  Matching directories score highest, matching symbolic links
+ * next, and any other matching entry lowest.  If the two modes
+ * disagree about the kind of entry, a penalty is returned instead.
+ * The path argument is accepted but not consulted.
+ */
+static int score_matches(unsigned mode1, unsigned mode2, const char *path)
+{
+       /* Heh, we found SHA-1 collisions between different kinds of objects */
+       if (S_ISDIR(mode1) != S_ISDIR(mode2))
+               return -100;
+       if (S_ISLNK(mode1) != S_ISLNK(mode2))
+               return -50;
+
+       /* Both sides are the same kind of entry; reward by kind. */
+       if (S_ISDIR(mode1))
+               return 1000;
+       return S_ISLNK(mode1) ? 500 : 250;
+}
+
+/*
+ * Read the object named by hash, die unless it can be read and is a
+ * tree, and set up desc to walk its entries.  Returns the object
+ * buffer; the caller is responsible for freeing it.
+ */
+static void *read_tree_for_scoring(struct tree_desc *desc,
+                                   const unsigned char *hash)
+{
+       enum object_type type;
+       unsigned long size;
+       void *buf = read_sha1_file(hash, &type, &size);
+
+       if (!buf)
+               die("unable to read tree (%s)", sha1_to_hex(hash));
+       if (type != OBJ_TREE)
+               die("%s is not a tree", sha1_to_hex(hash));
+       init_tree_desc(desc, buf, size);
+       return buf;
+}
+
+/*
+ * Inspect two trees, and give a score that tells how similar they are.
+ *
+ * Only the top-level entries of each tree are compared.  The two
+ * entry lists are walked in parallel, ordered by base_name_compare():
+ * an entry found on one side only adds score_missing(), a name found
+ * on both sides adds score_matches() when the object names are equal
+ * and score_differs() otherwise.  Dies if either object cannot be
+ * read or is not a tree.
+ */
+static int score_trees(const unsigned char *hash1, const unsigned char *hash2)
+{
+       struct tree_desc one;
+       struct tree_desc two;
+       void *one_buf, *two_buf;
+       int score = 0;
+
+       one_buf = read_tree_for_scoring(&one, hash1);
+       two_buf = read_tree_for_scoring(&two, hash2);
+
+       while (one.size | two.size) {
+               /*
+                * Give every per-entry variable a definite value; each
+                * is only filled in when its side still has entries.
+                */
+               const unsigned char *elem1 = NULL;
+               const unsigned char *elem2 = NULL;
+               const char *path1 = NULL;
+               const char *path2 = NULL;
+               unsigned mode1 = 0;
+               unsigned mode2 = 0;
+               int cmp;
+
+               if (one.size)
+                       elem1 = tree_entry_extract(&one, &path1, &mode1);
+               if (two.size)
+                       elem2 = tree_entry_extract(&two, &path2, &mode2);
+
+               if (!one.size) {
+                       /* two has more entries */
+                       score += score_missing(mode2, path2);
+                       update_tree_entry(&two);
+                       continue;
+               }
+               if (!two.size) {
+                       /* two lacks this entry */
+                       score += score_missing(mode1, path1);
+                       update_tree_entry(&one);
+                       continue;
+               }
+               cmp = base_name_compare(path1, strlen(path1), mode1,
+                                       path2, strlen(path2), mode2);
+               if (cmp < 0) {
+                       /* path1 does not appear in two */
+                       score += score_missing(mode1, path1);
+                       update_tree_entry(&one);
+                       continue;
+               }
+               else if (cmp > 0) {
+                       /* path2 does not appear in one */
+                       score += score_missing(mode2, path2);
+                       update_tree_entry(&two);
+                       continue;
+               }
+               else if (hashcmp(elem1, elem2))
+                       /* they are different */
+                       score += score_differs(mode1, mode2, path1);
+               else
+                       /* same subtree or blob */
+                       score += score_matches(mode1, mode2, path1);
+               update_tree_entry(&one);
+               update_tree_entry(&two);
+       }
+       free(one_buf);
+       free(two_buf);
+       return score;
+}
+
+/*
+ * Match one itself and its subtrees with two and pick the best match.
+ *
+ * Every directory entry of tree hash1 is scored against hash2 with
+ * score_trees().  Whenever a score beats *best_score, *best_score is
+ * updated and *best_match is freed and replaced with a newly
+ * allocated "<base><name>" path.  While recurse_limit is non-zero the
+ * search descends into each directory with "<base><name>/" as the new
+ * base and recurse_limit decremented by one.  Dies if hash1 cannot be
+ * read or is not a tree.
+ */
+static void match_trees(const unsigned char *hash1,
+                       const unsigned char *hash2,
+                       int *best_score,
+                       char **best_match,
+                       char *base,
+                       int recurse_limit)
+{
+       struct tree_desc desc;
+       enum object_type type;
+       unsigned long size;
+       void *tree_buf = read_sha1_file(hash1, &type, &size);
+
+       if (!tree_buf)
+               die("unable to read tree (%s)", sha1_to_hex(hash1));
+       if (type != OBJ_TREE)
+               die("%s is not a tree", sha1_to_hex(hash1));
+       init_tree_desc(&desc, tree_buf, size);
+
+       for (; desc.size; update_tree_entry(&desc)) {
+               const char *name;
+               const unsigned char *sha1;
+               unsigned mode;
+               int score;
+
+               sha1 = tree_entry_extract(&desc, &name, &mode);
+               if (!S_ISDIR(mode))
+                       continue;
+
+               score = score_trees(sha1, hash2);
+               if (score > *best_score) {
+                       /* New leader: remember where it lives. */
+                       char *candidate = xmalloc(strlen(base) + strlen(name) + 1);
+                       sprintf(candidate, "%s%s", base, name);
+                       free(*best_match);
+                       *best_match = candidate;
+                       *best_score = score;
+               }
+               if (recurse_limit) {
+                       /* One extra byte for the trailing slash. */
+                       char *subdir = xmalloc(strlen(base) + strlen(name) + 2);
+                       sprintf(subdir, "%s%s/", base, name);
+                       match_trees(sha1, hash2, best_score, best_match,
+                                   subdir, recurse_limit - 1);
+                       free(subdir);
+               }
+       }
+       free(tree_buf);
+}
+
+/*
+ * A tree "hash1" has a subdirectory at "prefix".  Come up with a
+ * tree object by replacing it with another tree "hash2".
+ *
+ * prefix is a '/'-separated path.  Its first component is looked up
+ * among the entries of hash1; when more components follow, the
+ * function recurses into that entry to build the replacement subtree
+ * first.  The object name of the rewritten tree is stored in result.
+ *
+ * Returns the status of write_sha1_file() (or of the nested splice
+ * when that fails).  Dies if hash1 cannot be read or is not a tree,
+ * or if the first component is missing or is not a directory.
+ */
+static int splice_tree(const unsigned char *hash1,
+                      char *prefix,
+                      const unsigned char *hash2,
+                      unsigned char *result)
+{
+       char *subpath;
+       int toplen;
+       char *buf;
+       unsigned long sz;
+       struct tree_desc desc;
+       unsigned char *rewrite_here;
+       const unsigned char *rewrite_with;
+       unsigned char subtree[20];
+       enum object_type type;
+       int status;
+
+       /* Split prefix into its first component and the remainder. */
+       subpath = strchr(prefix, '/');
+       if (!subpath)
+               toplen = strlen(prefix);
+       else {
+               toplen = subpath - prefix;
+               subpath++;
+       }
+
+       buf = read_sha1_file(hash1, &type, &sz);
+       if (!buf)
+               die("cannot read tree %s", sha1_to_hex(hash1));
+       if (type != OBJ_TREE)
+               die("%s is not a tree", sha1_to_hex(hash1));
+       init_tree_desc(&desc, buf, sz);
+
+       rewrite_here = NULL;
+       while (desc.size) {
+               const char *name;
+               unsigned mode;
+               const unsigned char *sha1;
+
+               sha1 = tree_entry_extract(&desc, &name, &mode);
+               if (strlen(name) == (size_t) toplen &&
+                   !memcmp(name, prefix, toplen)) {
+                       if (!S_ISDIR(mode))
+                               die("entry %s in tree %s is not a tree",
+                                   name, sha1_to_hex(hash1));
+                       /*
+                        * The entry's object name is overwritten in
+                        * place below, and buf is then written out
+                        * as the new tree.
+                        */
+                       rewrite_here = (unsigned char *) sha1;
+                       break;
+               }
+               update_tree_entry(&desc);
+       }
+       if (!rewrite_here)
+               die("entry %.*s not found in tree %s",
+                   toplen, prefix, sha1_to_hex(hash1));
+       if (subpath) {
+               status = splice_tree(rewrite_here, subpath, hash2, subtree);
+               if (status) {
+                       /* Do not leak the tree buffer on failure. */
+                       free(buf);
+                       return status;
+               }
+               rewrite_with = subtree;
+       }
+       else
+               rewrite_with = hash2;
+       hashcpy(rewrite_here, rewrite_with);
+       status = write_sha1_file(buf, sz, tree_type, result);
+       free(buf);
+       return status;
+}
+
+/*
+ * We are trying to come up with a merge between one and two that
+ * results in a tree shape similar to one.  The tree two might
+ * correspond to a subtree of one, in which case it needs to be
+ * shifted down by prefixing otherwise empty directories.  On the
+ * other hand, it could cover tree one and we might need to pick a
+ * subtree of it.
+ *
+ * hash1 and hash2 name trees one and two.  The object name of the
+ * adjusted tree is stored in shifted; it is a copy of hash2 when no
+ * shifting turns out to be necessary.  depth_limit is handed to
+ * match_trees() as its recursion limit.
+ */
+void shift_tree(const unsigned char *hash1,
+               const unsigned char *hash2,
+               unsigned char *shifted,
+               int depth_limit)
+{
+       char *add_prefix;
+       char *del_prefix;
+       int add_score, del_score;
+
+       /* The unshifted comparison is the baseline both searches must beat. */
+       add_score = del_score = score_trees(hash1, hash2);
+       add_prefix = xcalloc(1, 1);
+       del_prefix = xcalloc(1, 1);
+
+       /*
+        * See if one's subtree resembles two; if so we need to prefix
+        * two with a few fake trees to match the prefix.
+        */
+       match_trees(hash1, hash2, &add_score, &add_prefix, "", depth_limit);
+
+       /*
+        * See if two's subtree resembles one; if so we need to
+        * pick only subtree of two.
+        */
+       match_trees(hash2, hash1, &del_score, &del_prefix, "", depth_limit);
+
+       /* Assume we do not have to do any shifting */
+       hashcpy(shifted, hash2);
+
+       if (add_score < del_score) {
+               /* We need to pick a subtree of two */
+               unsigned mode;
+
+               if (*del_prefix &&
+                   get_tree_entry(hash2, del_prefix, shifted, &mode))
+                       die("cannot find path %s in tree %s",
+                           del_prefix, sha1_to_hex(hash2));
+       }
+       else if (*add_prefix)
+               splice_tree(hash1, add_prefix, hash2, shifted);
+
+       /* Both prefixes are heap-allocated; release them on every path. */
+       free(add_prefix);
+       free(del_prefix);
+}
+
index 2b614b64ba71f4006685ed1e08c300385ece5dec..3096594b3e9f07785229236700c6039efd61c305 100644 (file)
 #include "path-list.h"
 #include "xdiff-interface.h"
 
+static int subtree_merge;
+
+/*
+ * Run shift_tree() on the object names of trees one and two.  Returns
+ * two itself when the shifted object name equals two's, otherwise the
+ * tree object looked up by the shifted name.
+ */
+static struct tree *shift_tree_object(struct tree *one, struct tree *two)
+{
+       unsigned char shifted[20];
+
+       /*
+        * NEEDSWORK: this limits the recursion depth to hardcoded
+        * value '2' to avoid excessive overhead.
+        */
+       shift_tree(one->object.sha1, two->object.sha1, shifted, 2);
+       return hashcmp(two->object.sha1, shifted) ? lookup_tree(shifted) : two;
+}
+
 /*
  * A virtual commit has
  * - (const char *)commit->util set to the name, and
@@ -1137,6 +1153,12 @@ static int merge_trees(struct tree *head,
                       struct tree **result)
 {
        int code, clean;
+
+       if (subtree_merge) {
+               merge = shift_tree_object(head, merge);
+               common = shift_tree_object(head, common);
+       }
+
        if (sha_eq(common->object.sha1, merge->object.sha1)) {
                output(0, "Already uptodate!");
                *result = head;
@@ -1342,6 +1364,13 @@ int main(int argc, char *argv[])
        struct lock_file *lock = xcalloc(1, sizeof(struct lock_file));
        int index_fd;
 
+       if (argv[0]) {
+               int namelen = strlen(argv[0]);
+               if (8 < namelen &&
+                   !strcmp(argv[0] + namelen - 8, "-subtree"))
+                       subtree_merge = 1;
+       }
+
        git_config(merge_config);
        if (getenv("GIT_MERGE_VERBOSITY"))
                verbosity = strtol(getenv("GIT_MERGE_VERBOSITY"), NULL, 10);
diff --git a/test-match-trees.c b/test-match-trees.c
new file mode 100644 (file)
index 0000000..a3c4688
--- /dev/null
@@ -0,0 +1,24 @@
+#include "cache.h"
+#include "tree.h"
+
+/*
+ * Test helper: resolve the two tree-ish arguments, run shift_tree()
+ * on them with a depth limit of -1, and print the object name of the
+ * shifted tree.  Dies when an argument is missing, cannot be parsed
+ * as an object name, or is not a tree-ish.
+ */
+int main(int ac, char **av)
+{
+       unsigned char hash1[20], hash2[20], shifted[20];
+       struct tree *one, *two;
+
+       /* av[1] and av[2] are used below; insist on exactly two arguments. */
+       if (ac != 3)
+               die("usage: test-match-trees <tree-ish> <tree-ish>");
+
+       if (get_sha1(av[1], hash1))
+               die("cannot parse %s as an object name", av[1]);
+       if (get_sha1(av[2], hash2))
+               die("cannot parse %s as an object name", av[2]);
+       one = parse_tree_indirect(hash1);
+       if (!one)
+               die("not a treeish %s", av[1]);
+       two = parse_tree_indirect(hash2);
+       if (!two)
+               die("not a treeish %s", av[2]);
+
+       shift_tree(one->object.sha1, two->object.sha1, shifted, -1);
+       printf("shifted: %s\n", sha1_to_hex(shifted));
+
+       return 0;
+}