[PATCH] Redo rename/copy detection logic.
[gitweb.git] / diff-cache.c
index 015fd5fec08a7e9b200dc8c9124e84f3a933eff4..744fffb7c8c684e9d0206da36a0fba1c502570c9 100644 (file)
 #include "cache.h"
+#include "diff.h"
 
+/* Set by --cached: get_stat_data() then never lstat()s the work tree file. */
 static int cached_only = 0;
-static int recursive = 0;
-static int line_termination = '\n';
+/* Format handed to diff_flush(); -p selects DIFF_FORMAT_PATCH, -z DIFF_FORMAT_MACHINE. */
+static int diff_output_format = DIFF_FORMAT_HUMAN;
+/* Set by -m: a work tree file that does not exist (ENOENT) is treated as matching the index. */
+static int match_nonexisting = 0;
+/* 0, or DIFF_DETECT_RENAME (-M) / DIFF_DETECT_COPY (-C); handed to diffcore_rename(). */
+static int detect_rename = 0;
+/* Set by -R; handed to diff_setup(). */
+static int reverse_diff = 0;
+/* Value diff_scoreopt_parse() returned for the -M / -C argument. */
+static int diff_score_opt = 0;
+/* String taken from the -S option; handed to diffcore_pickaxe() when non-NULL. */
+static const char *pickaxe = NULL;
 
-static int diff_cache(void *tree, unsigned long size, struct cache_entry **ac, int entries, const char *base);
-
-static void update_tree_entry(void **bufp, unsigned long *sizep)
-{
-       void *buf = *bufp;
-       unsigned long size = *sizep;
-       int len = strlen(buf) + 1 + 20;
-
-       if (size < len)
-               die("corrupt tree file 1 (%s)", size);
-       *bufp = buf + len;
-       *sizep = size - len;
-}
-
-static const unsigned char *extract(void *tree, unsigned long size, const char **pathp, unsigned int *modep)
+/*
+ * A file entry went away or appeared.
+ *
+ * Queues the event with diff_addremove().  Only the first character
+ * of prefix is used; the callers in this file pass "+" or "-".  The
+ * path comes from ce->name.  mode is converted with ntohl() before it
+ * is handed on, so callers pass it in the same byte order as
+ * ce->ce_mode.
+ */
+static void show_file(const char *prefix, struct cache_entry *ce, unsigned char *sha1, unsigned int mode)
 {
-       int len = strlen(tree)+1;
-       const unsigned char *sha1 = tree + len;
-       const char *path = strchr(tree, ' ');
-
-       if (!path || size < len + 20 || sscanf(tree, "%o", modep) != 1)
-               die("corrupt tree file 2 (%d)", size);
-       *pathp = path+1;
-       return sha1;
+       diff_addremove(prefix[0], ntohl(mode), sha1, ce->name, NULL);
 }
 
-static char *malloc_base(const char *base, const char *path, int pathlen)
+/*
+ * Work out which sha1 and mode should represent cache entry ce.
+ *
+ * The starting point is what the index records (ce->sha1 and
+ * ce->ce_mode).  Unless cached_only is set, the work tree file is
+ * lstat()ed as well: if ce_match_stat() reports a change, the mode is
+ * taken from the file and the sha1 is replaced by an all-zero
+ * placeholder.
+ *
+ * Returns 0 with *sha1p and *modep filled in, or -1 with both left
+ * untouched when lstat() fails.  The one exception is ENOENT while
+ * match_nonexisting is set, which returns 0 with the index values.
+ */
+static int get_stat_data(struct cache_entry *ce, unsigned char **sha1p, unsigned int *modep)
 {
-       int baselen = strlen(base);
-       char *newbase = malloc(baselen + pathlen + 2);
-       memcpy(newbase, base, baselen);
-       memcpy(newbase + baselen, path, pathlen);
-       memcpy(newbase + baselen + pathlen, "/", 2);
-       return newbase;
-}
-
-static void show_file(const char *prefix, const char *path, unsigned int mode, const unsigned char *sha1, const char *base);
+       unsigned char *sha1 = ce->sha1;
+       unsigned int mode = ce->ce_mode;
 
-/* A whole sub-tree went away or appeared */
-static void show_tree(const char *prefix, void *tree, unsigned long size, const char *base)
-{
-       while (size) {
-               const char *path;
-               unsigned int mode;
-               const unsigned char *sha1 = extract(tree, size, &path, &mode);
-               
-               show_file(prefix, path, mode, sha1, base);
-               update_tree_entry(&tree, &size);
+       if (!cached_only) {
+               /* static storage without an initializer, hence all zero bytes */
+               static unsigned char no_sha1[20];
+               int changed;
+               struct stat st;
+               if (lstat(ce->name, &st) < 0) {
+                       /* a missing file counts as "same as index" under -m */
+                       if (errno == ENOENT && match_nonexisting) {
+                               *sha1p = sha1;
+                               *modep = mode;
+                               return 0;
+                       }
+                       return -1;
+               }
+               changed = ce_match_stat(ce, &st);
+               if (changed) {
+                       /* the content was not hashed, so no real sha1 is available */
+                       mode = create_ce_mode(st.st_mode);
+                       sha1 = no_sha1;
+               }
         }
+
+       *sha1p = sha1;
+       *modep = mode;
+       return 0;
 }
 
-/* A file entry went away or appeared */
-static void show_file(const char *prefix, const char *path, unsigned int mode, const unsigned char *sha1, const char *base)
+/*
+ * Report cache entry "new" as an added file (prefix "+").
+ *
+ * The sha1 and mode that get reported come from get_stat_data(); if
+ * that call fails, nothing at all is reported for the entry.
+ */
+static void show_new_file(struct cache_entry *new)
 {
-       if (recursive && S_ISDIR(mode)) {
-               char type[20];
-               unsigned long size;
-               char *newbase = malloc_base(base, path, strlen(path));
-               void *tree;
-
-               tree = read_sha1_file(sha1, type, &size);
-               if (!tree || strcmp(type, "tree"))
-                       die("corrupt tree sha %s", sha1_to_hex(sha1));
-
-               show_tree(prefix, tree, size, newbase);
-               
-               free(tree);
-               free(newbase);
+       unsigned char *sha1;
+       unsigned int mode;
+
+       /* New file in the index: it might actually be different in the working copy */
+       if (get_stat_data(new, &sha1, &mode) < 0)
                 return;
-       }
 
-       printf("%s%o\t%s\t%s\t%s%s%c", prefix, mode,
-              S_ISDIR(mode) ? "tree" : "blob",
-              sha1_to_hex(sha1), base, path,
-              line_termination);
+       show_file("+", new, sha1, mode);
 }
 
-static int compare_tree_entry(const char *path1, unsigned int mode1, const unsigned char *sha1,
-                             struct cache_entry **ac, int *entries, const char *base)
+/*
+ * Compare cache entry "old" against "new" and queue a change record
+ * when they differ.
+ *
+ * The sha1 and mode used for "new" come from get_stat_data().  When
+ * that call fails, "old" is reported as removed if report_missing is
+ * set, and -1 is returned.  In every other case the return value is
+ * 0, whether or not anything was queued.
+ */
+static int show_modified(struct cache_entry *old,
+                        struct cache_entry *new,
+                        int report_missing)
 {
-       int baselen = strlen(base);
-       struct cache_entry *ce = *ac;
-       const char *path2 = ce->name + baselen;
-       unsigned int mode2 = ntohl(ce->ce_mode);
-       const unsigned char *sha2 = ce->sha1;
-       int cmp, pathlen1, pathlen2;
-       char old_sha1_hex[50];
-
-       pathlen1 = strlen(path1);
-       pathlen2 = strlen(path2);
-       cmp = cache_name_compare(path1, pathlen1, path2, pathlen2);
-       if (cmp < 0) {
-               if (S_ISDIR(mode1)) {
-                       char type[20];
-                       unsigned long size;
-                       void *tree = read_sha1_file(sha1, type, &size);
-                       char *newbase = malloc(baselen + 2 + pathlen1);
-
-                       memcpy(newbase, base, baselen);
-                       memcpy(newbase + baselen, path1, pathlen1);
-                       memcpy(newbase + baselen + pathlen1, "/", 2);
-                       if (!tree || strcmp(type, "tree"))
-                               die("unable to read tree object %s", sha1_to_hex(sha1));
-                       *entries = diff_cache(tree, size, ac, *entries, newbase);
-                       free(newbase);
-                       free(tree);
-                       return -1;
-               }
-               show_file("-", path1, mode1, sha1, base);
-               return -1;
-       }
+       unsigned int mode, oldmode;
+       unsigned char *sha1;
 
-       if (!cached_only) {
-               static unsigned char no_sha1[20];
-               int fd, changed;
-               struct stat st;
-               fd = open(ce->name, O_RDONLY);
-               if (fd < 0 || fstat(fd, &st) < 0) {
-                       show_file("-", path1, mode1, sha1, base);
-                       return -1;
-               }
-               changed = cache_match_stat(ce, &st);
-               close(fd);
-               if (changed) {
-                       mode2 = st.st_mode;
-                       sha2 = no_sha1;
-               }
+       if (get_stat_data(new, &sha1, &mode) < 0) {
+               if (report_missing)
+                       show_file("-", old, old->sha1, old->ce_mode);
+               return -1;
         }
 
-       if (cmp > 0) {
-               show_file("+", path2, mode2, sha2, base);
-               return 1;
-       }
-       if (!memcmp(sha1, sha2, 20) && mode1 == mode2)
+       /*
+        * Identical entries are skipped unless copy detection is on.
+        * NOTE(review): presumably unchanged files are still queued in
+        * that case so they can serve as copy sources -- confirm
+        * against diffcore_rename().
+        */
+       oldmode = old->ce_mode;
+       if (mode == oldmode && !memcmp(sha1, old->sha1, 20) &&
+           detect_rename < DIFF_DETECT_COPY)
                 return 0;
 
-       /*
-        * If the filemode has changed to/from a directory from/to a regular
-        * file, we need to consider it a remove and an add.
-        */
-       if (S_ISDIR(mode1) || S_ISDIR(mode2)) {
-               show_file("-", path1, mode1, sha1, base);
-               show_file("+", path2, mode2, sha2, base);
-               return 0;
-       }
+       /* the comparison above used the stored byte order; convert for diff_change() */
+       mode = ntohl(mode);
+       oldmode = ntohl(oldmode);
 
-       strcpy(old_sha1_hex, sha1_to_hex(sha1));
-       printf("*%o->%o\t%s\t%s->%s\t%s%s%c", mode1, mode2,
-              S_ISDIR(mode1) ? "tree" : "blob",
-              old_sha1_hex, sha1_to_hex(sha2), base, path1,
-              line_termination);
+       diff_change(oldmode, mode,
+                   old->sha1, sha1, old->name, NULL);
         return 0;
 }
 
-static int diff_cache(void *tree, unsigned long size, struct cache_entry **ac, int entries, const char *base)
+/*
+ * Walk the "entries" cache entries starting at ac and queue one diff
+ * record per path.
+ *
+ * Entries that share a name are handled as a group: the stage of the
+ * first one, and whether the entry right after it has the same name,
+ * decide what is reported; the rest of the group is then skipped.
+ * Dies on a stage other than 0, 1 or 3.  Always returns 0.
+ */
+static int diff_cache(struct cache_entry **ac, int entries)
 {
-       int baselen = strlen(base);
+       while (entries) {
+               struct cache_entry *ce = *ac;
+               /* true when the entry that follows carries the same name */
+               int same = (entries > 1) && ce_same_name(ce, ac[1]);
 
-       for (;;) {
-               struct cache_entry *ce;
-               unsigned int mode;
-               const char *path;
-               const unsigned char *sha1;
-               int left;
-
-               /*
-                * No entries in the cache (with this base)?
-                * Output the tree contents.
-                */
-               if (!entries || ce_namelen(ce = *ac) < baselen || memcmp(ce->name, base, baselen)) {
-                       if (!size)
-                               return entries;
-                       sha1 = extract(tree, size, &path, &mode);
-                       show_file("-", path, mode, sha1, base);
-                       update_tree_entry(&tree, &size);
-                       continue;
+               switch (ce_stage(ce)) {
+               case 0:
+                       /* No stage 1 entry? That means it's a new file */
+                       if (!same) {
+                               show_new_file(ce);
+                               break;
+                       }
+                       /* Show difference between old and new */
+                       show_modified(ac[1], ce, 1);
+                       break;
+               case 1:
+                       /* No stage 3 (merge) entry? That means it's been deleted */
+                       if (!same) {
+                               show_file("-", ce, ce->sha1, ce->ce_mode);
+                               break;
+                       }
+                       /* We come here with ce pointing at stage 1
+                        * (original tree) and ac[1] pointing at stage
+                        * 3 (unmerged).  show_modified() called with
+                        * report_missing set to false does not say the
+                        * file is deleted, but returns non-zero if the
+                        * work tree does not have it, in which case we
+                        * fall through to report the unmerged state.
+                        * Otherwise, we show the differences between
+                        * the original tree and the work tree.
+                        */
+                       if (!cached_only && !show_modified(ce, ac[1], 0))
+                               break;
+                       /* fallthru */
+               case 3:
+                       diff_unmerge(ce->name);
+                       break;
+
+               default:
+                       die("impossible cache entry stage");
                 }
 
                 /*
-                * No entries in the tree? Output the cache contents
+                * Ignore all the different stages for this file,
+                * we've handled the relevant cases now.
                  */
-               if (!size) {
-                       show_file("+", ce->name, ntohl(ce->ce_mode), ce->sha1, "");
+               do {
                         ac++;
                         entries--;
-                       continue;
-               }
+               } while (entries && ce_same_name(ce, ac[0]));
+       }
+       return 0;
+}
 
-               sha1 = extract(tree, size, &path, &mode);
-               left = entries;
-               switch (compare_tree_entry(path, mode, sha1, ac, &left, base)) {
-               case -1:
-                       update_tree_entry(&tree, &size);
-                       if (left < entries) {
-                               ac += (entries - left);
-                               entries = left;
-                       }
-                       continue;
-               case 0:
-                       update_tree_entry(&tree, &size);
-                       /* Fallthrough */
-               case 1:
-                       ac++;
-                       entries--;
+/*
+ * This turns all merge entries into "stage 3". That guarantees that
+ * when we read in the new tree (into "stage 1"), we won't lose sight
+ * of the fact that we had unmerged entries.
+ *
+ * Every entry in active_cache whose stage is non-zero gets all of the
+ * CE_STAGEMASK bits set in ce_flags; stage 0 entries are left alone.
+ */
+static void mark_merge_entries(void)
+{
+       int i;
+       for (i = 0; i < active_nr; i++) {
+               struct cache_entry *ce = active_cache[i];
+               if (!ce_stage(ce))
                         continue;
-               }
-               die("diff-cache: internal error");
+               /* the mask goes through htons() before being or-ed into ce_flags */
+               ce->ce_flags |= htons(CE_STAGEMASK);
         }
-       return 0;
 }
 
-int main(int argc, char **argv)
+static char *diff_cache_usage =
+"git-diff-cache [-p] [-r] [-z] [-m] [-M] [-C] [-R] [-S<string>] [--cached] <tree-ish> [<path>...]";
+
+/*
+ * Entry point: compare a tree-ish against the index and/or work tree.
+ *
+ * Leading arguments that start with '-' are options.  The first
+ * argument that does not is the tree-ish, and whatever follows it is
+ * handed to diffcore_pathspec().  Unmerged index entries are marked
+ * by mark_merge_entries(), the tree is read in with read_tree(), and
+ * diff_cache() then walks the resulting entry list.  Rename/copy
+ * detection, pickaxe and pathspec filtering are applied before
+ * diff_flush() writes the result out.
+ *
+ * Returns whatever diff_cache() returned.
+ */
+int main(int argc, const char **argv)
 {
         unsigned char tree_sha1[20];
         void *tree;
         unsigned long size;
+       int ret;
 
         read_cache();
-       while (argc > 2) {
-               char *arg = argv[1];
+       /*
+        * Stop at the first argument that is not an option.  Looping
+        * while "argc > 2" would treat the tree-ish as an option as
+        * soon as any path follows it.
+        */
+       while (argc > 1 && argv[1][0] == '-') {
+               const char *arg = argv[1];
                 argv++;
                 argc--;
                 if (!strcmp(arg, "-r")) {
-                       recursive = 1;
+                       /* We accept the -r flag just to look like git-diff-tree */
+                       continue;
+               }
+               if (!strcmp(arg, "-p")) {
+                       diff_output_format = DIFF_FORMAT_PATCH;
+                       continue;
+               }
+               if (!strncmp(arg, "-M", 2)) {
+                       detect_rename = DIFF_DETECT_RENAME;
+                       diff_score_opt = diff_scoreopt_parse(arg);
+                       continue;
+               }
+               if (!strncmp(arg, "-C", 2)) {
+                       detect_rename = DIFF_DETECT_COPY;
+                       diff_score_opt = diff_scoreopt_parse(arg);
                         continue;
                 }
                 if (!strcmp(arg, "-z")) {
-                       line_termination = '\0';
+                       diff_output_format = DIFF_FORMAT_MACHINE;
+                       continue;
+               }
+               if (!strcmp(arg, "-R")) {
+                       reverse_diff = 1;
+                       continue;
+               }
+               /* the string is glued onto the option, so match the prefix only */
+               if (!strncmp(arg, "-S", 2)) {
+                       pickaxe = arg + 2;
+                       continue;
+               }
+               if (!strcmp(arg, "-m")) {
+                       match_nonexisting = 1;
                         continue;
                 }
                 if (!strcmp(arg, "--cached")) {
                         cached_only = 1;
                         continue;
                 }
-               usage("diff-cache [-r] [-z] <tree sha1>");
+               usage(diff_cache_usage);
         }
 
-       if (argc != 2 || get_sha1_hex(argv[1], tree_sha1))
-               usage("diff-cache [-r] [-z] <tree sha1>");
+       if (argc < 2 || get_sha1(argv[1], tree_sha1))
+               usage(diff_cache_usage);
+       /* from here on argv[0] is the tree-ish and argv[1..] are the paths */
+       argv++;
+       argc--;
 
-       tree = read_tree_with_tree_or_commit_sha1(tree_sha1, &size, 0);
+       /* The rest is for paths restriction. */
+
+       diff_setup(reverse_diff);
+
+       mark_merge_entries();
+
+       tree = read_object_with_reference(tree_sha1, "tree", &size, NULL);
         if (!tree)
-               die("bad tree object %s", argv[1]);
-
-       return diff_cache(tree, size, active_cache, active_nr, "");
+               die("bad tree object %s", argv[0]);
+       if (read_tree(tree, size, 1))
+               die("unable to read tree object %s", argv[0]);
+
+       ret = diff_cache(active_cache, active_nr);
+       if (detect_rename)
+               diffcore_rename(detect_rename, diff_score_opt);
+       if (pickaxe)
+               diffcore_pickaxe(pickaxe);
+       if (2 <= argc)
+               diffcore_pathspec(argv + 1);
+       diff_flush(diff_output_format, 1);
+       return ret;
 }