[PATCH] Fix math thinko in similarity estimator.
[gitweb.git] / rev-tree.c
index bbf6281f64ff1a3eac982e0c56f88d3a2c08f538..7f9281985086971d3877aca27704f2aaf9c448ce 100644 (file)
 #include "cache.h"
+#include "commit.h"
 
-struct relationship {
-       unsigned char sha1[20];
-       unsigned char parent[20];
-};
+/*
+ * revision.h leaves the low 16 bits of the "flags" field of the
+ * revision data structure unused. We use them for a "reachable from
+ * this commit <N>" bitmask.
+ */
+#define MAX_COMMITS 16
+#define REACHABLE (1U << 16)
 
-static struct relationship **rels;
-static int nr_rels, rel_allocs;
+#define cmit_flags(cmit) ((cmit)->object.flags & ~REACHABLE)
 
-static int find_relationship(unsigned char *sha1, unsigned char *parent)
-{
-       int first = 0, last = nr_rels;
-
-       while (first < last) {
-               int next = (first + last) / 2;
-               struct relationship *rel = rels[next];
-               int cmp;
-
-               cmp = memcmp(sha1, rel->sha1, 20);
-               if (!cmp) {
-                       cmp = memcmp(parent, rel->parent, 20);
-                       if (!cmp)
-                               return next;
-               }
-               if (cmp < 0) {
-                       last = next;
-                       continue;
-               }
-               first = next+1;
-       }
-       return -first-1;
-}
+static int show_edges = 0;
+static int basemask = 0;
 
-static int add_relationship(unsigned char *sha1, unsigned char *parent)
+static void read_cache_file(const char *path)
 {
-       struct relationship *n;
-       int pos;
-
-       pos = find_relationship(sha1, parent);
-       if (pos >= 0)
-               return 0;
-       pos = -pos-1;
-
-       if (rel_allocs == nr_rels) {
-               rel_allocs = alloc_nr(rel_allocs);
-               rels = realloc(rels, rel_allocs * sizeof(struct relationship *));
-       }
-       n = malloc(sizeof(struct relationship));
-       
-       memmove(rels + pos + 1, rels + pos, (nr_rels - pos) * sizeof(struct relationship *));
-       rels[pos] = n;
-       nr_rels++;
-       memcpy(n->sha1, sha1, 20);
-       memcpy(n->parent, parent, 20);
-       return 1;
+       die("no revtree cache file yet");
 }
 
-static int already_seen(unsigned char *sha1)
+/*
+ * Some revisions are less interesting than others.
+ *
+ * For example, if we use a cache-file, that one may contain
+ * revisions that were never used. They are never interesting.
+ *
+ * And sometimes we're only interested in "edge" commits, i.e.
+ * places where the marking changes between parent and child.
+ */
+static int interesting(struct commit *rev)
 {
-       static char null_sha[20];
-       int pos = find_relationship(sha1, null_sha);
-
-       if (pos < 0) 
-               pos = -pos-1;
-       if (pos < nr_rels && !memcmp(sha1, rels[pos]->sha1, 20))
-               return 1;
-       return 0;
-}
+       unsigned mask = cmit_flags(rev);
 
-static int parse_commit(unsigned char *sha1)
-{
-       if (!already_seen(sha1)) {
-               void *buffer;
-               unsigned long size;
-               char type[20];
-               unsigned char parent[20];
-
-               buffer = read_sha1_file(sha1, type, &size);
-               if (!buffer || strcmp(type, "commit"))
-                       return -1;
-               buffer += 46; /* "tree " + "hex sha1" + "\n" */
-               while (!memcmp(buffer, "parent ", 7) && !get_sha1_hex(buffer+7, parent)) {
-                       add_relationship(sha1, parent);
-                       parse_commit(parent);
-                       buffer += 48;   /* "parent " + "hex sha1" + "\n" */
+       if (!mask)
+               return 0;
+       if (show_edges) {
+               struct commit_list *p = rev->parents;
+               while (p) {
+                       if (mask != cmit_flags(p->item))
+                               return 1;
+                       p = p->next;
                }
+               return 0;
        }
-       return 0;       
-}
+       if (mask & basemask)
+               return 0;
 
-static void read_cache_file(const char *path)
-{
-       FILE *file = fopen(path, "r");
-       char line[100];
-
-       while (fgets(line, sizeof(line), file)) {
-               unsigned char sha1[20], parent[20];
-               if (get_sha1_hex(line, sha1) || get_sha1_hex(line + 41, parent))
-                       usage("bad rev-tree cache file %s", path);
-               add_relationship(sha1, parent);
-       }
-       fclose(file);
+       return 1;
 }
 
 /*
- * Usage: rev-tree [--cache <cache-file>] <commit-id>
+ * Usage: git-rev-tree [--edges] [--cache <cache-file>] [^]<commit-id> [[^]<commit-id>...]
  *
  * The cache-file can be quite important for big trees. This is an
  * expensive operation if you have to walk the whole chain of
@@ -113,26 +59,82 @@ static void read_cache_file(const char *path)
 int main(int argc, char **argv)
 {
        int i;
-       unsigned char sha1[20];
+       int nr = 0;
+       unsigned char sha1[MAX_COMMITS][20];
+       struct commit_list *list = NULL;
+
+       /*
+        * First - pick up all the revisions we can (both from
+        * caches and from commit file chains).
+        */
+       for (i = 1; i < argc ; i++) {
+               char *arg = argv[i];
+               struct commit *commit;
+
+               if (!strcmp(arg, "--cache")) {
+                       read_cache_file(argv[++i]);
+                       continue;
+               }
 
-       while (argc > 2) {
-               if (!strcmp(argv[1], "--cache")) {
-                       read_cache_file(argv[2]);
-                       argv += 2;
-                       argc -= 2;
+               if (!strcmp(arg, "--edges")) {
+                       show_edges = 1;
                        continue;
                }
-               usage("unknown option %s", argv[1]);
+
+               if (arg[0] == '^') {
+                       arg++;
+                       basemask |= 1<<nr;
+               }
+               if (nr >= MAX_COMMITS || get_sha1(arg, sha1[nr]))
+                       usage("git-rev-tree [--edges] [--cache <cache-file>] <commit-id> [<commit-id>]");
+
+               commit = lookup_commit_reference(sha1[nr]);
+               if (!commit || parse_commit(commit) < 0)
+                       die("bad commit object");
+               commit_list_insert(commit, &list);
+               nr++;
        }
 
-       if (argc != 2 || get_sha1_hex(argv[1], sha1))
-               usage("rev-tree [--cache <cache-file>] <commit-id>");
-       parse_commit(sha1);
-       for (i = 0; i < nr_rels; i++) {
-               char parent[60];
-               struct relationship *rel = rels[i];
-               strcpy(parent, sha1_to_hex(rel->parent));
-               printf("%s %s\n", sha1_to_hex(rel->sha1), parent);
+       /*
+        * Parse all the commits in date order.
+        *
+        * We really should stop once we know enough, but that's a
+        * decision that isn't trivial to make.
+        */
+       while (list)
+               pop_most_recent_commit(&list, REACHABLE);
+
+       /*
+        * Now we have the maximal tree. Walk the different sha files back to the root.
+        */
+       for (i = 0; i < nr; i++)
+               mark_reachable(&lookup_commit_reference(sha1[i])->object, 1 << i);
+
+       /*
+        * Now print out the results.
+        */
+       for (i = 0; i < nr_objs; i++) {
+               struct object *obj = objs[i];
+               struct commit *commit;
+               struct commit_list *p;
+
+               if (obj->type != commit_type)
+                       continue;
+
+               commit = (struct commit *) obj;
+
+               if (!interesting(commit))
+                       continue;
+
+               printf("%lu %s:%d", commit->date, sha1_to_hex(obj->sha1),
+                                   cmit_flags(commit));
+               p = commit->parents;
+               while (p) {
+                       printf(" %s:%d", sha1_to_hex(p->item->object.sha1), 
+                              cmit_flags(p->item));
+                       p = p->next;
+               }
+               printf("\n");
        }
        return 0;
 }