Thin pack generation: optimization.
[gitweb.git] / revision.c
index 67ff4de2d1e70e061aebc834e7b9632c8d7bf6cb..ce35b5a7e6960383debb4541c573ecf02d192cae 100644 (file)
@@ -3,6 +3,7 @@
 #include "blob.h"
 #include "tree.h"
 #include "commit.h"
+#include "diff.h"
 #include "refs.h"
 #include "revision.h"
 
@@ -81,18 +82,20 @@ void mark_parents_uninteresting(struct commit *commit)
 
        while (parents) {
                struct commit *commit = parents->item;
-               commit->object.flags |= UNINTERESTING;
+               if (!(commit->object.flags & UNINTERESTING)) {
+                       commit->object.flags |= UNINTERESTING;
 
-               /*
-                * Normally we haven't parsed the parent
-                * yet, so we won't have a parent of a parent
-                * here. However, it may turn out that we've
-                * reached this commit some other way (where it
-                * wasn't uninteresting), in which case we need
-                * to mark its parents recursively too..
-                */
-               if (commit->parents)
-                       mark_parents_uninteresting(commit);
+                       /*
+                        * Normally we haven't parsed the parent
+                        * yet, so we won't have a parent of a parent
+                        * here. However, it may turn out that we've
+                        * reached this commit some other way (where it
+                        * wasn't uninteresting), in which case we need
+                        * to mark its parents recursively too..
+                        */
+                       if (commit->parents)
+                               mark_parents_uninteresting(commit);
+               }
 
                /*
                 * A missing commit is ok iff its parent is marked
@@ -183,6 +186,272 @@ static struct commit *get_commit_reference(struct rev_info *revs, const char *na
        die("%s is unknown object", name);
 }
 
+static int everybody_uninteresting(struct commit_list *orig)
+{
+       struct commit_list *list = orig;
+       while (list) {
+               struct commit *commit = list->item;
+               list = list->next;
+               if (commit->object.flags & UNINTERESTING)
+                       continue;
+               return 0;
+       }
+       return 1;
+}
+
+static int tree_difference = REV_TREE_SAME;
+
+static void file_add_remove(struct diff_options *options,
+                   int addremove, unsigned mode,
+                   const unsigned char *sha1,
+                   const char *base, const char *path)
+{
+       int diff = REV_TREE_DIFFERENT;
+
+       /*
+        * Is it an add of a new file? It means that the old tree
+        * didn't have it at all, so we will turn "REV_TREE_SAME" ->
+        * "REV_TREE_NEW", but leave any "REV_TREE_DIFFERENT" alone
+        * (and if it already was "REV_TREE_NEW", we'll keep it
+        * "REV_TREE_NEW" of course).
+        */
+       if (addremove == '+') {
+               diff = tree_difference;
+               if (diff != REV_TREE_SAME)
+                       return;
+               diff = REV_TREE_NEW;
+       }
+       tree_difference = diff;
+}
+
+static void file_change(struct diff_options *options,
+                unsigned old_mode, unsigned new_mode,
+                const unsigned char *old_sha1,
+                const unsigned char *new_sha1,
+                const char *base, const char *path)
+{
+       tree_difference = REV_TREE_DIFFERENT;
+}
+
+static struct diff_options diff_opt = {
+       .recursive = 1,
+       .add_remove = file_add_remove,
+       .change = file_change,
+};
+
+int rev_compare_tree(struct tree *t1, struct tree *t2)
+{
+       if (!t1)
+               return REV_TREE_NEW;
+       if (!t2)
+               return REV_TREE_DIFFERENT;
+       tree_difference = REV_TREE_SAME;
+       if (diff_tree_sha1(t1->object.sha1, t2->object.sha1, "", &diff_opt) < 0)
+               return REV_TREE_DIFFERENT;
+       return tree_difference;
+}
+
+int rev_same_tree_as_empty(struct tree *t1)
+{
+       int retval;
+       void *tree;
+       struct tree_desc empty, real;
+
+       if (!t1)
+               return 0;
+
+       tree = read_object_with_reference(t1->object.sha1, tree_type, &real.size, NULL);
+       if (!tree)
+               return 0;
+       real.buf = tree;
+
+       empty.buf = "";
+       empty.size = 0;
+
+       tree_difference = 0;
+       retval = diff_tree(&empty, &real, "", &diff_opt);
+       free(tree);
+
+       return retval >= 0 && !tree_difference;
+}
+
+static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
+{
+       struct commit_list **pp, *parent;
+       int tree_changed = 0;
+
+       if (!commit->tree)
+               return;
+
+       if (!commit->parents) {
+               if (!rev_same_tree_as_empty(commit->tree))
+                       commit->object.flags |= TREECHANGE;
+               return;
+       }
+
+       pp = &commit->parents;
+       while ((parent = *pp) != NULL) {
+               struct commit *p = parent->item;
+
+               parse_commit(p);
+               switch (rev_compare_tree(p->tree, commit->tree)) {
+               case REV_TREE_SAME:
+                       if (p->object.flags & UNINTERESTING) {
+                               /* Even if a merge with an uninteresting
+                                * side branch brought the entire change
+                                * we are interested in, we do not want
+                                * to lose the other branches of this
+                                * merge, so we just keep going.
+                                */
+                               pp = &parent->next;
+                               continue;
+                       }
+                       parent->next = NULL;
+                       commit->parents = parent;
+                       return;
+
+               case REV_TREE_NEW:
+                       if (revs->remove_empty_trees &&
+                           rev_same_tree_as_empty(p->tree)) {
+                               /* We are adding all the specified
+                                * paths from this parent, so the
+                                * history beyond this parent is not
+                                * interesting.  Remove its parents
+                                * (they are grandparents for us).
+                                * IOW, we pretend this parent is a
+                                * "root" commit.
+                                */
+                               parse_commit(p);
+                               p->parents = NULL;
+                       }
+               /* fallthrough */
+               case REV_TREE_DIFFERENT:
+                       tree_changed = 1;
+                       pp = &parent->next;
+                       continue;
+               }
+               die("bad tree compare for commit %s", sha1_to_hex(commit->object.sha1));
+       }
+       if (tree_changed)
+               commit->object.flags |= TREECHANGE;
+}
+
+static void add_parents_to_list(struct rev_info *revs, struct commit *commit, struct commit_list **list)
+{
+       struct commit_list *parent = commit->parents;
+
+       /*
+        * If the commit is uninteresting, don't try to
+        * prune parents - we want the maximal uninteresting
+        * set.
+        *
+        * Normally we haven't parsed the parent
+        * yet, so we won't have a parent of a parent
+        * here. However, it may turn out that we've
+        * reached this commit some other way (where it
+        * wasn't uninteresting), in which case we need
+        * to mark its parents recursively too..
+        */
+       if (commit->object.flags & UNINTERESTING) {
+               while (parent) {
+                       struct commit *p = parent->item;
+                       parent = parent->next;
+                       parse_commit(p);
+                       p->object.flags |= UNINTERESTING;
+                       if (p->parents)
+                               mark_parents_uninteresting(p);
+                       if (p->object.flags & SEEN)
+                               continue;
+                       p->object.flags |= SEEN;
+                       insert_by_date(p, list);
+               }
+               return;
+       }
+
+       /*
+        * Ok, the commit wasn't uninteresting. Try to
+        * simplify the commit history and find the parent
+        * that has no differences in the path set if one exists.
+        */
+       if (revs->prune_fn)
+               revs->prune_fn(revs, commit);
+
+       parent = commit->parents;
+       while (parent) {
+               struct commit *p = parent->item;
+
+               parent = parent->next;
+
+               parse_commit(p);
+               if (p->object.flags & SEEN)
+                       continue;
+               p->object.flags |= SEEN;
+               insert_by_date(p, list);
+       }
+}
+
+static void limit_list(struct rev_info *revs)
+{
+       struct commit_list *list = revs->commits;
+       struct commit_list *newlist = NULL;
+       struct commit_list **p = &newlist;
+
+       while (list) {
+               struct commit_list *entry = list;
+               struct commit *commit = list->item;
+               struct object *obj = &commit->object;
+
+               list = list->next;
+               free(entry);
+
+               if (revs->max_age != -1 && (commit->date < revs->max_age))
+                       obj->flags |= UNINTERESTING;
+               if (revs->unpacked && has_sha1_pack(obj->sha1))
+                       obj->flags |= UNINTERESTING;
+               add_parents_to_list(revs, commit, &list);
+               if (obj->flags & UNINTERESTING) {
+                       mark_parents_uninteresting(commit);
+                       if (everybody_uninteresting(list))
+                               break;
+                       continue;
+               }
+               if (revs->min_age != -1 && (commit->date > revs->min_age))
+                       continue;
+               p = &commit_list_insert(commit, p)->next;
+       }
+       if (revs->boundary) {
+               /* mark the ones that are on the result list first */
+               for (list = newlist; list; list = list->next) {
+                       struct commit *commit = list->item;
+                       commit->object.flags |= TMP_MARK;
+               }
+               for (list = newlist; list; list = list->next) {
+                       struct commit *commit = list->item;
+                       struct object *obj = &commit->object;
+                       struct commit_list *parent;
+                       if (obj->flags & UNINTERESTING)
+                               continue;
+                       for (parent = commit->parents;
+                            parent;
+                            parent = parent->next) {
+                               struct commit *pcommit = parent->item;
+                               if (!(pcommit->object.flags & UNINTERESTING))
+                                       continue;
+                               pcommit->object.flags |= BOUNDARY;
+                               if (pcommit->object.flags & TMP_MARK)
+                                       continue;
+                               pcommit->object.flags |= TMP_MARK;
+                               p = &commit_list_insert(pcommit, p)->next;
+                       }
+               }
+               for (list = newlist; list; list = list->next) {
+                       struct commit *commit = list->item;
+                       commit->object.flags &= ~TMP_MARK;
+               }
+       }
+       revs->commits = newlist;
+}
+
 static void add_one_commit(struct commit *commit, struct rev_info *revs)
 {
        if (!commit || (commit->object.flags & SEEN))
@@ -208,26 +477,37 @@ static void handle_all(struct rev_info *revs, unsigned flags)
        for_each_ref(handle_one_ref);
 }
 
+void init_revisions(struct rev_info *revs)
+{
+       memset(revs, 0, sizeof(*revs));
+       revs->lifo = 1;
+       revs->dense = 1;
+       revs->prefix = setup_git_directory();
+       revs->max_age = -1;
+       revs->min_age = -1;
+       revs->max_count = -1;
+
+       revs->prune_fn = NULL;
+       revs->prune_data = NULL;
+
+       revs->topo_setter = topo_sort_default_setter;
+       revs->topo_getter = topo_sort_default_getter;
+}
+
 /*
  * Parse revision information, filling in the "rev_info" structure,
  * and removing the used arguments from the argument list.
  *
- * Returns the number of arguments left ("new argc").
+ * Returns the number of arguments left that weren't recognized
+ * (which are also moved to the head of the argument list)
  */
-int setup_revisions(int argc, const char **argv, struct rev_info *revs)
+int setup_revisions(int argc, const char **argv, struct rev_info *revs, const char *def)
 {
        int i, flags, seen_dashdash;
-       const char *def = NULL;
-       const char **unrecognized = argv+1;
+       const char **unrecognized = argv + 1;
        int left = 1;
 
-       memset(revs, 0, sizeof(*revs));
-       revs->lifo = 1;
-       revs->dense = 1;
-       revs->prefix = setup_git_directory();
-       revs->max_age = -1;
-       revs->min_age = -1;
-       revs->max_count = -1;
+       init_revisions(revs);
 
        /* First, search for "--" */
        seen_dashdash = 0;
@@ -237,7 +517,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
                        continue;
                argv[i] = NULL;
                argc = i;
-               revs->paths = get_pathspec(revs->prefix, argv + i + 1);
+               revs->prune_data = get_pathspec(revs->prefix, argv + i + 1);
                seen_dashdash = 1;
                break;
        }
@@ -255,14 +535,43 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
                                revs->max_count = atoi(arg + 12);
                                continue;
                        }
+                       /* accept -<digit>, like traditilnal "head" */
+                       if ((*arg == '-') && isdigit(arg[1])) {
+                               revs->max_count = atoi(arg + 1);
+                               continue;
+                       }
+                       if (!strcmp(arg, "-n")) {
+                               if (argc <= i + 1)
+                                       die("-n requires an argument");
+                               revs->max_count = atoi(argv[++i]);
+                               continue;
+                       }
+                       if (!strncmp(arg,"-n",2)) {
+                               revs->max_count = atoi(arg + 2);
+                               continue;
+                       }
                        if (!strncmp(arg, "--max-age=", 10)) {
                                revs->max_age = atoi(arg + 10);
-                               revs->limited = 1;
+                               continue;
+                       }
+                       if (!strncmp(arg, "--since=", 8)) {
+                               revs->max_age = approxidate(arg + 8);
+                               continue;
+                       }
+                       if (!strncmp(arg, "--after=", 8)) {
+                               revs->max_age = approxidate(arg + 8);
                                continue;
                        }
                        if (!strncmp(arg, "--min-age=", 10)) {
                                revs->min_age = atoi(arg + 10);
-                               revs->limited = 1;
+                               continue;
+                       }
+                       if (!strncmp(arg, "--before=", 9)) {
+                               revs->min_age = approxidate(arg + 9);
+                               continue;
+                       }
+                       if (!strncmp(arg, "--until=", 8)) {
+                               revs->min_age = approxidate(arg + 8);
                                continue;
                        }
                        if (!strcmp(arg, "--all")) {
@@ -281,13 +590,15 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
                        }
                        if (!strcmp(arg, "--topo-order")) {
                                revs->topo_order = 1;
-                               revs->limited = 1;
                                continue;
                        }
                        if (!strcmp(arg, "--date-order")) {
                                revs->lifo = 0;
                                revs->topo_order = 1;
-                               revs->limited = 1;
+                               continue;
+                       }
+                       if (!strcmp(arg, "--parents")) {
+                               revs->parents = 1;
                                continue;
                        }
                        if (!strcmp(arg, "--dense")) {
@@ -302,6 +613,14 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
                                revs->remove_empty_trees = 1;
                                continue;
                        }
+                       if (!strcmp(arg, "--no-merges")) {
+                               revs->no_merges = 1;
+                               continue;
+                       }
+                       if (!strcmp(arg, "--boundary")) {
+                               revs->boundary = 1;
+                               continue;
+                       }
                        if (!strcmp(arg, "--objects")) {
                                revs->tag_objects = 1;
                                revs->tree_objects = 1;
@@ -317,7 +636,6 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
                        }
                        if (!strcmp(arg, "--unpacked")) {
                                revs->unpacked = 1;
-                               revs->limited = 1;
                                continue;
                        }
                        *unrecognized++ = arg;
@@ -327,15 +645,19 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
                dotdot = strstr(arg, "..");
                if (dotdot) {
                        unsigned char from_sha1[20];
-                       char *next = dotdot + 2;
+                       const char *next = dotdot + 2;
+                       const char *this = arg;
                        *dotdot = 0;
                        if (!*next)
                                next = "HEAD";
-                       if (!get_sha1(arg, from_sha1) && !get_sha1(next, sha1)) {
+                       if (dotdot == arg)
+                               this = "HEAD";
+                       if (!get_sha1(this, from_sha1) &&
+                           !get_sha1(next, sha1)) {
                                struct commit *exclude;
                                struct commit *include;
 
-                               exclude = get_commit_reference(revs, arg, from_sha1, flags ^ UNINTERESTING);
+                               exclude = get_commit_reference(revs, this, from_sha1, flags ^ UNINTERESTING);
                                include = get_commit_reference(revs, next, sha1, flags);
                                if (!exclude || !include)
                                        die("Invalid revision range %s..%s", arg, next);
@@ -360,9 +682,9 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
                        /* If we didn't have a "--", all filenames must exist */
                        for (j = i; j < argc; j++) {
                                if (lstat(argv[j], &st) < 0)
-                                       die("'%s': %s", arg, strerror(errno));
+                                       die("'%s': %s", argv[j], strerror(errno));
                        }
-                       revs->paths = get_pathspec(revs->prefix, argv + i);
+                       revs->prune_data = get_pathspec(revs->prefix, argv + i);
                        break;
                }
                commit = get_commit_reference(revs, arg, sha1, flags ^ local_flags);
@@ -376,8 +698,114 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
                commit = get_commit_reference(revs, def, sha1, 0);
                add_one_commit(commit, revs);
        }
-       if (revs->paths)
+
+       if (revs->topo_order || revs->unpacked)
                revs->limited = 1;
-       *unrecognized = NULL;
+
+       if (revs->prune_data) {
+               diff_tree_setup_paths(revs->prune_data);
+               revs->prune_fn = try_to_simplify_commit;
+
+               /*
+                * If we fix up parent data, we currently cannot
+                * do that on-the-fly.
+                */
+               if (revs->parents)
+                       revs->limited = 1;
+       }
+
        return left;
 }
+
+void prepare_revision_walk(struct rev_info *revs)
+{
+       sort_by_date(&revs->commits);
+       if (revs->limited)
+               limit_list(revs);
+       if (revs->topo_order)
+               sort_in_topological_order_fn(&revs->commits, revs->lifo,
+                                            revs->topo_setter,
+                                            revs->topo_getter);
+}
+
+static int rewrite_one(struct commit **pp)
+{
+       for (;;) {
+               struct commit *p = *pp;
+               if (p->object.flags & (TREECHANGE | UNINTERESTING))
+                       return 0;
+               if (!p->parents)
+                       return -1;
+               *pp = p->parents->item;
+       }
+}
+
+static void rewrite_parents(struct commit *commit)
+{
+       struct commit_list **pp = &commit->parents;
+       while (*pp) {
+               struct commit_list *parent = *pp;
+               if (rewrite_one(&parent->item) < 0) {
+                       *pp = parent->next;
+                       continue;
+               }
+               pp = &parent->next;
+       }
+}
+
+struct commit *get_revision(struct rev_info *revs)
+{
+       struct commit_list *list = revs->commits;
+
+       if (!list)
+               return NULL;
+
+       /* Check the max_count ... */
+       switch (revs->max_count) {
+       case -1:
+               break;
+       case 0:
+               return NULL;
+       default:
+               revs->max_count--;
+       }
+
+       do {
+               struct commit *commit = revs->commits->item;
+
+               revs->commits = revs->commits->next;
+
+               /*
+                * If we haven't done the list limiting, we need to look at
+                * the parents here. We also need to do the date-based limiting
+                * that we'd otherwise have done in limit_list().
+                */
+               if (!revs->limited) {
+                       if ((revs->unpacked &&
+                            has_sha1_pack(commit->object.sha1)) ||
+                           (revs->max_age != -1 &&
+                            (commit->date < revs->max_age)))
+                               continue;
+                       add_parents_to_list(revs, commit, &revs->commits);
+               }
+               if (commit->object.flags & SHOWN)
+                       continue;
+               if (!(commit->object.flags & BOUNDARY) &&
+                   (commit->object.flags & UNINTERESTING))
+                       continue;
+               if (revs->min_age != -1 && (commit->date > revs->min_age))
+                       continue;
+               if (revs->no_merges &&
+                   commit->parents && commit->parents->next)
+                       continue;
+               if (revs->prune_fn && revs->dense) {
+                       if (!(commit->object.flags & TREECHANGE))
+                               continue;
+                       if (revs->parents)
+                               rewrite_parents(commit);
+               }
+               commit->object.flags |= SHOWN;
+               return commit;
+       } while (revs->commits);
+       return NULL;
+}