Rework object refs tracking to reduce memory usage
authorSergey Vlasov <vsu@altlinux.ru>
Tue, 15 Nov 2005 16:08:08 +0000 (19:08 +0300)
committerJunio C Hamano <junkio@cox.net>
Tue, 15 Nov 2005 19:42:29 +0000 (11:42 -0800)
Store pointers to referenced objects in a variable sized array instead
of linked list. This cuts down memory usage of utilities which use
object references; e.g., git-fsck-objects --full on the git.git
repository consumes about 2 MB of memory tracked by Massif instead of
7 MB before the change. Object refs are still the biggest consumer of
memory (57%), but the malloc overhead for a single block instead of a
linked list is substantially smaller.

Signed-off-by: Sergey Vlasov <vsu@altlinux.ru>
Signed-off-by: Junio C Hamano <junkio@cox.net>
commit.c
fsck-objects.c
object.c
object.h
server-info.c
tag.c
tree.c
index ebf4db6416497ba6586d359c6d37c8ee942a1395..e867b86e6a10d64354226b04f71df98d7448e072 100644 (file)
--- a/commit.c
+++ b/commit.c
@@ -204,6 +204,7 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
        unsigned char parent[20];
        struct commit_list **pptr;
        struct commit_graft *graft;
+       unsigned n_refs = 0;
 
        if (item->object.parsed)
                return 0;
@@ -214,7 +215,7 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
                return error("bad tree pointer in commit %s\n", sha1_to_hex(item->object.sha1));
        item->tree = lookup_tree(parent);
        if (item->tree)
-               add_ref(&item->object, &item->tree->object);
+               n_refs++;
        bufptr += 46; /* "tree " + "hex sha1" + "\n" */
        pptr = &item->parents;
 
@@ -230,7 +231,7 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
                new_parent = lookup_commit(parent);
                if (new_parent) {
                        pptr = &commit_list_insert(new_parent, pptr)->next;
-                       add_ref(&item->object, &new_parent->object);
+                       n_refs++;
                }
        }
        if (graft) {
@@ -241,10 +242,22 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
                        if (!new_parent)
                                continue;
                        pptr = &commit_list_insert(new_parent, pptr)->next;
-                       add_ref(&item->object, &new_parent->object);
+                       n_refs++;
                }
        }
        item->date = parse_commit_date(bufptr);
+
+       if (track_object_refs) {
+               unsigned i = 0;
+               struct commit_list *p;
+               struct object_refs *refs = alloc_object_refs(n_refs);
+               if (item->tree)
+                       refs->ref[i++] = &item->tree->object;
+               for (p = item->parents; p; p = p->next)
+                       refs->ref[i++] = &p->item->object;
+               set_object_refs(&item->object, refs);
+       }
+
        return 0;
 }
 
index c1b279efcbb89ec75adb22ceca3edd4341095754..0433a1d0da6d6286c3db5a2395e2b2e60c9e9472 100644 (file)
@@ -56,7 +56,6 @@ static void check_connectivity(void)
        /* Look up all the requirements, warn about missing objects.. */
        for (i = 0; i < nr_objs; i++) {
                struct object *obj = objs[i];
-               struct object_list *refs;
 
                if (!obj->parsed) {
                        if (!standalone && has_sha1_file(obj->sha1))
@@ -67,14 +66,19 @@ static void check_connectivity(void)
                        continue;
                }
 
-               for (refs = obj->refs; refs; refs = refs->next) {
-                       if (refs->item->parsed ||
-                           (!standalone && has_sha1_file(refs->item->sha1)))
-                               continue;
-                       printf("broken link from %7s %s\n",
-                              obj->type, sha1_to_hex(obj->sha1));
-                       printf("              to %7s %s\n",
-                              refs->item->type, sha1_to_hex(refs->item->sha1));
+               if (obj->refs) {
+                       const struct object_refs *refs = obj->refs;
+                       unsigned j;
+                       for (j = 0; j < refs->count; j++) {
+                               struct object *ref = refs->ref[j];
+                               if (ref->parsed ||
+                                   (!standalone && has_sha1_file(ref->sha1)))
+                                       continue;
+                               printf("broken link from %7s %s\n",
+                                      obj->type, sha1_to_hex(obj->sha1));
+                               printf("              to %7s %s\n",
+                                      ref->type, sha1_to_hex(ref->sha1));
+                       }
                }
 
                if (show_unreachable && !(obj->flags & REACHABLE)) {
index 1fdebe012ba8a6db0b6eae443e40a7f74ae2d597..427e14cae2deb42138a439f7b2d69d3e06bb9417 100644 (file)
--- a/object.c
+++ b/object.c
@@ -67,40 +67,66 @@ void created_object(const unsigned char *sha1, struct object *obj)
        nr_objs++;
 }
 
-void add_ref(struct object *refer, struct object *target)
+struct object_refs *alloc_object_refs(unsigned count)
 {
-       struct object_list **pp, *p;
+       struct object_refs *refs;
+       size_t size = sizeof(*refs) + count*sizeof(struct object *);
 
-       if (!track_object_refs)
+       refs = xmalloc(size);
+       memset(refs, 0, size);
+       refs->count = count;
+       return refs;
+}
+
+static int compare_object_pointers(const void *a, const void *b)
+{
+       const struct object * const *pa = a;
+       const struct object * const *pb = b;
+       return *pa - *pb;
+}
+
+void set_object_refs(struct object *obj, struct object_refs *refs)
+{
+       unsigned int i, j;
+
+       /* Do not install empty list of references */
+       if (refs->count < 1) {
+               free(refs);
                return;
+       }
 
-       pp = &refer->refs;
-       while ((p = *pp) != NULL) {
-               if (p->item == target)
-                       return;
-               pp = &p->next;
+       /* Sort the list and filter out duplicates */
+       qsort(refs->ref, refs->count, sizeof(refs->ref[0]),
+             compare_object_pointers);
+       for (i = j = 1; i < refs->count; i++) {
+               if (refs->ref[i] != refs->ref[i - 1])
+                       refs->ref[j++] = refs->ref[i];
+       }
+       if (j < refs->count) {
+               /* Duplicates were found - reallocate list */
+               size_t size = sizeof(*refs) + j*sizeof(struct object *);
+               refs->count = j;
+               refs = xrealloc(refs, size);
        }
 
-       target->used = 1;
-       p = xmalloc(sizeof(*p));
-       p->item = target;
-       p->next = NULL;
-       *pp = p;
+       for (i = 0; i < refs->count; i++)
+               refs->ref[i]->used = 1;
+       obj->refs = refs;
 }
 
 void mark_reachable(struct object *obj, unsigned int mask)
 {
-       struct object_list *p = obj->refs;
-
        if (!track_object_refs)
                die("cannot do reachability with object refs turned off");
        /* If we've been here already, don't bother */
        if (obj->flags & mask)
                return;
        obj->flags |= mask;
-       while (p) {
-               mark_reachable(p->item, mask);
-               p = p->next;
+       if (obj->refs) {
+               const struct object_refs *refs = obj->refs;
+               unsigned i;
+               for (i = 0; i < refs->count; i++)
+                       mark_reachable(refs->ref[i], mask);
        }
 }
 
index 6accda33d8c8d6b8ef33a4df50bf350d9f088dc6..336d986b51831db7812fd9bbd6a818803935fde2 100644 (file)
--- a/object.h
+++ b/object.h
@@ -7,13 +7,18 @@ struct object_list {
        const char *name;
 };
 
+struct object_refs {
+       unsigned count;
+       struct object *ref[0];
+};
+
 struct object {
        unsigned parsed : 1;
        unsigned used : 1;
        unsigned int flags;
        unsigned char sha1[20];
        const char *type;
-       struct object_list *refs;
+       struct object_refs *refs;
        void *util;
 };
 
@@ -35,7 +40,8 @@ struct object *parse_object(const unsigned char *sha1);
 /** Returns the object, with potentially excess memory allocated. **/
 struct object *lookup_unknown_object(const unsigned  char *sha1);
 
-void add_ref(struct object *refer, struct object *target);
+struct object_refs *alloc_object_refs(unsigned count);
+void set_object_refs(struct object *obj, struct object_refs *refs);
 
 void mark_reachable(struct object *obj, unsigned int mask);
 
index 0cba8e19f7582fa5771ed6ab94a76e1d87efc53a..e4006f0b5bc14f39d28ac98120b2fcccec34de8d 100644 (file)
@@ -424,7 +424,6 @@ static void find_pack_info_one(int pack_ix)
 {
        unsigned char sha1[20];
        struct object *o;
-       struct object_list *ref;
        int i;
        struct packed_git *p = info[pack_ix]->p;
        int num = num_packed_objects(p);
@@ -437,8 +436,12 @@ static void find_pack_info_one(int pack_ix)
                        die("corrupt pack file %s?", p->pack_name);
                if ((o = lookup_object(sha1)) == NULL)
                        die("cannot parse %s", sha1_to_hex(sha1));
-               for (ref = o->refs; ref; ref = ref->next)
-                       ref->item->flags = 0;
+               if (o->refs) {
+                       struct object_refs *refs = o->refs;
+                       int j;
+                       for (j = 0; j < refs->count; j++)
+                               refs->ref[j]->flags = 0;
+               }
                o->flags = 0;
        }
 
@@ -448,8 +451,12 @@ static void find_pack_info_one(int pack_ix)
                        die("corrupt pack file %s?", p->pack_name);
                if ((o = lookup_object(sha1)) == NULL)
                        die("cannot find %s", sha1_to_hex(sha1));
-               for (ref = o->refs; ref; ref = ref->next)
-                       ref->item->flags |= REFERENCED;
+               if (o->refs) {
+                       struct object_refs *refs = o->refs;
+                       int j;
+                       for (j = 0; j < refs->count; j++)
+                               refs->ref[j]->flags |= REFERENCED;
+               }
                o->flags |= INTERNAL;
        }
 
@@ -460,8 +467,12 @@ static void find_pack_info_one(int pack_ix)
                        die("cannot find %s", sha1_to_hex(sha1));
 
                show(o, pack_ix);
-               for (ref = o->refs; ref; ref = ref->next)
-                       show(ref->item, pack_ix);
+               if (o->refs) {
+                       struct object_refs *refs = o->refs;
+                       int j;
+                       for (j = 0; j < refs->count; j++)
+                               show(refs->ref[j], pack_ix);
+               }
        }
 
 }
diff --git a/tag.c b/tag.c
index e574c4b7a496d3a1d113801214197a04d5c74d42..61ac434d6b6330e24900beab29c924ccffb886c1 100644 (file)
--- a/tag.c
+++ b/tag.c
@@ -75,8 +75,11 @@ int parse_tag_buffer(struct tag *item, void *data, unsigned long size)
        item->tag[taglen] = '\0';
 
        item->tagged = lookup_object_type(object, type);
-       if (item->tagged)
-               add_ref(&item->object, item->tagged);
+       if (item->tagged && track_object_refs) {
+               struct object_refs *refs = alloc_object_refs(1);
+               refs->ref[0] = item->tagged;
+               set_object_refs(&item->object, refs);
+       }
 
        return 0;
 }
diff --git a/tree.c b/tree.c
index 315b6a5d1ce0e83fc75a399a3b74e51f46c5b392..8b42a07b208aceeaa4900c034eea8a5b6b7ac323 100644 (file)
--- a/tree.c
+++ b/tree.c
@@ -148,6 +148,7 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
 {
        void *bufptr = buffer;
        struct tree_entry_list **list_p;
+       int n_refs = 0;
 
        if (item->object.parsed)
                return 0;
@@ -184,11 +185,21 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
                        obj = &entry->item.blob->object;
                }
                if (obj)
-                       add_ref(&item->object, obj);
+                       n_refs++;
                entry->parent = NULL; /* needs to be filled by the user */
                *list_p = entry;
                list_p = &entry->next;
        }
+
+       if (track_object_refs) {
+               struct tree_entry_list *entry;
+               unsigned i = 0;
+               struct object_refs *refs = alloc_object_refs(n_refs);
+               for (entry = item->entries; entry; entry = entry->next)
+                       refs->ref[i++] = entry->item.any;
+               set_object_refs(&item->object, refs);
+       }
+
        return 0;
 }