/* reachable.c, as of commit "prune: keep objects reachable from recent objects" (d3038d2) */
   1#include "cache.h"
   2#include "refs.h"
   3#include "tag.h"
   4#include "commit.h"
   5#include "blob.h"
   6#include "diff.h"
   7#include "revision.h"
   8#include "reachable.h"
   9#include "cache-tree.h"
  10#include "progress.h"
  11#include "list-objects.h"
  12
  13struct connectivity_progress {
  14        struct progress *progress;
  15        unsigned long count;
  16};
  17
  18static void update_progress(struct connectivity_progress *cp)
  19{
  20        cp->count++;
  21        if ((cp->count & 1023) == 0)
  22                display_progress(cp->progress, cp->count);
  23}
  24
  25static int add_one_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data)
  26{
  27        struct object *object = parse_object_or_die(sha1, path);
  28        struct rev_info *revs = (struct rev_info *)cb_data;
  29
  30        add_pending_object(revs, object, "");
  31
  32        return 0;
  33}
  34
  35static void add_one_tree(const unsigned char *sha1, struct rev_info *revs)
  36{
  37        struct tree *tree = lookup_tree(sha1);
  38        if (tree)
  39                add_pending_object(revs, &tree->object, "");
  40}
  41
  42static void add_cache_tree(struct cache_tree *it, struct rev_info *revs)
  43{
  44        int i;
  45
  46        if (it->entry_count >= 0)
  47                add_one_tree(it->sha1, revs);
  48        for (i = 0; i < it->subtree_nr; i++)
  49                add_cache_tree(it->down[i]->cache_tree, revs);
  50}
  51
  52static void add_cache_refs(struct rev_info *revs)
  53{
  54        int i;
  55
  56        read_cache();
  57        for (i = 0; i < active_nr; i++) {
  58                struct blob *blob;
  59
  60                /*
  61                 * The index can contain blobs and GITLINKs, GITLINKs are hashes
  62                 * that don't actually point to objects in the repository, it's
  63                 * almost guaranteed that they are NOT blobs, so we don't call
  64                 * lookup_blob() on them, to avoid populating the hash table
  65                 * with invalid information
  66                 */
  67                if (S_ISGITLINK(active_cache[i]->ce_mode))
  68                        continue;
  69
  70                blob = lookup_blob(active_cache[i]->sha1);
  71                if (blob)
  72                        blob->object.flags |= SEEN;
  73
  74                /*
  75                 * We could add the blobs to the pending list, but quite
  76                 * frankly, we don't care. Once we've looked them up, and
  77                 * added them as objects, we've really done everything
  78                 * there is to do for a blob
  79                 */
  80        }
  81        if (active_cache_tree)
  82                add_cache_tree(active_cache_tree, revs);
  83}
  84
  85/*
  86 * The traversal will have already marked us as SEEN, so we
  87 * only need to handle any progress reporting here.
  88 */
  89static void mark_object(struct object *obj, const struct name_path *path,
  90                        const char *name, void *data)
  91{
  92        update_progress(data);
  93}
  94
  95static void mark_commit(struct commit *c, void *data)
  96{
  97        mark_object(&c->object, NULL, NULL, data);
  98}
  99
 100struct recent_data {
 101        struct rev_info *revs;
 102        unsigned long timestamp;
 103};
 104
 105static void add_recent_object(const unsigned char *sha1,
 106                              unsigned long mtime,
 107                              struct recent_data *data)
 108{
 109        struct object *obj;
 110        enum object_type type;
 111
 112        if (mtime <= data->timestamp)
 113                return;
 114
 115        /*
 116         * We do not want to call parse_object here, because
 117         * inflating blobs and trees could be very expensive.
 118         * However, we do need to know the correct type for
 119         * later processing, and the revision machinery expects
 120         * commits and tags to have been parsed.
 121         */
 122        type = sha1_object_info(sha1, NULL);
 123        if (type < 0)
 124                die("unable to get object info for %s", sha1_to_hex(sha1));
 125
 126        switch (type) {
 127        case OBJ_TAG:
 128        case OBJ_COMMIT:
 129                obj = parse_object_or_die(sha1, NULL);
 130                break;
 131        case OBJ_TREE:
 132                obj = (struct object *)lookup_tree(sha1);
 133                break;
 134        case OBJ_BLOB:
 135                obj = (struct object *)lookup_blob(sha1);
 136                break;
 137        default:
 138                die("unknown object type for %s: %s",
 139                    sha1_to_hex(sha1), typename(type));
 140        }
 141
 142        if (!obj)
 143                die("unable to lookup %s", sha1_to_hex(sha1));
 144
 145        add_pending_object(data->revs, obj, "");
 146}
 147
 148static int add_recent_loose(const unsigned char *sha1,
 149                            const char *path, void *data)
 150{
 151        struct stat st;
 152        struct object *obj = lookup_object(sha1);
 153
 154        if (obj && obj->flags & SEEN)
 155                return 0;
 156
 157        if (stat(path, &st) < 0) {
 158                /*
 159                 * It's OK if an object went away during our iteration; this
 160                 * could be due to a simultaneous repack. But anything else
 161                 * we should abort, since we might then fail to mark objects
 162                 * which should not be pruned.
 163                 */
 164                if (errno == ENOENT)
 165                        return 0;
 166                return error("unable to stat %s: %s",
 167                             sha1_to_hex(sha1), strerror(errno));
 168        }
 169
 170        add_recent_object(sha1, st.st_mtime, data);
 171        return 0;
 172}
 173
 174static int add_recent_packed(const unsigned char *sha1,
 175                             struct packed_git *p, uint32_t pos,
 176                             void *data)
 177{
 178        struct object *obj = lookup_object(sha1);
 179
 180        if (obj && obj->flags & SEEN)
 181                return 0;
 182        add_recent_object(sha1, p->mtime, data);
 183        return 0;
 184}
 185
 186static int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
 187                                                  unsigned long timestamp)
 188{
 189        struct recent_data data;
 190        int r;
 191
 192        data.revs = revs;
 193        data.timestamp = timestamp;
 194
 195        r = for_each_loose_object(add_recent_loose, &data);
 196        if (r)
 197                return r;
 198        return for_each_packed_object(add_recent_packed, &data);
 199}
 200
 201void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
 202                            unsigned long mark_recent,
 203                            struct progress *progress)
 204{
 205        struct connectivity_progress cp;
 206
 207        /*
 208         * Set up revision parsing, and mark us as being interested
 209         * in all object types, not just commits.
 210         */
 211        revs->tag_objects = 1;
 212        revs->blob_objects = 1;
 213        revs->tree_objects = 1;
 214
 215        /* Add all refs from the index file */
 216        add_cache_refs(revs);
 217
 218        /* Add all external refs */
 219        for_each_ref(add_one_ref, revs);
 220
 221        /* detached HEAD is not included in the list above */
 222        head_ref(add_one_ref, revs);
 223
 224        /* Add all reflog info */
 225        if (mark_reflog)
 226                add_reflogs_to_pending(revs, 0);
 227
 228        cp.progress = progress;
 229        cp.count = 0;
 230
 231        /*
 232         * Set up the revision walk - this will move all commits
 233         * from the pending list to the commit walking list.
 234         */
 235        if (prepare_revision_walk(revs))
 236                die("revision walk setup failed");
 237        traverse_commit_list(revs, mark_commit, mark_object, &cp);
 238
 239        if (mark_recent) {
 240                revs->ignore_missing_links = 1;
 241                if (add_unseen_recent_objects_to_traversal(revs, mark_recent))
 242                        die("unable to mark recent objects");
 243                if (prepare_revision_walk(revs))
 244                        die("revision walk setup failed");
 245                traverse_commit_list(revs, mark_commit, mark_object, &cp);
 246        }
 247
 248        display_progress(cp.progress, cp.count);
 249}