git-pack-refs --prune
[gitweb.git] / builtin-pack-objects.c
index 2301cd5c0f0e6d7a1dc662b9c352f844e2dce027..8d7a1209d5effe83eb93ad4d0f5088806d625c70 100644 (file)
@@ -9,10 +9,13 @@
 #include "pack.h"
 #include "csum-file.h"
 #include "tree-walk.h"
+#include "diff.h"
+#include "revision.h"
+#include "list-objects.h"
 #include <sys/time.h>
 #include <signal.h>
 
-static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
+static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] [--revs [--unpacked | --all]*] [--stdout | base-name] <ref-list | <object-list]";
 
 struct object_entry {
        unsigned char sha1[20];
@@ -53,18 +56,20 @@ struct object_entry {
  */
 
 static unsigned char object_list_sha1[20];
-static int non_empty = 0;
-static int no_reuse_delta = 0;
-static int local = 0;
-static int incremental = 0;
+static int non_empty;
+static int no_reuse_delta;
+static int local;
+static int incremental;
 static struct object_entry **sorted_by_sha, **sorted_by_type;
-static struct object_entry *objects = NULL;
-static int nr_objects = 0, nr_alloc = 0, nr_result = 0;
+static struct object_entry *objects;
+static int nr_objects, nr_alloc, nr_result;
 static const char *base_name;
 static unsigned char pack_file_sha1[20];
 static int progress = 1;
-static volatile sig_atomic_t progress_update = 0;
+static volatile sig_atomic_t progress_update;
 static int window = 10;
+static int pack_to_stdout;
+static int num_preferred_base;
 
 /*
  * The object names in objects array are hashed with this hashtable,
@@ -72,8 +77,8 @@ static int window = 10;
  * sorted_by_sha is also possible but this was easier to code and faster.
  * This hashtable is built after all the objects are seen.
  */
-static int *object_ix = NULL;
-static int object_ix_hashsz = 0;
+static int *object_ix;
+static int object_ix_hashsz;
 
 /*
  * Pack index for existing packs give us easy access to the offsets into
@@ -90,15 +95,15 @@ struct pack_revindex {
        struct packed_git *p;
        unsigned long *revindex;
 } *pack_revindex = NULL;
-static int pack_revindex_hashsz = 0;
+static int pack_revindex_hashsz;
 
 /*
  * stats
  */
-static int written = 0;
-static int written_delta = 0;
-static int reused = 0;
-static int reused_delta = 0;
+static int written;
+static int written_delta;
+static int reused;
+static int reused_delta;
 
 static int pack_revindex_ix(struct packed_git *p)
 {
@@ -242,6 +247,82 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
        return n;
 }
 
+/*
+ * Inflate "len" bytes of zlib data starting at "data" purely as a
+ * consistency check; the inflated bytes go to a scratch buffer that
+ * is overwritten over and over and never looked at.
+ *
+ * Returns 0 when inflation finishes having consumed exactly "len"
+ * input bytes and produced exactly "expect" output bytes, and -1 in
+ * every other case (zlib error, truncated input, size mismatch).
+ */
+static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect)
+{
+       z_stream stream;
+       unsigned char fakebuf[4096];
+       int st;
+
+       memset(&stream, 0, sizeof(stream));
+       stream.next_in = data;
+       stream.avail_in = len;
+       stream.next_out = fakebuf;
+       stream.avail_out = sizeof(fakebuf);
+       if (inflateInit(&stream) != Z_OK)
+               return -1;
+
+       for (;;) {
+               st = inflate(&stream, Z_FINISH);
+               /*
+                * Z_BUF_ERROR is only worth retrying when the scratch
+                * buffer is what filled up.  If there is still room in
+                * it, inflate() stopped because the input ran out
+                * before the end of the stream; retrying with the same
+                * (empty) input would loop forever.
+                */
+               if (st != Z_BUF_ERROR || stream.avail_out)
+                       break;
+               stream.next_out = fakebuf;
+               stream.avail_out = sizeof(fakebuf);
+       }
+
+       if (st == Z_STREAM_END || st == Z_OK)
+               st = (stream.total_out == expect &&
+                     stream.total_in == len) ? 0 : -1;
+       else
+               st = -1;
+
+       inflateEnd(&stream);
+       return st;
+}
+
+/*
+ * We are going to reuse the existing pack entry data.  Make
+ * sure it is not corrupt.
+ *
+ * "data" and "len" describe the raw entry as it sits in the pack,
+ * object header included.  Returns 0 when the entry inflates to
+ * entry->size bytes (or when the check is skipped because the pack
+ * is being written to stdout), and -1 when the entry is corrupt.
+ */
+static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data, unsigned long len)
+{
+       enum object_type type;
+       unsigned long size, used;
+
+       if (pack_to_stdout)
+               return 0;
+
+       /* the caller has already called use_packed_git() for us,
+        * so it is safe to access the pack data from mmapped location.
+        * make sure the entry inflates correctly.
+        */
+       used = unpack_object_header_gently(data, len, &type, &size);
+       if (!used)
+               return -1;
+       if (type == OBJ_DELTA)
+               used += 20; /* skip base object name */
+
+       /*
+        * A truncated entry has to be reported as corrupt here:
+        * "len - used" is unsigned and would otherwise wrap around,
+        * telling check_inflate() to read far beyond the entry.
+        */
+       if (len < used)
+               return -1;
+
+       return check_inflate(data + used, len - used, entry->size);
+}
+
+/*
+ * Check a loose object that is about to be copied into the pack
+ * as-is: parse its header and make sure the rest of the mapping
+ * inflates to the size the header announces.
+ *
+ * Returns 0 when the object checks out, or when the check is skipped
+ * because the pack is being written to stdout; -1 otherwise.
+ * "entry" is not consulted.
+ */
+static int revalidate_loose_object(struct object_entry *entry,
+                                  unsigned char *map,
+                                  unsigned long mapsize)
+{
+       /* we already know this is a loose object with new type header. */
+       enum object_type kind;
+       unsigned long inflated_size;
+       unsigned long hdrlen;
+
+       if (!pack_to_stdout) {
+               hdrlen = unpack_object_header_gently(map, mapsize,
+                                                    &kind, &inflated_size);
+               if (!hdrlen)
+                       return -1;
+               return check_inflate(map + hdrlen, mapsize - hdrlen,
+                                    inflated_size);
+       }
+       return 0;
+}
+
 static unsigned long write_object(struct sha1file *f,
                                  struct object_entry *entry)
 {
@@ -270,7 +351,26 @@ static unsigned long write_object(struct sha1file *f,
                                 * and we do not need to deltify it.
                                 */
 
-       if (! to_reuse) {
+       if (!entry->in_pack && !entry->delta) {
+               unsigned char *map;
+               unsigned long mapsize;
+               map = map_sha1_file(entry->sha1, &mapsize);
+               if (map && !legacy_loose_object(map)) {
+                       /* We can copy straight into the pack file */
+                       if (revalidate_loose_object(entry, map, mapsize))
+                               die("corrupt loose object %s",
+                                   sha1_to_hex(entry->sha1));
+                       sha1write(f, map, mapsize);
+                       munmap(map, mapsize);
+                       written++;
+                       reused++;
+                       return mapsize;
+               }
+               if (map)
+                       munmap(map, mapsize);
+       }
+
+       if (!to_reuse) {
                buf = read_sha1_file(entry->sha1, type, &size);
                if (!buf)
                        die("unable to read %s", sha1_to_hex(entry->sha1));
@@ -303,6 +403,9 @@ static unsigned long write_object(struct sha1file *f,
 
                datalen = find_packed_object_size(p, entry->in_pack_offset);
                buf = (char *) p->pack_base + entry->in_pack_offset;
+
+               if (revalidate_pack_entry(entry, buf, datalen))
+                       die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
                sha1write(f, buf, datalen);
                unuse_packed_git(p);
                hdrlen = 0; /* not really */
@@ -425,7 +528,7 @@ static int locate_object_entry_hash(const unsigned char *sha1)
        memcpy(&ui, sha1, sizeof(unsigned int));
        i = ui % object_ix_hashsz;
        while (0 < object_ix[i]) {
-               if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20))
+               if (!hashcmp(sha1, objects[object_ix[i] - 1].sha1))
                        return i;
                if (++i == object_ix_hashsz)
                        i = 0;
@@ -518,7 +621,7 @@ static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclud
        entry = objects + idx;
        nr_objects = idx + 1;
        memset(entry, 0, sizeof(*entry));
-       memcpy(entry->sha1, sha1, 20);
+       hashcpy(entry->sha1, sha1);
        entry->hash = hash;
 
        if (object_ix_hashsz * 3 <= nr_objects * 4)
@@ -591,7 +694,7 @@ static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1)
         */
        for (neigh = 0; neigh < 8; neigh++) {
                ent = pbase_tree_cache[my_ix];
-               if (ent && !memcmp(ent->sha1, sha1, 20)) {
+               if (ent && !hashcmp(ent->sha1, sha1)) {
                        ent->ref++;
                        return ent;
                }
@@ -633,7 +736,7 @@ static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1)
                free(ent->tree_data);
                nent = ent;
        }
-       memcpy(nent->sha1, sha1, 20);
+       hashcpy(nent->sha1, sha1);
        nent->tree_data = data;
        nent->tree_size = size;
        nent->ref = 1;
@@ -739,7 +842,7 @@ static int check_pbase_path(unsigned hash)
        return 0;
 }
 
-static void add_preferred_base_object(char *name, unsigned hash)
+static void add_preferred_base_object(const char *name, unsigned hash)
 {
        struct pbase_tree *it;
        int cmplen = name_cmp_len(name);
@@ -768,12 +871,15 @@ static void add_preferred_base(unsigned char *sha1)
        unsigned long size;
        unsigned char tree_sha1[20];
 
+       if (window <= num_preferred_base++)
+               return;
+
        data = read_object_with_reference(sha1, tree_type, &size, tree_sha1);
        if (!data)
                return;
 
        for (it = pbase_tree; it; it = it->next) {
-               if (!memcmp(it->pcache.sha1, tree_sha1, 20)) {
+               if (!hashcmp(it->pcache.sha1, tree_sha1)) {
                        free(data);
                        return;
                }
@@ -783,7 +889,7 @@ static void add_preferred_base(unsigned char *sha1)
        it->next = pbase_tree;
        pbase_tree = it;
 
-       memcpy(it->pcache.sha1, tree_sha1, 20);
+       hashcpy(it->pcache.sha1, tree_sha1);
        it->pcache.tree_data = data;
        it->pcache.tree_size = size;
 }
@@ -915,7 +1021,7 @@ static struct object_entry **create_sorted_list(entry_sort_t sort)
 
 static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
 {
-       return memcmp(a->sha1, b->sha1, 20);
+       return hashcmp(a->sha1, b->sha1);
 }
 
 static struct object_entry **create_final_object_list(void)
@@ -1147,7 +1253,7 @@ static void prepare_pack(int window, int depth)
                find_deltas(sorted_by_type, window+1, depth);
 }
 
-static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
+static int reuse_cached_pack(unsigned char *sha1)
 {
        static const char cache[] = "pack-cache/pack-%s.%s";
        char *cached_pack, *cached_idx;
@@ -1227,14 +1333,105 @@ static int git_pack_config(const char *k, const char *v)
        return git_default_config(k, v);
 }
 
+/*
+ * Read the list of objects to pack from stdin, one per line, until
+ * end of file.
+ *
+ * A line starting with '-' carries the hex name of an edge object,
+ * which is passed to add_preferred_base().  Any other line must start
+ * with a 40-digit hex object name; whatever follows the separator
+ * character after it is treated as the object's name, hashed, and the
+ * object is added to the list.  Malformed lines and read errors other
+ * than EINTR are fatal.
+ */
+static void read_object_list_from_stdin(void)
+{
+       char line[40 + 1 + PATH_MAX + 2];
+       unsigned char sha1[20];
+       const char *name;
+       unsigned hash;
+
+       for (;;) {
+               if (!fgets(line, sizeof(line), stdin)) {
+                       if (feof(stdin))
+                               break;
+                       if (!ferror(stdin))
+                               die("fgets returned NULL, not EOF, not error!");
+                       if (errno != EINTR)
+                               die("fgets: %s", strerror(errno));
+                       /* interrupted by a signal: retry the read */
+                       clearerr(stdin);
+                       continue;
+               }
+               if (line[0] == '-') {
+                       if (get_sha1_hex(line+1, sha1))
+                               die("expected edge sha1, got garbage:\n %s",
+                                   line);
+                       add_preferred_base(sha1);
+                       continue;
+               }
+               if (get_sha1_hex(line, sha1))
+                       die("expected sha1, got garbage:\n %s", line);
+
+               /*
+                * The name normally starts right after the separator
+                * at line[40].  If the line stops at the 40th hex
+                * digit (e.g. the last line has no newline), line+41
+                * would point past the terminating NUL into stale
+                * buffer contents, so use an empty name instead.
+                */
+               name = line[40] ? line + 41 : "";
+               hash = name_hash(name);
+               add_preferred_base_object(name, hash);
+               add_object_entry(sha1, hash, 0);
+       }
+}
+
+/*
+ * Commit callback for traverse_commit_list(): add the commit to the
+ * list of objects to pack, using the empty string as its name.
+ */
+static void show_commit(struct commit *commit)
+{
+       static const char no_name[] = "";
+       unsigned name_key;
+
+       name_key = name_hash(no_name);
+       add_preferred_base_object(no_name, name_key);
+       add_object_entry(commit->object.sha1, name_key, 0);
+}
+
+/*
+ * Object callback for traverse_commit_list(): add the object to the
+ * list of objects to pack, hashing the name recorded in the entry.
+ */
+static void show_object(struct object_array_entry *p)
+{
+       const char *name = p->name;
+       unsigned name_key = name_hash(name);
+
+       add_preferred_base_object(name, name_key);
+       add_object_entry(p->item->sha1, name_key, 0);
+}
+
+/*
+ * Edge callback for mark_edges_uninteresting(): offer the edge
+ * commit to add_preferred_base().
+ */
+static void show_edge(struct commit *commit)
+{
+       unsigned char *edge_sha1 = commit->object.sha1;
+
+       add_preferred_base(edge_sha1);
+}
+
+/*
+ * Build the list of objects to pack with the internal revision
+ * walker instead of reading object names from stdin.
+ *
+ * "ac"/"av" are the rev-list style options to set the walker up
+ * with.  Revisions are then read from stdin, one per line, until end
+ * of file or an empty line.  A "--not" line toggles the UNINTERESTING
+ * flag applied to the revisions that follow it; any other line
+ * starting with '-' and any line that is not a valid revision is
+ * fatal.  Edge commits are reported to show_edge(), and every commit
+ * and object found by the walk to show_commit() and show_object().
+ */
+static void get_object_list(int ac, const char **av)
+{
+       struct rev_info revs;
+       char line[1000];
+       int flags = 0;
+
+       init_revisions(&revs, NULL);
+       save_commit_buffer = 0;
+       track_object_refs = 0;
+       setup_revisions(ac, av, &revs, NULL);
+
+       while (fgets(line, sizeof(line), stdin) != NULL) {
+               int len = strlen(line);
+               /*
+                * fgets() can hand back an empty string when the
+                * input line starts with a NUL byte, so len has to
+                * be checked before looking at line[len - 1].
+                */
+               if (len && line[len - 1] == '\n')
+                       line[--len] = 0;
+               if (!len)
+                       break;
+               if (*line == '-') {
+                       if (!strcmp(line, "--not")) {
+                               flags ^= UNINTERESTING;
+                               continue;
+                       }
+                       die("not a rev '%s'", line);
+               }
+               if (handle_revision_arg(line, &revs, flags, 1))
+                       die("bad revision '%s'", line);
+       }
+
+       prepare_revision_walk(&revs);
+       mark_edges_uninteresting(revs.commits, &revs, show_edge);
+       traverse_commit_list(&revs, show_commit, show_object);
+}
+
 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 {
        SHA_CTX ctx;
-       char line[40 + 1 + PATH_MAX + 2];
-       int depth = 10, pack_to_stdout = 0;
+       int depth = 10;
        struct object_entry **list;
-       int num_preferred_base = 0;
+       int use_internal_rev_list = 0;
+       int thin = 0;
        int i;
+       const char *rp_av[64];
+       int rp_ac;
+
+       rp_av[0] = "pack-objects";
+       rp_av[1] = "--objects"; /* --thin will make it --objects-edge */
+       rp_ac = 2;
 
        git_config(git_pack_config);
 
@@ -1242,63 +1439,99 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
        for (i = 1; i < argc; i++) {
                const char *arg = argv[i];
 
-               if (*arg == '-') {
-                       if (!strcmp("--non-empty", arg)) {
-                               non_empty = 1;
-                               continue;
-                       }
-                       if (!strcmp("--local", arg)) {
-                               local = 1;
-                               continue;
-                       }
-                       if (!strcmp("--progress", arg)) {
-                               progress = 1;
-                               continue;
-                       }
-                       if (!strcmp("--incremental", arg)) {
-                               incremental = 1;
-                               continue;
-                       }
-                       if (!strncmp("--window=", arg, 9)) {
-                               char *end;
-                               window = strtoul(arg+9, &end, 0);
-                               if (!arg[9] || *end)
-                                       usage(pack_usage);
-                               continue;
-                       }
-                       if (!strncmp("--depth=", arg, 8)) {
-                               char *end;
-                               depth = strtoul(arg+8, &end, 0);
-                               if (!arg[8] || *end)
-                                       usage(pack_usage);
-                               continue;
-                       }
-                       if (!strcmp("--progress", arg)) {
-                               progress = 1;
-                               continue;
-                       }
-                       if (!strcmp("-q", arg)) {
-                               progress = 0;
-                               continue;
-                       }
-                       if (!strcmp("--no-reuse-delta", arg)) {
-                               no_reuse_delta = 1;
-                               continue;
-                       }
-                       if (!strcmp("--stdout", arg)) {
-                               pack_to_stdout = 1;
-                               continue;
-                       }
-                       usage(pack_usage);
+               if (*arg != '-')
+                       break;
+
+               if (!strcmp("--non-empty", arg)) {
+                       non_empty = 1;
+                       continue;
+               }
+               if (!strcmp("--local", arg)) {
+                       local = 1;
+                       continue;
+               }
+               if (!strcmp("--progress", arg)) {
+                       progress = 1;
+                       continue;
+               }
+               if (!strcmp("--incremental", arg)) {
+                       incremental = 1;
+                       continue;
+               }
+               if (!strncmp("--window=", arg, 9)) {
+                       char *end;
+                       window = strtoul(arg+9, &end, 0);
+                       if (!arg[9] || *end)
+                               usage(pack_usage);
+                       continue;
+               }
+               if (!strncmp("--depth=", arg, 8)) {
+                       char *end;
+                       depth = strtoul(arg+8, &end, 0);
+                       if (!arg[8] || *end)
+                               usage(pack_usage);
+                       continue;
                }
-               if (base_name)
-                       usage(pack_usage);
-               base_name = arg;
+               if (!strcmp("--progress", arg)) {
+                       progress = 1;
+                       continue;
+               }
+               if (!strcmp("-q", arg)) {
+                       progress = 0;
+                       continue;
+               }
+               if (!strcmp("--no-reuse-delta", arg)) {
+                       no_reuse_delta = 1;
+                       continue;
+               }
+               if (!strcmp("--stdout", arg)) {
+                       pack_to_stdout = 1;
+                       continue;
+               }
+               if (!strcmp("--revs", arg)) {
+                       use_internal_rev_list = 1;
+                       continue;
+               }
+               if (!strcmp("--unpacked", arg) ||
+                   !strncmp("--unpacked=", arg, 11) ||
+                   !strcmp("--all", arg)) {
+                       use_internal_rev_list = 1;
+                       if (ARRAY_SIZE(rp_av) - 1 <= rp_ac)
+                               die("too many internal rev-list options");
+                       rp_av[rp_ac++] = arg;
+                       continue;
+               }
+               if (!strcmp("--thin", arg)) {
+                       use_internal_rev_list = 1;
+                       thin = 1;
+                       rp_av[1] = "--objects-edge";
+                       continue;
+               }
+               usage(pack_usage);
        }
 
+       /* Traditionally "pack-objects [options] base extra" failed;
+        * we would however want to take refs parameter that would
+        * have been given to upstream rev-list ourselves, which means
+        * we somehow want to say what the base name is.  So the
+        * syntax would be:
+        *
+        * pack-objects [options] base <refs...>
+        *
+        * in other words, we would treat the first non-option as the
+        * base_name and send everything else to the internal revision
+        * walker.
+        */
+
+       if (!pack_to_stdout)
+               base_name = argv[i++];
+
        if (pack_to_stdout != !base_name)
                usage(pack_usage);
 
+       if (!pack_to_stdout && thin)
+               die("--thin cannot be used to build an indexable pack.");
+
        prepare_packed_git();
 
        if (progress) {
@@ -1306,35 +1539,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
                setup_progress_signal();
        }
 
-       for (;;) {
-               unsigned char sha1[20];
-               unsigned hash;
-
-               if (!fgets(line, sizeof(line), stdin)) {
-                       if (feof(stdin))
-                               break;
-                       if (!ferror(stdin))
-                               die("fgets returned NULL, not EOF, not error!");
-                       if (errno != EINTR)
-                               die("fgets: %s", strerror(errno));
-                       clearerr(stdin);
-                       continue;
-               }
-
-               if (line[0] == '-') {
-                       if (get_sha1_hex(line+1, sha1))
-                               die("expected edge sha1, got garbage:\n %s",
-                                   line+1);
-                       if (num_preferred_base++ < window)
-                               add_preferred_base(sha1);
-                       continue;
-               }
-               if (get_sha1_hex(line, sha1))
-                       die("expected sha1, got garbage:\n %s", line);
-               hash = name_hash(line+41);
-               add_preferred_base_object(line+41, hash);
-               add_object_entry(sha1, hash, 0);
+       if (!use_internal_rev_list)
+               read_object_list_from_stdin();
+       else {
+               rp_av[rp_ac] = NULL;
+               get_object_list(rp_ac, rp_av);
        }
+
        if (progress)
                fprintf(stderr, "Done counting %d objects.\n", nr_objects);
        sorted_by_sha = create_final_object_list();
@@ -1351,7 +1562,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
        if (progress && (nr_objects != nr_result))
                fprintf(stderr, "Result has %d objects.\n", nr_result);
 
-       if (reuse_cached_pack(object_list_sha1, pack_to_stdout))
+       if (reuse_cached_pack(object_list_sha1))
                ;
        else {
                if (nr_result)