Fix broken sha1 locking
[gitweb.git] / builtin-pack-objects.c
index 46f524dfc32a5eaea06df6e3502c83710a1c09bb..8d7a1209d5effe83eb93ad4d0f5088806d625c70 100644 (file)
@@ -9,10 +9,13 @@
 #include "pack.h"
 #include "csum-file.h"
 #include "tree-walk.h"
+#include "diff.h"
+#include "revision.h"
+#include "list-objects.h"
 #include <sys/time.h>
 #include <signal.h>
 
-static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
+static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] [--revs [--unpacked | --all]*] [--stdout | base-name] <ref-list | <object-list]";
 
 struct object_entry {
        unsigned char sha1[20];
@@ -65,6 +68,8 @@ static unsigned char pack_file_sha1[20];
 static int progress = 1;
 static volatile sig_atomic_t progress_update;
 static int window = 10;
+static int pack_to_stdout;
+static int num_preferred_base;
 
 /*
  * The object names in objects array are hashed with this hashtable,
@@ -242,6 +247,82 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
        return n;
 }
 
+/*
+ * Inflate the zlib stream stored in data[0..len) into a scratch buffer
+ * (the output is thrown away) to verify that it is well formed.
+ *
+ * Returns 0 when the stream inflates to exactly `expect` bytes while
+ * consuming exactly `len` bytes of input, and -1 otherwise.
+ */
+static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect)
+{
+       z_stream stream;
+       unsigned char fakebuf[4096];
+       int st;
+
+       memset(&stream, 0, sizeof(stream));
+       stream.next_in = data;
+       stream.avail_in = len;
+       stream.next_out = fakebuf;
+       stream.avail_out = sizeof(fakebuf);
+       if (inflateInit(&stream) != Z_OK)
+               return -1;
+
+       while (1) {
+               st = inflate(&stream, Z_FINISH);
+               if (st == Z_STREAM_END || st == Z_OK) {
+                       st = (stream.total_out == expect &&
+                             stream.total_in == len) ? 0 : -1;
+                       break;
+               }
+               /*
+                * Z_BUF_ERROR is only recoverable when the scratch
+                * buffer is full.  If output room is left, inflate()
+                * stopped because the input ran out before the end
+                * of the stream; handing it a fresh output buffer
+                * cannot help and would make this loop spin forever.
+                */
+               if (st != Z_BUF_ERROR || stream.avail_out) {
+                       st = -1;
+                       break;
+               }
+               /* scratch buffer is full: discard it and keep going */
+               stream.next_out = fakebuf;
+               stream.avail_out = sizeof(fakebuf);
+       }
+       inflateEnd(&stream);
+       return st;
+}
+
+/*
+ * we are going to reuse the existing pack entry data.  make
+ * sure it is not corrupt.
+ *
+ * `data` points at the start of the entry (its object header) and
+ * `len` is the number of bytes the entry occupies.  Returns 0 when
+ * the entry looks sane, or when pack_to_stdout is set (no check is
+ * made in that case), and non-zero when the entry is corrupt.
+ */
+static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data, unsigned long len)
+{
+       enum object_type type;
+       unsigned long size, used;
+
+       if (pack_to_stdout)
+               return 0;
+
+       /* the caller has already called use_packed_git() for us,
+        * so it is safe to access the pack data from mmapped location.
+        * make sure the entry inflates correctly.
+        */
+       used = unpack_object_header_gently(data, len, &type, &size);
+       if (!used)
+               return -1;
+       if (type == OBJ_DELTA)
+               used += 20; /* skip base object name */
+       /*
+        * A truncated entry may be shorter than its header plus the
+        * base object name; "len -= used" would then wrap around and
+        * let check_inflate() read far beyond the entry.
+        */
+       if (len < used)
+               return -1;
+       data += used;
+       len -= used;
+       return check_inflate(data, len, entry->size);
+}
+
+/*
+ * Sanity-check a mapped loose object: parse its object header and make
+ * sure the rest of the map inflates to the size the header announces.
+ * No check is made (and 0 is returned) when pack_to_stdout is set.
+ *
+ * Returns 0 when the object is acceptable, non-zero when it is corrupt.
+ */
+static int revalidate_loose_object(struct object_entry *entry,
+                                  unsigned char *map,
+                                  unsigned long mapsize)
+{
+       /* we already know this is a loose object with new type header. */
+       enum object_type kind;
+       unsigned long inflated_size, header_len;
+
+       if (!pack_to_stdout) {
+               header_len = unpack_object_header_gently(map, mapsize,
+                                                        &kind, &inflated_size);
+               if (header_len)
+                       return check_inflate(map + header_len,
+                                            mapsize - header_len,
+                                            inflated_size);
+               return -1;
+       }
+       return 0;
+}
+
 static unsigned long write_object(struct sha1file *f,
                                  struct object_entry *entry)
 {
@@ -276,6 +357,9 @@ static unsigned long write_object(struct sha1file *f,
                map = map_sha1_file(entry->sha1, &mapsize);
                if (map && !legacy_loose_object(map)) {
                        /* We can copy straight into the pack file */
+                       if (revalidate_loose_object(entry, map, mapsize))
+                               die("corrupt loose object %s",
+                                   sha1_to_hex(entry->sha1));
                        sha1write(f, map, mapsize);
                        munmap(map, mapsize);
                        written++;
@@ -286,7 +370,7 @@ static unsigned long write_object(struct sha1file *f,
                        munmap(map, mapsize);
        }
 
-       if (! to_reuse) {
+       if (!to_reuse) {
                buf = read_sha1_file(entry->sha1, type, &size);
                if (!buf)
                        die("unable to read %s", sha1_to_hex(entry->sha1));
@@ -319,6 +403,9 @@ static unsigned long write_object(struct sha1file *f,
 
                datalen = find_packed_object_size(p, entry->in_pack_offset);
                buf = (char *) p->pack_base + entry->in_pack_offset;
+
+               if (revalidate_pack_entry(entry, buf, datalen))
+                       die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
                sha1write(f, buf, datalen);
                unuse_packed_git(p);
                hdrlen = 0; /* not really */
@@ -755,7 +842,7 @@ static int check_pbase_path(unsigned hash)
        return 0;
 }
 
-static void add_preferred_base_object(char *name, unsigned hash)
+static void add_preferred_base_object(const char *name, unsigned hash)
 {
        struct pbase_tree *it;
        int cmplen = name_cmp_len(name);
@@ -784,6 +871,9 @@ static void add_preferred_base(unsigned char *sha1)
        unsigned long size;
        unsigned char tree_sha1[20];
 
+       if (window <= num_preferred_base++)
+               return;
+
        data = read_object_with_reference(sha1, tree_type, &size, tree_sha1);
        if (!data)
                return;
@@ -1163,7 +1253,7 @@ static void prepare_pack(int window, int depth)
                find_deltas(sorted_by_type, window+1, depth);
 }
 
-static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
+static int reuse_cached_pack(unsigned char *sha1)
 {
        static const char cache[] = "pack-cache/pack-%s.%s";
        char *cached_pack, *cached_idx;
@@ -1243,14 +1333,105 @@ static int git_pack_config(const char *k, const char *v)
        return git_default_config(k, v);
 }
 
+/*
+ * Read the list of objects to pack from stdin, one per line.
+ *
+ * A line starting with '-' followed by a hex sha1 names an edge and is
+ * handed to add_preferred_base().  Any other line must start with a
+ * 40-hex sha1; whatever follows the separator byte after it is used as
+ * the object's name for hashing.  Malformed lines are fatal.
+ */
+static void read_object_list_from_stdin(void)
+{
+       char line[40 + 1 + PATH_MAX + 2];
+       unsigned char sha1[20];
+       unsigned hash;
+       const char *name;
+
+       for (;;) {
+               if (!fgets(line, sizeof(line), stdin)) {
+                       if (feof(stdin))
+                               break;
+                       if (!ferror(stdin))
+                               die("fgets returned NULL, not EOF, not error!");
+                       if (errno != EINTR)
+                               die("fgets: %s", strerror(errno));
+                       /* interrupted by a signal: reset the error and retry */
+                       clearerr(stdin);
+                       continue;
+               }
+               if (line[0] == '-') {
+                       if (get_sha1_hex(line+1, sha1))
+                               die("expected edge sha1, got garbage:\n %s",
+                                   line);
+                       add_preferred_base(sha1);
+                       continue;
+               }
+               if (get_sha1_hex(line, sha1))
+                       die("expected sha1, got garbage:\n %s", line);
+
+               /*
+                * The name starts after the sha1 and one separator byte.
+                * If the line ends right after the sha1 (e.g. the last
+                * line has no trailing newline), line[40] is the NUL
+                * and line+41 would point at stale buffer contents.
+                */
+               name = line[40] ? line + 41 : "";
+               hash = name_hash(name);
+               add_preferred_base_object(name, hash);
+               add_object_entry(sha1, hash, 0);
+       }
+}
+
+/*
+ * Commit callback handed to traverse_commit_list(): register the
+ * commit as an object to pack, hashed under the empty name.
+ */
+static void show_commit(struct commit *commit)
+{
+       unsigned empty_name_hash;
+
+       empty_name_hash = name_hash("");
+       add_preferred_base_object("", empty_name_hash);
+       add_object_entry(commit->object.sha1, empty_name_hash, 0);
+}
+
+/*
+ * Object callback handed to traverse_commit_list(): register the
+ * object as one to pack, hashed under the name recorded in the entry.
+ */
+static void show_object(struct object_array_entry *p)
+{
+       unsigned entry_name_hash;
+
+       entry_name_hash = name_hash(p->name);
+       add_preferred_base_object(p->name, entry_name_hash);
+       add_object_entry(p->item->sha1, entry_name_hash, 0);
+}
+
+/*
+ * Edge callback handed to mark_edges_uninteresting(): offer the edge
+ * commit to add_preferred_base().
+ */
+static void show_edge(struct commit *commit)
+{
+       unsigned char *edge_sha1 = commit->object.sha1;
+
+       add_preferred_base(edge_sha1);
+}
+
+/*
+ * Build the list of objects to pack with the internal revision walker.
+ *
+ * `ac`/`av` are rev-list style arguments passed to setup_revisions().
+ * Further revisions are read from stdin, one per line, until EOF or an
+ * empty line; a "--not" line toggles the UNINTERESTING flag applied to
+ * the revisions that follow, and any other line starting with '-' is
+ * fatal.  The walk then reports edges, commits and objects through
+ * show_edge(), show_commit() and show_object().
+ */
+static void get_object_list(int ac, const char **av)
+{
+       struct rev_info revs;
+       char line[1000];
+       int flags = 0;
+
+       init_revisions(&revs, NULL);
+       save_commit_buffer = 0;
+       track_object_refs = 0;
+       setup_revisions(ac, av, &revs, NULL);
+
+       while (fgets(line, sizeof(line), stdin) != NULL) {
+               int len = strlen(line);
+               /*
+                * len can be 0 when the line starts with a NUL byte;
+                * do not look at line[-1] in that case.
+                */
+               if (len && line[len - 1] == '\n')
+                       line[--len] = 0;
+               if (!len)
+                       break;
+               if (*line == '-') {
+                       if (!strcmp(line, "--not")) {
+                               flags ^= UNINTERESTING;
+                               continue;
+                       }
+                       die("not a rev '%s'", line);
+               }
+               if (handle_revision_arg(line, &revs, flags, 1))
+                       die("bad revision '%s'", line);
+       }
+
+       prepare_revision_walk(&revs);
+       mark_edges_uninteresting(revs.commits, &revs, show_edge);
+       traverse_commit_list(&revs, show_commit, show_object);
+}
+
 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 {
        SHA_CTX ctx;
-       char line[40 + 1 + PATH_MAX + 2];
-       int depth = 10, pack_to_stdout = 0;
+       int depth = 10;
        struct object_entry **list;
-       int num_preferred_base = 0;
+       int use_internal_rev_list = 0;
+       int thin = 0;
        int i;
+       const char *rp_av[64];
+       int rp_ac;
+
+       rp_av[0] = "pack-objects";
+       rp_av[1] = "--objects"; /* --thin will make it --objects-edge */
+       rp_ac = 2;
 
        git_config(git_pack_config);
 
@@ -1258,63 +1439,99 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
        for (i = 1; i < argc; i++) {
                const char *arg = argv[i];
 
-               if (*arg == '-') {
-                       if (!strcmp("--non-empty", arg)) {
-                               non_empty = 1;
-                               continue;
-                       }
-                       if (!strcmp("--local", arg)) {
-                               local = 1;
-                               continue;
-                       }
-                       if (!strcmp("--progress", arg)) {
-                               progress = 1;
-                               continue;
-                       }
-                       if (!strcmp("--incremental", arg)) {
-                               incremental = 1;
-                               continue;
-                       }
-                       if (!strncmp("--window=", arg, 9)) {
-                               char *end;
-                               window = strtoul(arg+9, &end, 0);
-                               if (!arg[9] || *end)
-                                       usage(pack_usage);
-                               continue;
-                       }
-                       if (!strncmp("--depth=", arg, 8)) {
-                               char *end;
-                               depth = strtoul(arg+8, &end, 0);
-                               if (!arg[8] || *end)
-                                       usage(pack_usage);
-                               continue;
-                       }
-                       if (!strcmp("--progress", arg)) {
-                               progress = 1;
-                               continue;
-                       }
-                       if (!strcmp("-q", arg)) {
-                               progress = 0;
-                               continue;
-                       }
-                       if (!strcmp("--no-reuse-delta", arg)) {
-                               no_reuse_delta = 1;
-                               continue;
-                       }
-                       if (!strcmp("--stdout", arg)) {
-                               pack_to_stdout = 1;
-                               continue;
-                       }
-                       usage(pack_usage);
+               if (*arg != '-')
+                       break;
+
+               if (!strcmp("--non-empty", arg)) {
+                       non_empty = 1;
+                       continue;
+               }
+               if (!strcmp("--local", arg)) {
+                       local = 1;
+                       continue;
+               }
+               if (!strcmp("--progress", arg)) {
+                       progress = 1;
+                       continue;
+               }
+               if (!strcmp("--incremental", arg)) {
+                       incremental = 1;
+                       continue;
+               }
+               if (!strncmp("--window=", arg, 9)) {
+                       char *end;
+                       window = strtoul(arg+9, &end, 0);
+                       if (!arg[9] || *end)
+                               usage(pack_usage);
+                       continue;
+               }
+               if (!strncmp("--depth=", arg, 8)) {
+                       char *end;
+                       depth = strtoul(arg+8, &end, 0);
+                       if (!arg[8] || *end)
+                               usage(pack_usage);
+                       continue;
+               }
+               if (!strcmp("--progress", arg)) {
+                       progress = 1;
+                       continue;
+               }
+               if (!strcmp("-q", arg)) {
+                       progress = 0;
+                       continue;
+               }
+               if (!strcmp("--no-reuse-delta", arg)) {
+                       no_reuse_delta = 1;
+                       continue;
+               }
+               if (!strcmp("--stdout", arg)) {
+                       pack_to_stdout = 1;
+                       continue;
+               }
+               if (!strcmp("--revs", arg)) {
+                       use_internal_rev_list = 1;
+                       continue;
                }
-               if (base_name)
-                       usage(pack_usage);
-               base_name = arg;
+               if (!strcmp("--unpacked", arg) ||
+                   !strncmp("--unpacked=", arg, 11) ||
+                   !strcmp("--all", arg)) {
+                       use_internal_rev_list = 1;
+                       if (ARRAY_SIZE(rp_av) - 1 <= rp_ac)
+                               die("too many internal rev-list options");
+                       rp_av[rp_ac++] = arg;
+                       continue;
+               }
+               if (!strcmp("--thin", arg)) {
+                       use_internal_rev_list = 1;
+                       thin = 1;
+                       rp_av[1] = "--objects-edge";
+                       continue;
+               }
+               usage(pack_usage);
        }
 
+       /* Traditionally "pack-objects [options] base extra" failed;
+        * we would however want to take refs parameter that would
+        * have been given to upstream rev-list ourselves, which means
+        * we somehow want to say what the base name is.  So the
+        * syntax would be:
+        *
+        * pack-objects [options] base <refs...>
+        *
+        * in other words, we would treat the first non-option as the
+        * base_name and send everything else to the internal revision
+        * walker.
+        */
+
+       if (!pack_to_stdout)
+               base_name = argv[i++];
+
        if (pack_to_stdout != !base_name)
                usage(pack_usage);
 
+       if (!pack_to_stdout && thin)
+               die("--thin cannot be used to build an indexable pack.");
+
        prepare_packed_git();
 
        if (progress) {
@@ -1322,35 +1539,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
                setup_progress_signal();
        }
 
-       for (;;) {
-               unsigned char sha1[20];
-               unsigned hash;
-
-               if (!fgets(line, sizeof(line), stdin)) {
-                       if (feof(stdin))
-                               break;
-                       if (!ferror(stdin))
-                               die("fgets returned NULL, not EOF, not error!");
-                       if (errno != EINTR)
-                               die("fgets: %s", strerror(errno));
-                       clearerr(stdin);
-                       continue;
-               }
-
-               if (line[0] == '-') {
-                       if (get_sha1_hex(line+1, sha1))
-                               die("expected edge sha1, got garbage:\n %s",
-                                   line+1);
-                       if (num_preferred_base++ < window)
-                               add_preferred_base(sha1);
-                       continue;
-               }
-               if (get_sha1_hex(line, sha1))
-                       die("expected sha1, got garbage:\n %s", line);
-               hash = name_hash(line+41);
-               add_preferred_base_object(line+41, hash);
-               add_object_entry(sha1, hash, 0);
+       if (!use_internal_rev_list)
+               read_object_list_from_stdin();
+       else {
+               rp_av[rp_ac] = NULL;
+               get_object_list(rp_ac, rp_av);
        }
+
        if (progress)
                fprintf(stderr, "Done counting %d objects.\n", nr_objects);
        sorted_by_sha = create_final_object_list();
@@ -1367,7 +1562,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
        if (progress && (nr_objects != nr_result))
                fprintf(stderr, "Result has %d objects.\n", nr_result);
 
-       if (reuse_cached_pack(object_list_sha1, pack_to_stdout))
+       if (reuse_cached_pack(object_list_sha1))
                ;
        else {
                if (nr_result)