Clean up git-unpack-objects a bit
[gitweb.git] / sha1_file.c
index 7e4a3df3ad9f7be666256e3219651a5296da7aca..63cbdded86ca43e4c28626c4644037c3d1d2c694 100644 (file)
@@ -10,6 +10,7 @@
 #include <dirent.h>
 #include "cache.h"
 #include "delta.h"
+#include "pack.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -184,10 +185,7 @@ char *sha1_file_name(const unsigned char *sha1)
        return base;
 }
 
-static struct alternate_object_database {
-       char *base;
-       char *name;
-} *alt_odb;
+struct alternate_object_database *alt_odb;
 
 /*
  * Prepare alternate object database registry.
@@ -205,13 +203,15 @@ static struct alternate_object_database {
  * pointed by base fields of the array elements with one xmalloc();
  * the string pool immediately follows the array.
  */
-static void prepare_alt_odb(void)
+void prepare_alt_odb(void)
 {
        int pass, totlen, i;
        const char *cp, *last;
        char *op = NULL;
        const char *alt = gitenv(ALTERNATE_DB_ENVIRONMENT) ? : "";
 
+       if (alt_odb)
+               return;
        /* The first pass counts how large an area to allocate to
         * hold the entire alt_odb structure, including array of
         * structs and path buffers for them.  The second pass fills
@@ -258,8 +258,7 @@ static char *find_sha1_file(const unsigned char *sha1, struct stat *st)
 
        if (!stat(name, st))
                return name;
-       if (!alt_odb)
-               prepare_alt_odb();
+       prepare_alt_odb();
        for (i = 0; (name = alt_odb[i].name) != NULL; i++) {
                fill_sha1_path(name, sha1);
                if (!stat(alt_odb[i].base, st))
@@ -271,15 +270,7 @@ static char *find_sha1_file(const unsigned char *sha1, struct stat *st)
 #define PACK_MAX_SZ (1<<26)
 static int pack_used_ctr;
 static unsigned long pack_mapped;
-static struct packed_git {
-       struct packed_git *next;
-       unsigned long index_size;
-       unsigned long pack_size;
-       unsigned int *index_base;
-       void *pack_base;
-       unsigned int pack_last_used;
-       char pack_name[0]; /* something like ".git/objects/pack/xxxxx.pack" */
-} *packed_git;
+struct packed_git *packed_git;
 
 struct pack_entry {
        unsigned int offset;
@@ -311,7 +302,7 @@ static int check_packed_git_idx(const char *path, unsigned long *idx_size_,
        index = idx_map;
 
        /* check index map */
-       if (idx_size < 4*256 + 20)
+       if (idx_size < 4*256 + 20 + 20)
                return error("index file too small");
        nr = 0;
        for (i = 0; i < 256; i++) {
@@ -336,12 +327,29 @@ static int check_packed_git_idx(const char *path, unsigned long *idx_size_,
        return 0;
 }
 
+/*
+ * Unmap the least recently used pack that is currently mapped and not
+ * pinned by a caller (pack_use_cnt == 0).
+ *
+ * Returns 1 if a pack was unmapped, 0 if there was no candidate.
+ */
-static void unuse_one_packed_git(void)
+static int unuse_one_packed_git(void)
 {
-       /* NOTYET */
+       struct packed_git *p, *lru = NULL;
+
+       for (p = packed_git; p; p = p->next) {
+               /* skip packs that are in use or not mapped at all */
+               if (p->pack_use_cnt || !p->pack_base)
+                       continue;
+               if (!lru || p->pack_last_used < lru->pack_last_used)
+                       lru = p;
+       }
+       if (!lru)
+               return 0;
+       munmap(lru->pack_base, lru->pack_size);
+       /* use_packed_git() adds pack_size to pack_mapped when it maps a
+        * pack; give it back here, otherwise its PACK_MAX_SZ loop can
+        * never get under the limit and evicts every idle pack.
+        */
+       pack_mapped -= lru->pack_size;
+       lru->pack_base = NULL;
+       return 1;
 }
 
-static int use_packed_git(struct packed_git *p)
+/*
+ * Drop one use count on pack "p"; use_packed_git() is what raises it.
+ * The mapping itself is left in place.
+ */
+void unuse_packed_git(struct packed_git *p)
+{
+       p->pack_use_cnt -= 1;
+}
+
+int use_packed_git(struct packed_git *p)
 {
        if (!p->pack_base) {
                int fd;
@@ -349,28 +357,36 @@ static int use_packed_git(struct packed_git *p)
                void *map;
 
                pack_mapped += p->pack_size;
-               while (PACK_MAX_SZ < pack_mapped)
-                       unuse_one_packed_git();
+               while (PACK_MAX_SZ < pack_mapped && unuse_one_packed_git())
+                       ; /* nothing */
                fd = open(p->pack_name, O_RDONLY);
                if (fd < 0)
-                       return -1;
+                       die("packfile %s cannot be opened", p->pack_name);
                if (fstat(fd, &st)) {
                        close(fd);
-                       return -1;
+                       die("packfile %s cannot be opened", p->pack_name);
                }
                if (st.st_size != p->pack_size)
-                       return -1;
+                       die("packfile %s size mismatch.", p->pack_name);
                map = mmap(NULL, p->pack_size, PROT_READ, MAP_PRIVATE, fd, 0);
                close(fd);
                if (map == MAP_FAILED)
-                       return -1;
+                       die("packfile %s cannot be mapped.", p->pack_name);
                p->pack_base = map;
+
+               /* Check if the pack file matches with the index file.
+                * this is cheap.
+                */
+               if (memcmp((char*)(p->index_base) + p->index_size - 40,
+                          p->pack_base + p->pack_size - 20, 20))
+                       die("packfile %s does not match index.", p->pack_name);
        }
        p->pack_last_used = pack_used_ctr++;
+       p->pack_use_cnt++;
        return 0;
 }
 
-static struct packed_git *add_packed_git(char *path, int path_len)
+struct packed_git *add_packed_git(char *path, int path_len)
 {
        struct stat st;
        struct packed_git *p;
@@ -395,7 +411,9 @@ static struct packed_git *add_packed_git(char *path, int path_len)
        p->pack_size = st.st_size;
        p->index_base = idx_map;
        p->next = NULL;
+       p->pack_base = NULL;
        p->pack_last_used = 0;
+       p->pack_use_cnt = 0;
        return p;
 }
 
@@ -429,7 +447,7 @@ static void prepare_packed_git_one(char *objdir)
        }
 }
 
-static void prepare_packed_git(void)
+void prepare_packed_git(void)
 {
        int i;
        static int run_once = 0;
@@ -438,8 +456,7 @@ static void prepare_packed_git(void)
                return;
 
        prepare_packed_git_one(get_object_directory());
-       if (!alt_odb)
-               prepare_alt_odb();
+       prepare_alt_odb();
        for (i = 0; alt_odb[i].base != NULL; i++) {
                alt_odb[i].name[0] = 0;
                prepare_packed_git_one(alt_odb[i].base);
@@ -601,39 +618,116 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l
        return unpack_sha1_rest(&stream, hdr, *size);
 }
 
-/* Returns 0 on fast-path success, returns 1 on deltified
- * and need to unpack to see info.
- */
+/*
+ * Report the type and result size of a deltified pack entry without
+ * expanding the whole delta.
+ *
+ * base_sha1 points at the 20-byte name of the base object; the deflated
+ * delta data follows it.  "left" is the number of bytes available
+ * starting at base_sha1.  The type is taken from the base object via
+ * sha1_object_info(); the size is read from the delta header and stored
+ * in *sizep.  delta_size is not used.
+ *
+ * Dies on any inconsistency; otherwise returns 0.
+ */
+static int packed_delta_info(unsigned char *base_sha1,
+                            unsigned long delta_size,
+                            unsigned long left,
+                            char *type,
+                            unsigned long *sizep)
+{
+       unsigned char head[64];
+       const unsigned char *cursor;
+       unsigned long base_len;
+       z_stream zs;
+       int status;
+
+       if (left < 20)
+               die("truncated pack file");
+       if (sha1_object_info(base_sha1, type, &base_len))
+               die("cannot get info for delta-pack base");
+
+       /* Inflate only the beginning of the delta: enough to hold its
+        * two leading size fields.
+        */
+       memset(&zs, 0, sizeof(zs));
+       zs.next_in = base_sha1 + 20;
+       zs.avail_in = left - 20;
+       zs.next_out = head;
+       zs.avail_out = sizeof(head);
+
+       inflateInit(&zs);
+       status = inflate(&zs, Z_FINISH);
+       inflateEnd(&zs);
+       /* Fine if the stream ended, or if the output buffer was filled. */
+       if (status != Z_STREAM_END && zs.total_out != sizeof(head))
+               die("delta data unpack-initial failed");
+
+       /* The delta starts with the base size, then the result size. */
+       cursor = head;
+       if (get_delta_hdr_size(&cursor) != base_len)
+               die("delta base size mismatch");
+
+       *sizep = get_delta_hdr_size(&cursor);
+       return 0;
+}
+
+/*
+ * Decode the variable-length object header found at "offset" in the
+ * mapped pack "p".
+ *
+ * The first byte carries the object type in bits 4-6 and the low four
+ * bits of the size; while bit 7 of the current byte is set, the next
+ * byte supplies seven more size bits, least significant group first.
+ *
+ * Stores the type in *type and the size in *sizep, and returns the
+ * offset of the first byte after the header.  Dies if the header runs
+ * past the end of the pack or the size does not fit in unsigned long.
+ * The caller must have the pack mapped (p->pack_base is dereferenced).
+ */
+static unsigned long unpack_object_header(struct packed_git *p, unsigned long offset,
+       enum object_type *type, unsigned long *sizep)
+{
+       unsigned shift;
+       unsigned char *pack, c;
+       unsigned long size;
+
+       if (offset >= p->pack_size)
+               die("object offset outside of pack file");
+
+       /* byte pointer: arithmetic on void * is a GNU extension */
+       pack = (unsigned char *)p->pack_base + offset;
+       c = *pack++;
+       offset++;
+       *type = (c >> 4) & 7;
+       size = c & 15;
+       shift = 4;
+       while (c & 0x80) {
+               if (offset >= p->pack_size)
+                       die("object offset outside of pack file");
+               /* shifting by the width of the type or more is undefined */
+               if (shift >= 8 * sizeof(size))
+                       die("object size overflow in pack file");
+               c = *pack++;
+               offset++;
+               /* widen first: (c & 0x7f) on its own is an int */
+               size += (unsigned long)(c & 0x7f) << shift;
+               shift += 7;
+       }
+       *sizep = size;
+       return offset;
+}
+
+/*
+ * Look up the type name and size of the pack entry "entry".
+ *
+ * Maps the pack with use_packed_git(), decodes the object header, and
+ * for a delta entry returns whatever packed_delta_info() reports.
+ * For commit, tree, blob and tag entries the type name is copied into
+ * "type", the size is stored in *sizep and 0 is returned.  Any other
+ * type code dies.  The use count taken on the pack is dropped again
+ * before returning.
+ */
 static int packed_object_info(struct pack_entry *entry,
                              char *type, unsigned long *sizep)
 {
        struct packed_git *p = entry->p;
        unsigned long offset, size, left;
        unsigned char *pack;
+       enum object_type kind;
+       int retval;
 
-       offset = entry->offset;
-       if (p->pack_size - 5 < offset)
-               die("object offset outside of pack file");
+       if (use_packed_git(p))
+               die("cannot map packed file");
+
+       offset = unpack_object_header(p, entry->offset, &kind, &size);
        pack = p->pack_base + offset;
-       size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4];
-       left = p->pack_size - offset - 5;
-       switch (*pack) {
-       case 'D':
-               return 1;
-               break;
-       case 'C':
+       left = p->pack_size - offset;
+
+       switch (kind) {
+       case OBJ_DELTA:
+               retval = packed_delta_info(pack, size, left, type, sizep);
+               unuse_packed_git(p);
+               return retval;
+       case OBJ_COMMIT:
                strcpy(type, "commit");
                break;
-       case 'T':
+       case OBJ_TREE:
                strcpy(type, "tree");
                break;
-       case 'B':
+       case OBJ_BLOB:
                strcpy(type, "blob");
                break;
+       case OBJ_TAG:
+               strcpy(type, "tag");
+               break;
        default:
                die("corrupted pack file");
        }
        *sizep = size;
+       unuse_packed_git(p);
        return 0;
 }
 
@@ -719,34 +813,55 @@ static void *unpack_entry(struct pack_entry *entry,
        struct packed_git *p = entry->p;
        unsigned long offset, size, left;
        unsigned char *pack;
-
-       offset = entry->offset;
-       if (p->pack_size - 5 < offset)
-               die("object offset outside of pack file");
+       enum object_type kind;
+       void *retval;
 
        if (use_packed_git(p))
                die("cannot map packed file");
 
+       offset = unpack_object_header(p, entry->offset, &kind, &size);
        pack = p->pack_base + offset;
-       size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4];
-       left = p->pack_size - offset - 5;
-       switch (*pack) {
-       case 'D':
-               return unpack_delta_entry(pack+5, size, left, type, sizep);
-       case 'C':
+       left = p->pack_size - offset;
+       switch (kind) {
+       case OBJ_DELTA:
+               retval = unpack_delta_entry(pack, size, left, type, sizep);
+               unuse_packed_git(p);
+               return retval;
+       case OBJ_COMMIT:
                strcpy(type, "commit");
                break;
-       case 'T':
+       case OBJ_TREE:
                strcpy(type, "tree");
                break;
-       case 'B':
+       case OBJ_BLOB:
                strcpy(type, "blob");
                break;
+       case OBJ_TAG:
+               strcpy(type, "tag");
+               break;
        default:
                die("corrupted pack file");
        }
        *sizep = size;
-       return unpack_non_delta_entry(pack+5, size, left);
+       retval = unpack_non_delta_entry(pack, size, left);
+       unuse_packed_git(p);
+       return retval;
+}
+
+/*
+ * Number of objects recorded in the index of pack "p", computed from
+ * the size of the index file alone.
+ */
+int num_packed_objects(const struct packed_git *p)
+{
+       /* See check_packed_git_idx(): 4*256 + 20 + 20 bytes of the index
+        * are fixed overhead; the rest is 24 bytes per object.
+        */
+       unsigned long fixed = 4*256 + 20 + 20;
+
+       return (p->index_size - fixed) / 24;
+}
+
+/*
+ * Copy the SHA1 of the n-th entry (counting from 0, in index order) of
+ * pack "p" into the 20 bytes at "sha1".
+ *
+ * Returns 0 on success, -1 if n is negative or not below
+ * num_packed_objects(p).
+ */
+int nth_packed_object_sha1(const struct packed_git *p, int n,
+                          unsigned char* sha1)
+{
+       /* Entries follow the 256-word table at the head of the index;
+        * each is 24 bytes with the SHA1 at byte offset 4.  A byte
+        * pointer is used because arithmetic on void * is a GNU
+        * extension, not ISO C.
+        */
+       const unsigned char *entries =
+               (const unsigned char *)(p->index_base + 256);
+
+       if (n < 0 || num_packed_objects(p) <= n)
+               return -1;
+       memcpy(sha1, entries + 24 * n + 4, 20);
+       return 0;
+}
 
 static int find_pack_entry_1(const unsigned char *sha1,
@@ -891,31 +1006,46 @@ void *read_object_with_reference(const unsigned char *sha1,
        }
 }
 
+/*
+ * Format the object header "<type> <len>" into hdr, store its length
+ * (terminating NUL included) in *hdrlen, and compute into sha1 the
+ * SHA1 of that header followed by the len bytes at buf.
+ *
+ * Returns sha1_file_name(sha1).  hdr must be big enough for the
+ * formatted header; no bound is checked here.
+ */
+static char *write_sha1_file_prepare(void *buf,
+                                    unsigned long len,
+                                    const char *type,
+                                    unsigned char *sha1,
+                                    unsigned char *hdr,
+                                    int *hdrlen)
+{
+       SHA_CTX ctx;
+       int n;
+
+       /* +1 so the trailing NUL counts as part of the header */
+       n = sprintf((char *)hdr, "%s %lu", type, len) + 1;
+       *hdrlen = n;
+
+       /* hash header first, then the payload */
+       SHA1_Init(&ctx);
+       SHA1_Update(&ctx, hdr, n);
+       SHA1_Update(&ctx, buf, len);
+       SHA1_Final(sha1, &ctx);
+
+       return sha1_file_name(sha1);
+}
+
 int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
 {
        int size;
        unsigned char *compressed;
        z_stream stream;
        unsigned char sha1[20];
-       SHA_CTX c;
        char *filename;
        static char tmpfile[PATH_MAX];
        unsigned char hdr[50];
        int fd, hdrlen, ret;
 
-       /* Generate the header */
-       hdrlen = sprintf((char *)hdr, "%s %lu", type, len)+1;
-
-       /* Sha1.. */
-       SHA1_Init(&c);
-       SHA1_Update(&c, hdr, hdrlen);
-       SHA1_Update(&c, buf, len);
-       SHA1_Final(sha1, &c);
-
+       /* Normally if we have it in the pack then we do not bother writing
+        * it out into .git/objects/??/?{38} file.
+        */
+       filename = write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);
        if (returnsha1)
                memcpy(returnsha1, sha1, 20);
-
-       filename = sha1_file_name(sha1);
+       if (has_sha1_file(sha1))
+               return 0;
        fd = open(filename, O_RDONLY);
        if (fd >= 0) {
                /*