read-cache.c: read prefix-compressed names in index on-disk version v4
[gitweb.git] / read-cache.c
index 27e9fc6ee86005d4fd6cec5b3c2ae29dc2fd6bf3..1c173f7a63a1670530e3c30220635c85e60c1653 100644 (file)
@@ -12,6 +12,8 @@
 #include "commit.h"
 #include "blob.h"
 #include "resolve-undo.h"
+#include "strbuf.h"
+#include "varint.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really);
 
@@ -1120,11 +1122,16 @@ int refresh_index(struct index_state *istate, unsigned int flags, const char **p
                struct cache_entry *ce, *new;
                int cache_errno = 0;
                int changed = 0;
+               int filtered = 0;
 
                ce = istate->cache[i];
                if (ignore_submodules && S_ISGITLINK(ce->ce_mode))
                        continue;
 
+               if (pathspec &&
+                   !match_pathspec(pathspec, ce->name, strlen(ce->name), 0, seen))
+                       filtered = 1;
+
                if (ce_stage(ce)) {
                        while ((i < istate->cache_nr) &&
                               ! strcmp(istate->cache[i]->name, ce->name))
@@ -1132,12 +1139,14 @@ int refresh_index(struct index_state *istate, unsigned int flags, const char **p
                        i--;
                        if (allow_unmerged)
                                continue;
-                       show_file(unmerged_fmt, ce->name, in_porcelain, &first, header_msg);
+                       if (!filtered)
+                               show_file(unmerged_fmt, ce->name, in_porcelain,
+                                         &first, header_msg);
                        has_errors = 1;
                        continue;
                }
 
-               if (pathspec && !match_pathspec(pathspec, ce->name, strlen(ce->name), 0, seen))
+               if (filtered)
                        continue;
 
                new = refresh_cache_ent(istate, ce, options, &cache_errno, &changed);
@@ -1182,15 +1191,72 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int reall
        return refresh_cache_ent(&the_index, ce, really, NULL, NULL);
 }
 
+
+/*****************************************************************
+ * Index File I/O
+ *****************************************************************/
+
+/*
+ * On-disk layout of an index entry that does not have CE_EXTENDED set.
+ *
+ * dev/ino/uid/gid/size are also just tracked to the low 32 bits
+ * Again - this is just a (very strong in practice) heuristic that
+ * the inode hasn't changed.
+ *
+ * We save the fields in big-endian order to allow using the
+ * index file over NFS transparently.
+ *
+ * The reader in this file converts the integer fields to host order
+ * with ntoh_l()/ntoh_s() and copies sha1 unchanged.
+ */
+struct ondisk_cache_entry {
+       struct cache_time ctime;
+       struct cache_time mtime;
+       unsigned int dev;
+       unsigned int ino;
+       unsigned int mode;
+       unsigned int uid;
+       unsigned int gid;
+       unsigned int size;
+       unsigned char sha1[20];
+       unsigned short flags; /* (flags & CE_NAMEMASK) is the stored name length */
+       char name[FLEX_ARRAY]; /* more */
+};
+
+/*
+ * This struct is used when CE_EXTENDED bit is 1
+ * The struct must match ondisk_cache_entry exactly from
+ * ctime till flags
+ *
+ * The reader casts an ondisk_cache_entry pointer to this type once it
+ * has seen CE_EXTENDED in flags, which is why the leading fields have
+ * to line up.
+ */
+struct ondisk_cache_entry_extended {
+       struct cache_time ctime;
+       struct cache_time mtime;
+       unsigned int dev;
+       unsigned int ino;
+       unsigned int mode;
+       unsigned int uid;
+       unsigned int gid;
+       unsigned int size;
+       unsigned char sha1[20];
+       unsigned short flags;
+       unsigned short flags2; /* becomes bits 16 and up of the in-memory flags */
+       char name[FLEX_ARRAY]; /* more */
+};
+
+/*
+ * These are only used for v3 or lower
+ *
+ * align_flex_name() gives the on-disk size of an entry whose name is
+ * "len" bytes long: the offset of the name field plus len, advanced to
+ * the next multiple of 8 strictly greater than that sum.  Between 1 and
+ * 8 bytes therefore follow the name; the writer allocates the entry
+ * with xcalloc(), so those bytes are zero.
+ *
+ * ondisk_ce_size() picks the plain or the extended layout from the
+ * CE_EXTENDED bit of an in-memory entry's ce_flags.
+ */
+#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7)
+#define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
+#define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len)
+#define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? \
+                           ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
+                           ondisk_cache_entry_size(ce_namelen(ce)))
+
 static int verify_hdr(struct cache_header *hdr, unsigned long size)
 {
        git_SHA_CTX c;
        unsigned char sha1[20];
+       int hdr_version;
 
        if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
                return error("bad signature");
-       if (hdr->hdr_version != htonl(2) && hdr->hdr_version != htonl(3))
-               return error("bad index version");
+       hdr_version = ntohl(hdr->hdr_version);
+       if (hdr_version < 2 || 4 < hdr_version)
+               return error("bad index version %d", hdr_version);
        git_SHA1_Init(&c);
        git_SHA1_Update(&c, hdr, size - 20);
        git_SHA1_Final(sha1, &c);
@@ -1224,63 +1290,115 @@ int read_index(struct index_state *istate)
        return read_index_from(istate, get_index_file());
 }
 
-static void convert_from_disk(struct ondisk_cache_entry *ondisk, struct cache_entry *ce)
+#ifndef NEEDS_ALIGNED_ACCESS /* ntoh_s()/ntoh_l() convert the field where it lies */
+#define ntoh_s(var) ntohs(var)
+#define ntoh_l(var) ntohl(var)
+#else /* copy the field into a local of the right type, then convert that */
+static inline uint16_t ntoh_s_force_align(void *p)
+{
+       uint16_t x;
+       memcpy(&x, p, sizeof(x)); /* byte-wise copy, so p itself is never read as a uint16_t */
+       return ntohs(x);
+}
+static inline uint32_t ntoh_l_force_align(void *p)
+{
+       uint32_t x;
+       memcpy(&x, p, sizeof(x)); /* byte-wise copy, so p itself is never read as a uint32_t */
+       return ntohl(x);
+}
+#define ntoh_s(var) ntoh_s_force_align(&(var)) /* takes the address, so var must be an lvalue */
+#define ntoh_l(var) ntoh_l_force_align(&(var)) /* takes the address, so var must be an lvalue */
+#endif /* NEEDS_ALIGNED_ACCESS */
+
+static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *ondisk,
+                                                  unsigned int flags,
+                                                  const char *name,
+                                                  size_t len)
+{      /*
+        * Allocate an in-memory entry for a name of "len" bytes and fill
+        * it from "ondisk".  "flags" is stored as given (the caller has
+        * already decoded it from the on-disk entry).  "name" is copied
+        * for exactly "len" bytes and NUL-terminated here, so the source
+        * does not have to be terminated at that position.
+        * Returns the newly xmalloc()ed entry.
+        */
+       struct cache_entry *ce = xmalloc(cache_entry_size(len));
+
+       ce->ce_ctime.sec = ntoh_l(ondisk->ctime.sec); /* stat data: on-disk order -> host order */
+       ce->ce_mtime.sec = ntoh_l(ondisk->mtime.sec);
+       ce->ce_ctime.nsec = ntoh_l(ondisk->ctime.nsec);
+       ce->ce_mtime.nsec = ntoh_l(ondisk->mtime.nsec);
+       ce->ce_dev   = ntoh_l(ondisk->dev);
+       ce->ce_ino   = ntoh_l(ondisk->ino);
+       ce->ce_mode  = ntoh_l(ondisk->mode);
+       ce->ce_uid   = ntoh_l(ondisk->uid);
+       ce->ce_gid   = ntoh_l(ondisk->gid);
+       ce->ce_size  = ntoh_l(ondisk->size);
+       ce->ce_flags = flags; /* not re-read from ondisk->flags */
+       hashcpy(ce->sha1, ondisk->sha1);
+       memcpy(ce->name, name, len);
+       ce->name[len] = '\0'; /* terminate explicitly; only len bytes were copied */
+       return ce;
+}
+
+/*
+ * Adjacent cache entries tend to share the leading paths, so it makes
+ * sense to only store the differences in later entries.  In the v4
+ * on-disk format of the index, each on-disk cache entry stores the
+ * number of bytes to be stripped from the end of the previous name,
+ * and the bytes to append to the result, to come up with its name.
+ *
+ * "name" holds the previous entry's name on entry and this entry's
+ * name on return.  "cp_" points at the on-disk name field: a varint
+ * giving the number of trailing bytes to drop, followed by the
+ * NUL-terminated bytes to append.
+ *
+ * Dies if the varint is larger than the length of the previous name.
+ * Returns the number of bytes consumed starting at cp_, counting the
+ * varint, the appended bytes and the terminating NUL.
+ *
+ * NOTE(review): the search for the NUL is not bounded by the size of
+ * the mapped index file; confirm that callers guarantee a terminator
+ * inside the mapping.
+ */
+static unsigned long expand_name_field(struct strbuf *name, const char *cp_)
+{
+       const unsigned char *ep, *cp = (const unsigned char *)cp_;
+       size_t len = decode_varint(&cp); /* also advances cp past the varint */
+
+       if (name->len < len)
+               die("malformed name field in the index");
+       strbuf_remove(name, name->len - len, len); /* drop the last len bytes */
+       for (ep = cp; *ep; ep++)
+               ; /* find the end */
+       strbuf_add(name, cp, ep - cp);
+       return (const char *)ep + 1 - cp_; /* +1 steps over the NUL */
+}
+
+static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
+                                           unsigned long *ent_size,
+                                           struct strbuf *previous_name) /*
+ * Decode the on-disk entry at "ondisk" into a newly allocated in-memory
+ * cache_entry and store in *ent_size how many bytes the entry occupies
+ * on disk.
+ *
+ * "previous_name" is NULL for v3 and earlier, where the name is stored
+ * in full.  Otherwise it carries the preceding entry's name and is
+ * rewritten to this entry's name (the prefix-compressed v4 format).
+ *
+ * Dies if the extended flags contain bits outside CE_EXTENDED_FLAGS.
+ */
 {
+       struct cache_entry *ce;
        size_t len;
        const char *name;
+       unsigned int flags;
 
-       ce->ce_ctime.sec = ntohl(ondisk->ctime.sec);
-       ce->ce_mtime.sec = ntohl(ondisk->mtime.sec);
-       ce->ce_ctime.nsec = ntohl(ondisk->ctime.nsec);
-       ce->ce_mtime.nsec = ntohl(ondisk->mtime.nsec);
-       ce->ce_dev   = ntohl(ondisk->dev);
-       ce->ce_ino   = ntohl(ondisk->ino);
-       ce->ce_mode  = ntohl(ondisk->mode);
-       ce->ce_uid   = ntohl(ondisk->uid);
-       ce->ce_gid   = ntohl(ondisk->gid);
-       ce->ce_size  = ntohl(ondisk->size);
        /* On-disk flags are just 16 bits */
-       ce->ce_flags = ntohs(ondisk->flags);
-
-       hashcpy(ce->sha1, ondisk->sha1);
+       flags = ntoh_s(ondisk->flags);
+       len = flags & CE_NAMEMASK; /* only used on the v3-and-earlier path below */
 
-       len = ce->ce_flags & CE_NAMEMASK;
-
-       if (ce->ce_flags & CE_EXTENDED) {
+       if (flags & CE_EXTENDED) {
                struct ondisk_cache_entry_extended *ondisk2;
                int extended_flags;
                ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
-               extended_flags = ntohs(ondisk2->flags2) << 16;
+               extended_flags = ntoh_s(ondisk2->flags2) << 16; /* flags2 supplies bits 16 and up */
                /* We do not yet understand any bit out of CE_EXTENDED_FLAGS */
                if (extended_flags & ~CE_EXTENDED_FLAGS)
                        die("Unknown index entry format %08x", extended_flags);
-               ce->ce_flags |= extended_flags;
+               flags |= extended_flags;
                name = ondisk2->name;
        }
        else
                name = ondisk->name;
 
-       if (len == CE_NAMEMASK)
-               len = strlen(name);
-       /*
-        * NEEDSWORK: If the original index is crafted, this copy could
-        * go unchecked.
-        */
-       memcpy(ce->name, name, len + 1);
-}
-
-static inline size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
-{
-       size_t fix_size_mem = offsetof(struct cache_entry, name);
-       size_t fix_size_dsk = offsetof(struct ondisk_cache_entry, name);
-       long per_entry = (fix_size_mem - fix_size_dsk + 7) & ~7;
-
-       /*
-        * Alignment can cause differences. This should be "alignof", but
-        * since that's a gcc'ism, just use the size of a pointer.
-        */
-       per_entry += sizeof(void *);
-       return ondisk_size + entries*per_entry;
+       if (!previous_name) {
+               /*
+                * v3 and earlier: the name is stored in full.  A length
+                * field equal to CE_NAMEMASK does not give the real
+                * length, so measure the NUL-terminated name instead.
+                */
+               if (len == CE_NAMEMASK)
+                       len = strlen(name);
+               ce = cache_entry_from_ondisk(ondisk, flags, name, len);
+
+               *ent_size = ondisk_ce_size(ce); /* size rounded by align_flex_name() */
+       } else {
+               unsigned long consumed;
+               consumed = expand_name_field(previous_name, name); /* updates previous_name in place */
+               ce = cache_entry_from_ondisk(ondisk, flags,
+                                            previous_name->buf,
+                                            previous_name->len);
+
+               *ent_size = (name - ((char *)ondisk)) + consumed; /* bytes before the name + name field; no rounding */
+       }
+       return ce;
 }
 
 /* remember to discard_cache() before reading a different cache! */
@@ -1288,10 +1406,11 @@ int read_index_from(struct index_state *istate, const char *path)
 {
        int fd, i;
        struct stat st;
-       unsigned long src_offset, dst_offset;
+       unsigned long src_offset;
        struct cache_header *hdr;
        void *mmap;
        size_t mmap_size;
+       struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 
        errno = EBUSY;
        if (istate->initialized)
@@ -1327,30 +1446,26 @@ int read_index_from(struct index_state *istate, const char *path)
        istate->cache_nr = ntohl(hdr->hdr_entries);
        istate->cache_alloc = alloc_nr(istate->cache_nr);
        istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
-
-       /*
-        * The disk format is actually larger than the in-memory format,
-        * due to space for nsec etc, so even though the in-memory one
-        * has room for a few  more flags, we can allocate using the same
-        * index size
-        */
-       istate->alloc = xmalloc(estimate_cache_size(mmap_size, istate->cache_nr));
        istate->initialized = 1;
 
+       if (hdr->hdr_version == htonl(4))
+               previous_name = &previous_name_buf;
+       else
+               previous_name = NULL;
+
        src_offset = sizeof(*hdr);
-       dst_offset = 0;
        for (i = 0; i < istate->cache_nr; i++) {
                struct ondisk_cache_entry *disk_ce;
                struct cache_entry *ce;
+               unsigned long consumed;
 
                disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
-               ce = (struct cache_entry *)((char *)istate->alloc + dst_offset);
-               convert_from_disk(disk_ce, ce);
+               ce = create_from_disk(disk_ce, &consumed, previous_name);
                set_index_entry(istate, i, ce);
 
-               src_offset += ondisk_ce_size(ce);
-               dst_offset += ce_size(ce);
+               src_offset += consumed;
        }
+       strbuf_release(&previous_name_buf);
        istate->timestamp.sec = st.st_mtime;
        istate->timestamp.nsec = ST_MTIME_NSEC(st);
 
@@ -1383,11 +1498,15 @@ int read_index_from(struct index_state *istate, const char *path)
 
 int is_index_unborn(struct index_state *istate)
 {
-       return (!istate->cache_nr && !istate->alloc && !istate->timestamp.sec);
+       return (!istate->cache_nr && !istate->timestamp.sec); /* nonzero when there are no entries and timestamp.sec is zero */
 }
 
 int discard_index(struct index_state *istate)
 {
+       int i;
+
+       for (i = 0; i < istate->cache_nr; i++)
+               free(istate->cache[i]);
        resolve_undo_clear_index(istate);
        istate->cache_nr = 0;
        istate->cache_changed = 0;
@@ -1396,8 +1515,6 @@ int discard_index(struct index_state *istate)
        istate->name_hash_initialized = 0;
        free_hash(&istate->name_hash);
        cache_tree_free(&(istate->cache_tree));
-       free(istate->alloc);
-       istate->alloc = NULL;
        istate->initialized = 0;
 
        /* no need to throw away allocated active_cache */
@@ -1532,13 +1649,10 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce)
        }
 }
 
-static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce)
+/* Copy miscellaneous fields but not the name */
+static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
+                                      struct cache_entry *ce)
 {
-       int size = ondisk_ce_size(ce);
-       struct ondisk_cache_entry *ondisk = xcalloc(1, size);
-       char *name;
-       int result;
-
        ondisk->ctime.sec = htonl(ce->ce_ctime.sec);
        ondisk->mtime.sec = htonl(ce->ce_mtime.sec);
        ondisk->ctime.nsec = htonl(ce->ce_ctime.nsec);
@@ -1555,10 +1669,21 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce)
                struct ondisk_cache_entry_extended *ondisk2;
                ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
                ondisk2->flags2 = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
-               name = ondisk2->name;
+               return ondisk2->name;
        }
-       else
-               name = ondisk->name;
+       else {
+               return ondisk->name;
+       }
+}
+
+static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce)
+{
+       int size = ondisk_ce_size(ce);
+       struct ondisk_cache_entry *ondisk = xcalloc(1, size);
+       char *name;
+       int result;
+
+       name = copy_cache_entry_to_ondisk(ondisk, ce);
        memcpy(name, ce->name, ce_namelen(ce));
 
        result = ce_write(c, fd, ondisk, size);