Merge branch 'jc/streaming' into next
author Junio C Hamano <gitster@pobox.com>
Thu, 30 Jun 2011 00:09:27 +0000 (17:09 -0700)
committer Junio C Hamano <gitster@pobox.com>
Thu, 30 Jun 2011 00:09:27 +0000 (17:09 -0700)
* jc/streaming:
sha1_file: use the correct type (ssize_t, not size_t) for read-style function
streaming: read loose objects incrementally
sha1_file.c: expose helpers to read loose objects
streaming: read non-delta incrementally from a pack
streaming_write_entry(): support files with holes
convert: CRLF_INPUT is a no-op in the output codepath
streaming_write_entry(): use streaming API in write_entry()
streaming: a new API to read from the object store
write_entry(): separate two helper functions out
unpack_object_header(): make it public
sha1_object_info_extended(): hint about objects in delta-base cache
sha1_object_info_extended(): expose a bit more info
packed_object_info_detail(): do not return a string

Makefile
builtin/verify-pack.c
cache.h
convert.c
entry.c
sha1_file.c
streaming.c [new file with mode: 0644]
streaming.h [new file with mode: 0644]
index e40ac0c7f5ec2f304b88d92f47ff94272f5ce2a4..f8c72e10a0d9e939be09d0fe4bf64758304bbcab 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -556,6 +556,7 @@ LIB_H += sha1-lookup.h
 LIB_H += sideband.h
 LIB_H += sigchain.h
 LIB_H += strbuf.h
+LIB_H += streaming.h
 LIB_H += string-list.h
 LIB_H += submodule.h
 LIB_H += tag.h
@@ -662,6 +663,7 @@ LIB_OBJS += shallow.o
 LIB_OBJS += sideband.o
 LIB_OBJS += sigchain.o
 LIB_OBJS += strbuf.o
+LIB_OBJS += streaming.o
 LIB_OBJS += string-list.o
 LIB_OBJS += submodule.o
 LIB_OBJS += symlinks.o
index b6079ae6cb03c7f3112c6eebc8c9a012d690a125..3a919b170726a95b19c16f984ade250d4fc24c07 100644 (file)
@@ -33,9 +33,9 @@ static void show_pack_info(struct packed_git *p, unsigned int flags)
                if (!sha1)
                        die("internal error pack-check nth-packed-object");
                offset = nth_packed_object_offset(p, i);
-               type = packed_object_info_detail(p, offset, &size, &store_size,
+               type = typename(packed_object_info_detail(p, offset, &size, &store_size,
                                                 &delta_chain_length,
-                                                base_sha1);
+                                                base_sha1));
                if (!stat_only)
                        printf("%s ", sha1_to_hex(sha1));
                if (!delta_chain_length) {
diff --git a/cache.h b/cache.h
index f4bb43ec6aa3e910e8a7d2ba2d9f06440e400fc2..79c9302e22bddb9816d01a6e9d79206b21d03d4b 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -780,6 +780,9 @@ extern int hash_sha1_file(const void *buf, unsigned long len, const char *type,
 extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
 extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
 extern int force_object_loose(const unsigned char *sha1, time_t mtime);
+extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size);
+extern int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz);
+extern int parse_sha1_header(const char *hdr, unsigned long *sizep);
 
 /* global flag to enable extra checks when accessing packed objects */
 extern int do_check_packed_object_crc;
@@ -1021,7 +1024,37 @@ extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
 extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
 extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
 extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
-extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
+extern int packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
+extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
+
+/*
+ * Request and response block for sha1_object_info_extended(): the
+ * caller fills in the request member, the function fills in the
+ * response members.
+ */
+struct object_info {
+       /* Request */
+       unsigned long *sizep; /* where the object size is to be stored */
+
+       /* Response */
+       enum {
+               OI_CACHED,      /* answered by find_cached_object() */
+               OI_LOOSE,       /* answered by sha1_loose_object_info() */
+               OI_PACKED,      /* found in a pack; u.packed is filled in */
+               OI_DBCACHED     /* found in a pack and in the delta-base cache */
+       } whence;
+       union {
+               /*
+                * struct {
+                *      ... Nothing to expose in this case
+                * } cached;
+                * struct {
+                *      ... Nothing to expose in this case
+                * } loose;
+                */
+               struct {
+                       struct packed_git *pack;
+                       off_t offset;
+                       /* non-zero when stored as OBJ_REF_DELTA or OBJ_OFS_DELTA */
+                       unsigned int is_delta;
+               } packed;
+       } u;
+};
+/* returns the object type, or a negative value on failure */
+extern int sha1_object_info_extended(const unsigned char *, struct object_info *);
 
 /* Dumb servers support */
 extern int update_server_info(int);
@@ -1135,6 +1168,7 @@ extern int convert_to_git(const char *path, const char *src, size_t len,
                           struct strbuf *dst, enum safe_crlf checksafe);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
 extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
+extern int can_bypass_conversion(const char *path);
 
 /* add */
 /*
index efc7e07d475c66f7835dc6cbbd3bc358f01c41c3..264af1d5ba0924bd867962d2337bb5c019b87d81 100644 (file)
--- a/convert.c
+++ b/convert.c
@@ -813,3 +813,26 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
        }
        return ret | convert_to_git(path, src, len, dst, 0);
 }
+
+/*
+ * Tell whether the contents for "path" can be written out as-is,
+ * without going through any working-tree conversion.
+ *
+ * Returns 0 when ident is set or a filter driver with a smudge or
+ * clean command is configured, and otherwise 1 only when the
+ * effective CRLF action is binary, input, or "guess" with
+ * core.autocrlf off.  You would be crazy to set CRLF, smudge/clean
+ * or ident on a large binary blob that you would want us not to
+ * slurp into memory!
+ */
+int can_bypass_conversion(const char *path)
+{
+       struct conv_attrs attrs;
+
+       convert_attrs(&attrs, path);
+
+       if (attrs.ident)
+               return 0;
+       if (attrs.drv && (attrs.drv->smudge || attrs.drv->clean))
+               return 0;
+
+       switch (input_crlf_action(attrs.crlf_action, attrs.eol_attr)) {
+       case CRLF_BINARY:
+       case CRLF_INPUT:
+               return 1;
+       case CRLF_GUESS:
+               return auto_crlf == AUTO_CRLF_FALSE;
+       default:
+               return 0;
+       }
+}
diff --git a/entry.c b/entry.c
index b017167f2015623fb9c721e91d0a940abd1d5196..e2dc16c13143b97b28e4bc82238bda3c899306f3 100644 (file)
--- a/entry.c
+++ b/entry.c
@@ -1,6 +1,7 @@
 #include "cache.h"
 #include "blob.h"
 #include "dir.h"
+#include "streaming.h"
 
 static void create_directories(const char *path, int path_len,
                               const struct checkout *state)
@@ -91,6 +92,90 @@ static void *read_blob_entry(struct cache_entry *ce, unsigned long *size)
        return NULL;
 }
 
+/*
+ * Create the file the contents of "ce" are going to be written to
+ * and return its descriptor; callers treat a negative value as
+ * failure.
+ *
+ * With to_tempfile set, a mkstemp() template is copied into "path"
+ * (which therefore must be a writable buffer that can hold it) and
+ * mkstemp() replaces it with the generated name.  Otherwise the file
+ * is created at "path", using the mode from the index entry for a
+ * regular file and 0666 for anything else.
+ */
+static int open_output_fd(char *path, struct cache_entry *ce, int to_tempfile)
+{
+       int is_regular = (ce->ce_mode & S_IFMT) == S_IFREG;
+
+       if (!to_tempfile)
+               return create_file(path, is_regular ? ce->ce_mode : 0666);
+
+       if (is_regular)
+               strcpy(path, ".merge_file_XXXXXX");
+       else
+               strcpy(path, ".merge_link_XXXXXX");
+       return mkstemp(path);
+}
+
+/*
+ * Try to obtain the stat data of a file that has just been written
+ * from its still-open descriptor "fd".
+ *
+ * Returns 1 when "st" has been filled in, and 0 when it has not been
+ * touched reliably and the caller has to stat the path by itself.
+ */
+static int fstat_output(int fd, const struct checkout *state, struct stat *st)
+{
+       /* use fstat() only when path == ce->name */
+       if (!fstat_is_reliable() ||
+           !state->refresh_cache || state->base_dir_len)
+               return 0;
+       /*
+        * A failed fstat() leaves *st unspecified; report "not done"
+        * rather than letting the caller consume garbage.
+        */
+       return !fstat(fd, st);
+}
+
+/*
+ * Write the blob recorded in "ce" to "path" (or to a temporary file
+ * when to_tempfile is set) by reading it piecemeal through the
+ * streaming API instead of holding the whole object in core.
+ *
+ * A full buffer that consists solely of NUL bytes is not written; it
+ * is skipped over with lseek() so that the result may contain holes.
+ *
+ * Returns 0 on success, after setting *fstat_done to tell whether
+ * *statbuf was filled from the open descriptor.  Returns non-zero
+ * when the object cannot be opened as a blob, or when reading,
+ * seeking, writing or closing fails; a file created before the
+ * failure is removed again.
+ */
+static int streaming_write_entry(struct cache_entry *ce, char *path,
+                                const struct checkout *state, int to_tempfile,
+                                int *fstat_done, struct stat *statbuf)
+{
+       struct git_istream *st;
+       enum object_type type;
+       unsigned long sz;
+       int result = -1;
+       off_t kept = 0; /* length of the pending, not yet materialized hole */
+       int fd = -1;
+
+       st = open_istream(ce->sha1, &type, &sz);
+       if (!st)
+               return -1;
+       if (type != OBJ_BLOB)
+               goto close_and_exit;
+
+       fd = open_output_fd(path, ce, to_tempfile);
+       if (fd < 0)
+               goto close_and_exit;
+
+       for (;;) {
+               char buf[1024 * 16];
+               ssize_t wrote, holeto;
+               ssize_t readlen = read_istream(st, buf, sizeof(buf));
+
+               /* a negative length must never reach write_in_full() */
+               if (readlen < 0)
+                       goto close_and_exit;
+               if (!readlen)
+                       break;
+               if (sizeof(buf) == readlen) {
+                       for (holeto = 0; holeto < readlen; holeto++)
+                               if (buf[holeto])
+                                       break;
+                       if (readlen == holeto) {
+                               kept += holeto;
+                               continue;
+                       }
+               }
+
+               if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
+                       goto close_and_exit;
+               else
+                       kept = 0;
+               wrote = write_in_full(fd, buf, readlen);
+
+               if (wrote != readlen)
+                       goto close_and_exit;
+       }
+       /* a trailing hole needs one real byte at its end to set the size */
+       if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
+                    write(fd, "", 1) != 1))
+               goto close_and_exit;
+       *fstat_done = fstat_output(fd, state, statbuf);
+       result = 0;
+
+close_and_exit:
+       close_istream(st);
+       if (0 <= fd) {
+               /* a successful close() must not hide an earlier failure */
+               if (close(fd))
+                       result = -1;
+               if (result)
+                       unlink(path);
+       }
+       return result;
+}
+
 static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
 {
        unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
@@ -101,6 +186,12 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
        size_t wrote, newsize = 0;
        struct stat st;
 
+       if ((ce_mode_s_ifmt == S_IFREG) &&
+           can_bypass_conversion(path) &&
+           !streaming_write_entry(ce, path, state, to_tempfile,
+                                  &fstat_done, &st))
+               goto finish;
+
        switch (ce_mode_s_ifmt) {
        case S_IFREG:
        case S_IFLNK:
@@ -128,17 +219,7 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
                        size = newsize;
                }
 
-               if (to_tempfile) {
-                       if (ce_mode_s_ifmt == S_IFREG)
-                               strcpy(path, ".merge_file_XXXXXX");
-                       else
-                               strcpy(path, ".merge_link_XXXXXX");
-                       fd = mkstemp(path);
-               } else if (ce_mode_s_ifmt == S_IFREG) {
-                       fd = create_file(path, ce->ce_mode);
-               } else {
-                       fd = create_file(path, 0666);
-               }
+               fd = open_output_fd(path, ce, to_tempfile);
                if (fd < 0) {
                        free(new);
                        return error("unable to create file %s (%s)",
@@ -146,12 +227,8 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
                }
 
                wrote = write_in_full(fd, new, size);
-               /* use fstat() only when path == ce->name */
-               if (fstat_is_reliable() &&
-                   state->refresh_cache && !to_tempfile && !state->base_dir_len) {
-                       fstat(fd, &st);
-                       fstat_done = 1;
-               }
+               if (!to_tempfile)
+                       fstat_done = fstat_output(fd, state, &st);
                close(fd);
                free(new);
                if (wrote != size)
@@ -167,6 +244,7 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
                return error("unknown file mode for %s in index", path);
        }
 
+finish:
        if (state->refresh_cache) {
                if (!fstat_done)
                        lstat(ce->name, &st);
index 064a33040812ba8782bf602c693abf08613d6ec7..8a85217996a80af300f544fa9a3d8d905b50f266 100644 (file)
@@ -1186,7 +1186,7 @@ static int open_sha1_file(const unsigned char *sha1)
        return -1;
 }
 
-static void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 {
        void *map;
        int fd;
@@ -1245,7 +1245,7 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
        return used;
 }
 
-static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz)
+int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz)
 {
        unsigned long size, used;
        static const char valid_loose_object_type[8] = {
@@ -1342,7 +1342,7 @@ static void *unpack_sha1_rest(z_stream *stream, void *buffer, unsigned long size
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-static int parse_sha1_header(const char *hdr, unsigned long *sizep)
+int parse_sha1_header(const char *hdr, unsigned long *sizep)
 {
        char type[10];
        int i;
@@ -1481,7 +1481,7 @@ static off_t get_delta_base(struct packed_git *p,
 
 /* forward declaration for a mutually recursive function */
 static int packed_object_info(struct packed_git *p, off_t offset,
-                             unsigned long *sizep);
+                             unsigned long *sizep, int *rtype);
 
 static int packed_delta_info(struct packed_git *p,
                             struct pack_window **w_curs,
@@ -1495,7 +1495,7 @@ static int packed_delta_info(struct packed_git *p,
        base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
        if (!base_offset)
                return OBJ_BAD;
-       type = packed_object_info(p, base_offset, NULL);
+       type = packed_object_info(p, base_offset, NULL, NULL);
        if (type <= OBJ_NONE) {
                struct revindex_entry *revidx;
                const unsigned char *base_sha1;
@@ -1523,10 +1523,10 @@ static int packed_delta_info(struct packed_git *p,
        return type;
 }
 
-static int unpack_object_header(struct packed_git *p,
-                               struct pack_window **w_curs,
-                               off_t *curpos,
-                               unsigned long *sizep)
+int unpack_object_header(struct packed_git *p,
+                        struct pack_window **w_curs,
+                        off_t *curpos,
+                        unsigned long *sizep)
 {
        unsigned char *base;
        unsigned int left;
@@ -1549,7 +1549,7 @@ static int unpack_object_header(struct packed_git *p,
        return type;
 }
 
-const char *packed_object_info_detail(struct packed_git *p,
+int packed_object_info_detail(struct packed_git *p,
                                      off_t obj_offset,
                                      unsigned long *size,
                                      unsigned long *store_size,
@@ -1580,7 +1580,7 @@ const char *packed_object_info_detail(struct packed_git *p,
                case OBJ_BLOB:
                case OBJ_TAG:
                        unuse_pack(&w_curs);
-                       return typename(type);
+                       return type;
                case OBJ_OFS_DELTA:
                        obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
                        if (!obj_offset)
@@ -1605,7 +1605,7 @@ const char *packed_object_info_detail(struct packed_git *p,
 }
 
 static int packed_object_info(struct packed_git *p, off_t obj_offset,
-                             unsigned long *sizep)
+                             unsigned long *sizep, int *rtype)
 {
        struct pack_window *w_curs = NULL;
        unsigned long size;
@@ -1613,6 +1613,8 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
        enum object_type type;
 
        type = unpack_object_header(p, &w_curs, &curpos, &size);
+       if (rtype)
+               *rtype = type; /* representation type */
 
        switch (type) {
        case OBJ_OFS_DELTA:
@@ -1695,6 +1697,13 @@ static unsigned long pack_entry_hash(struct packed_git *p, off_t base_offset)
        return hash % MAX_DELTA_CACHE;
 }
 
+/*
+ * Does the delta-base cache slot that (p, base_offset) hashes to
+ * currently hold data for exactly that pack and offset?
+ */
+static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
+{
+       struct delta_base_cache_entry *slot;
+
+       slot = &delta_base_cache[pack_entry_hash(p, base_offset)];
+       if (!slot->data)
+               return 0;
+       if (slot->p != p)
+               return 0;
+       return slot->base_offset == base_offset;
+}
+
 static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
        unsigned long *base_size, enum object_type *type, int keep_cache)
 {
@@ -2093,24 +2102,28 @@ static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *size
        return status;
 }
 
-int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
+/* returns enum object_type or negative */
+int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
 {
        struct cached_object *co;
        struct pack_entry e;
-       int status;
+       int status, rtype;
 
        co = find_cached_object(sha1);
        if (co) {
-               if (sizep)
-                       *sizep = co->size;
+               if (oi->sizep)
+                       *(oi->sizep) = co->size;
+               oi->whence = OI_CACHED;
                return co->type;
        }
 
        if (!find_pack_entry(sha1, &e)) {
                /* Most likely it's a loose object. */
-               status = sha1_loose_object_info(sha1, sizep);
-               if (status >= 0)
+               status = sha1_loose_object_info(sha1, oi->sizep);
+               if (status >= 0) {
+                       oi->whence = OI_LOOSE;
                        return status;
+               }
 
                /* Not a loose object; someone else may have just packed it. */
                reprepare_packed_git();
@@ -2118,15 +2131,31 @@ int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
                        return status;
        }
 
-       status = packed_object_info(e.p, e.offset, sizep);
+       status = packed_object_info(e.p, e.offset, oi->sizep, &rtype);
        if (status < 0) {
                mark_bad_packed_object(e.p, sha1);
-               status = sha1_object_info(sha1, sizep);
+               status = sha1_object_info_extended(sha1, oi);
+       } else if (in_delta_base_cache(e.p, e.offset)) {
+               oi->whence = OI_DBCACHED;
+       } else {
+               oi->whence = OI_PACKED;
+               oi->u.packed.offset = e.offset;
+               oi->u.packed.pack = e.p;
+               oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
+                                        rtype == OBJ_OFS_DELTA);
        }
 
        return status;
 }
 
+/*
+ * Thin wrapper around sha1_object_info_extended() for callers that
+ * are interested only in its return value and, via sizep, the size
+ * of the object.
+ */
+int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
+{
+       struct object_info request;
+
+       /* sizep is the only request member; the rest is response */
+       request.sizep = sizep;
+       return sha1_object_info_extended(sha1, &request);
+}
+
 static void *read_packed_sha1(const unsigned char *sha1,
                              enum object_type *type, unsigned long *size)
 {
@@ -2704,7 +2733,7 @@ static int index_stream(unsigned char *sha1, int fd, size_t size,
        while (size) {
                char buf[10240];
                size_t sz = size < sizeof(buf) ? size : sizeof(buf);
-               size_t actual;
+               ssize_t actual;
 
                actual = read_in_full(fd, buf, sz);
                if (actual < 0)
diff --git a/streaming.c b/streaming.c
new file mode 100644 (file)
index 0000000..0602926
--- /dev/null
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+#include "cache.h"
+#include "streaming.h"
+
+/*
+ * How an object is going to be read.  The non-negative values are
+ * used as indices into open_istream_tbl[], so the two have to be
+ * kept in the same order.
+ */
+enum input_source {
+       stream_error = -1,
+       incore = 0,
+       loose = 1,
+       pack_non_delta = 2
+};
+
+/*
+ * Signatures of the per-source methods.  The open method receives the
+ * stream to set up, the object_info obtained while choosing the
+ * source, the object name, and where to store the object type.
+ */
+typedef int (*open_istream_fn)(struct git_istream *,
+                              struct object_info *,
+                              const unsigned char *,
+                              enum object_type *);
+typedef int (*close_istream_fn)(struct git_istream *);
+typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t);
+
+/* methods an opened stream dispatches to; installed by its open method */
+struct stream_vtbl {
+       close_istream_fn close;
+       read_istream_fn read;
+};
+
+/*
+ * Helpers that spell out the declarator of a method for source
+ * "name", e.g. open_method_decl(loose) declares open_istream_loose().
+ * The parameter names (st, oi, sha1, type, buf, sz) are fixed here and
+ * are what the method bodies refer to.
+ */
+#define open_method_decl(name) \
+       int open_istream_ ##name \
+       (struct git_istream *st, struct object_info *oi, \
+        const unsigned char *sha1, \
+        enum object_type *type)
+
+#define close_method_decl(name) \
+       int close_istream_ ##name \
+       (struct git_istream *st)
+
+#define read_method_decl(name) \
+       ssize_t read_istream_ ##name \
+       (struct git_istream *st, char *buf, size_t sz)
+
+/* forward declaration */
+static open_method_decl(incore);
+static open_method_decl(loose);
+static open_method_decl(pack_non_delta);
+
+/* open methods, indexed by the non-negative values of enum input_source */
+static open_istream_fn open_istream_tbl[] = {
+       open_istream_incore,
+       open_istream_loose,
+       open_istream_pack_non_delta,
+};
+
+/*
+ * State of one open stream.  Which member of "u" is in use depends on
+ * the open method that set the stream up.
+ */
+struct git_istream {
+       const struct stream_vtbl *vtbl; /* close/read methods of the source */
+       unsigned long size; /* inflated size of full object */
+       z_stream z; /* used by the loose and pack_non_delta readers */
+       /* whether "z" is initialized, has reached its end, or has failed */
+       enum { z_unused, z_used, z_done, z_error } z_state;
+
+       union {
+               struct {
+                       char *buf; /* from read_object() */
+                       unsigned long read_ptr; /* offset of the next unread byte */
+               } incore;
+
+               struct {
+                       void *mapped; /* from map_sha1_file() */
+                       unsigned long mapsize;
+                       char hdr[32]; /* output of unpack_sha1_header() */
+                       int hdr_avail; /* number of inflated bytes held in hdr[] */
+                       int hdr_used; /* bytes of hdr[] skipped or handed out so far */
+               } loose;
+
+               struct {
+                       struct packed_git *pack;
+                       off_t pos; /* offset in the pack to continue reading from */
+               } in_pack;
+       } u;
+};
+
+/*
+ * Release everything held by a stream obtained from open_istream(),
+ * including the stream structure itself; "st" must not be used after
+ * this call.  Returns what the close method of the source returns.
+ */
+int close_istream(struct git_istream *st)
+{
+       int status = st->vtbl->close(st);
+
+       /* open_istream() allocated the structure; no close method frees it */
+       free(st);
+       return status;
+}
+
+/*
+ * Read up to "sz" bytes of object data into "buf" by dispatching to
+ * the read method of the stream's source, and pass its result back.
+ */
+ssize_t read_istream(struct git_istream *st, char *buf, size_t sz)
+{
+       read_istream_fn read_fn = st->vtbl->read;
+
+       return read_fn(st, buf, sz);
+}
+
+/*
+ * Look the object up and decide how it should be read.  Stores the
+ * object type in *type and fills in *oi; returns stream_error when
+ * the lookup fails.
+ *
+ * Note that oi->sizep is left pointing at a local variable of this
+ * function, so it must not be dereferenced after the call returns.
+ */
+static enum input_source istream_source(const unsigned char *sha1,
+                                       enum object_type *type,
+                                       struct object_info *oi)
+{
+       unsigned long objsize;
+       int otype;
+
+       oi->sizep = &objsize;
+       otype = sha1_object_info_extended(sha1, oi);
+       if (otype < 0)
+               return stream_error;
+       *type = otype;
+
+       if (oi->whence == OI_LOOSE)
+               return loose;
+       /* only big, non-delta packed objects are streamed from the pack */
+       if (oi->whence == OI_PACKED &&
+           !oi->u.packed.is_delta && big_file_threshold <= objsize)
+               return pack_non_delta;
+       return incore;
+}
+
+/*
+ * Open a stream for reading the object "sha1", after mapping it
+ * through lookup_replace_object().  On success the object type is
+ * stored in *type, its inflated size in *size, and a newly allocated
+ * stream is returned.  Returns NULL when the object cannot be found
+ * or none of the open methods succeeds.
+ */
+struct git_istream *open_istream(const unsigned char *sha1,
+                                enum object_type *type,
+                                unsigned long *size)
+{
+       struct object_info oi;
+       const unsigned char *actual = lookup_replace_object(sha1);
+       enum input_source how = istream_source(actual, type, &oi);
+       struct git_istream *stream;
+
+       if (how < 0)
+               return NULL;
+
+       stream = xmalloc(sizeof(*stream));
+       /* when the chosen method fails, retry with the in-core one */
+       if (open_istream_tbl[how](stream, &oi, actual, type) &&
+           open_istream_incore(stream, &oi, actual, type)) {
+               free(stream);
+               return NULL;
+       }
+       *size = stream->size;
+       return stream;
+}
+
+
+/*****************************************************************
+ *
+ * Common helpers
+ *
+ *****************************************************************/
+
+/*
+ * End the inflate state of the stream, but only while it is marked
+ * z_used; the read methods already end it themselves before they
+ * switch to z_done or z_error.
+ */
+static void close_deflated_stream(struct git_istream *st)
+{
+       if (st->z_state != z_used)
+               return;
+       git_inflate_end(&st->z);
+}
+
+
+/*****************************************************************
+ *
+ * Loose object stream
+ *
+ *****************************************************************/
+
+/*
+ * Read method for loose objects: store up to "sz" bytes of inflated
+ * object data in "buf".
+ *
+ * Returns the number of bytes stored, which is short of "sz" only
+ * when the end of the object has been reached, 0 once the stream is
+ * done, and -1 on an inflate error or a truncated object (and on
+ * every later call after such a failure).
+ */
+static read_method_decl(loose)
+{
+       size_t total_read = 0;
+
+       switch (st->z_state) {
+       case z_done:
+               return 0;
+       case z_error:
+               return -1;
+       default:
+               break;
+       }
+
+       /* first hand out what was inflated past the header at open time */
+       if (st->u.loose.hdr_used < st->u.loose.hdr_avail) {
+               size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used;
+               if (sz < to_copy)
+                       to_copy = sz;
+               memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy);
+               st->u.loose.hdr_used += to_copy;
+               total_read += to_copy;
+       }
+
+       while (total_read < sz) {
+               int status;
+
+               st->z.next_out = (unsigned char *)buf + total_read;
+               st->z.avail_out = sz - total_read;
+               status = git_inflate(&st->z, Z_FINISH);
+
+               total_read = st->z.next_out - (unsigned char *)buf;
+
+               if (status == Z_STREAM_END) {
+                       git_inflate_end(&st->z);
+                       st->z_state = z_done;
+                       break;
+               }
+               /*
+                * Z_BUF_ERROR is expected once the caller's buffer is
+                * full.  With room still left it means the input ran
+                * dry, and as no more input is ever supplied in this
+                * loop, retrying would spin forever.
+                */
+               if (status != Z_OK &&
+                   (status != Z_BUF_ERROR || total_read < sz)) {
+                       git_inflate_end(&st->z);
+                       st->z_state = z_error;
+                       return -1;
+               }
+       }
+       return total_read;
+}
+
+/*
+ * Close method for loose objects: end the inflate state if it is
+ * still active and unmap the object file.  Always returns 0.
+ */
+static close_method_decl(loose)
+{
+       void *map = st->u.loose.mapped;
+       unsigned long maplen = st->u.loose.mapsize;
+
+       close_deflated_stream(st);
+       munmap(map, maplen);
+       return 0;
+}
+
+/* methods of a loose object stream, in the order close, read */
+static struct stream_vtbl loose_vtbl = {
+       close_istream_loose,
+       read_istream_loose,
+};
+
+/*
+ * Open method for loose objects: map the object file, inflate the
+ * beginning of it into hdr[] and parse the object header found there
+ * to learn the size.
+ *
+ * Returns 0 on success, and -1 when the file cannot be mapped or its
+ * header cannot be inflated or parsed; in the failure cases nothing
+ * is left mapped.
+ */
+static open_method_decl(loose)
+{
+       st->u.loose.mapped = map_sha1_file(sha1, &st->u.loose.mapsize);
+       if (!st->u.loose.mapped)
+               return -1;
+       /*
+        * Reject a header that does not parse as well; otherwise
+        * st->size would be left unset and the strlen() below would
+        * run over bytes that were never validated.
+        */
+       if (unpack_sha1_header(&st->z,
+                              st->u.loose.mapped,
+                              st->u.loose.mapsize,
+                              st->u.loose.hdr,
+                              sizeof(st->u.loose.hdr)) < 0 ||
+           parse_sha1_header(st->u.loose.hdr, &st->size) < 0) {
+               git_inflate_end(&st->z);
+               munmap(st->u.loose.mapped, st->u.loose.mapsize);
+               return -1;
+       }
+
+       /* object data starts right after the NUL that ends the header */
+       st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1;
+       st->u.loose.hdr_avail = st->z.total_out;
+       st->z_state = z_used;
+
+       st->vtbl = &loose_vtbl;
+       return 0;
+}
+
+
+/*****************************************************************
+ *
+ * Non-delta packed object stream
+ *
+ *****************************************************************/
+
+/*
+ * Read method for non-delta objects in a pack: store up to "sz" bytes
+ * of inflated object data in "buf".
+ *
+ * Returns the number of bytes stored, 0 once the stream is done, and
+ * -1 on an inflate error (and on every later call after one).
+ */
+static read_method_decl(pack_non_delta)
+{
+       size_t total_read = 0;
+
+       switch (st->z_state) {
+       case z_unused:
+               /* the inflate state is set up lazily, on the first read */
+               memset(&st->z, 0, sizeof(st->z));
+               git_inflate_init(&st->z);
+               st->z_state = z_used;
+               break;
+       case z_done:
+               return 0;
+       case z_error:
+               return -1;
+       case z_used:
+               break;
+       }
+
+       while (total_read < sz) {
+               int status;
+               struct pack_window *window = NULL;
+               unsigned char *mapped;
+
+               /*
+                * Ask for the pack data at the current position anew on
+                * every round; use_pack() is handed &avail_in, presumably
+                * to report how many bytes are available there.
+                */
+               mapped = use_pack(st->u.in_pack.pack, &window,
+                                 st->u.in_pack.pos, &st->z.avail_in);
+
+               st->z.next_out = (unsigned char *)buf + total_read;
+               st->z.avail_out = sz - total_read;
+               st->z.next_in = mapped;
+               status = git_inflate(&st->z, Z_FINISH);
+
+               /* advance by what inflate consumed and produced */
+               st->u.in_pack.pos += st->z.next_in - mapped;
+               total_read = st->z.next_out - (unsigned char *)buf;
+               unuse_pack(&window);
+
+               if (status == Z_STREAM_END) {
+                       git_inflate_end(&st->z);
+                       st->z_state = z_done;
+                       break;
+               }
+               /* Z_OK and Z_BUF_ERROR both mean "call again" here */
+               if (status != Z_OK && status != Z_BUF_ERROR) {
+                       git_inflate_end(&st->z);
+                       st->z_state = z_error;
+                       return -1;
+               }
+       }
+       return total_read;
+}
+
+/*
+ * Close method for non-delta packed objects: end the inflate state if
+ * it is still active.  Always returns 0.
+ */
+static close_method_decl(pack_non_delta)
+{
+       close_deflated_stream(st);
+       return 0;
+}
+
+/* methods of a non-delta packed object stream, in the order close, read */
+static struct stream_vtbl pack_non_delta_vtbl = {
+       close_istream_pack_non_delta,
+       read_istream_pack_non_delta,
+};
+
+/*
+ * Open method for non-delta objects in a pack: remember the pack and
+ * parse the in-pack object header, which yields the object size and
+ * leaves the position just past that header.
+ *
+ * Returns 0 on success, and -1 when the in-pack type is not one of
+ * commit, tree, blob or tag.
+ */
+static open_method_decl(pack_non_delta)
+{
+       struct pack_window *w_curs = NULL;
+       enum object_type rep_type;
+
+       st->u.in_pack.pack = oi->u.packed.pack;
+       st->u.in_pack.pos = oi->u.packed.offset;
+
+       rep_type = unpack_object_header(st->u.in_pack.pack, &w_curs,
+                                       &st->u.in_pack.pos, &st->size);
+       unuse_pack(&w_curs);
+
+       if (rep_type != OBJ_COMMIT && rep_type != OBJ_TREE &&
+           rep_type != OBJ_BLOB && rep_type != OBJ_TAG)
+               return -1; /* we do not do deltas for now */
+
+       st->z_state = z_unused;
+       st->vtbl = &pack_non_delta_vtbl;
+       return 0;
+}
+
+
+/*****************************************************************
+ *
+ * In-core stream
+ *
+ *****************************************************************/
+
+/*
+ * Close method for in-core streams: release the buffer holding the
+ * object contents.  Always returns 0.
+ */
+static close_method_decl(incore)
+{
+       free(st->u.incore.buf);
+       return 0;
+}
+
+/*
+ * Read method for in-core streams: copy up to "sz" bytes from the
+ * unread part of the buffer into "buf" and advance the read position.
+ * Returns the number of bytes copied, which is 0 at the end.
+ */
+static read_method_decl(incore)
+{
+       size_t left = st->size - st->u.incore.read_ptr;
+       size_t count = (left <= sz) ? left : sz;
+
+       if (!count)
+               return 0;
+       memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, count);
+       st->u.incore.read_ptr += count;
+       return count;
+}
+
+/* methods of an in-core stream, in the order close, read */
+static struct stream_vtbl incore_vtbl = {
+       close_istream_incore,
+       read_istream_incore,
+};
+
+/*
+ * Open method for in-core streams: read the whole object into a
+ * buffer with read_sha1_file_extended(), which also supplies the type
+ * and the size.  Returns 0 on success and -1 when the object cannot
+ * be read.
+ */
+static open_method_decl(incore)
+{
+       st->vtbl = &incore_vtbl;
+       st->u.incore.read_ptr = 0;
+       st->u.incore.buf = read_sha1_file_extended(sha1, type, &st->size, 0);
+       if (!st->u.incore.buf)
+               return -1;
+       return 0;
+}
diff --git a/streaming.h b/streaming.h
new file mode 100644 (file)
index 0000000..18cbe68
--- /dev/null
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+#ifndef STREAMING_H
+#define STREAMING_H 1
+#include "cache.h"
+
+/* opaque */
+struct git_istream;
+
+/*
+ * open_istream() opens the named object for reading, stores its type
+ * and inflated size through the two pointers, and returns NULL on
+ * failure.
+ *
+ * read_istream() stores up to the given number of bytes in the buffer
+ * and returns how many it stored; 0 means the end of the object has
+ * been reached and a negative value reports an error.
+ *
+ * close_istream() must be called for every stream that was opened.
+ */
+extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *);
+extern int close_istream(struct git_istream *);
+extern ssize_t read_istream(struct git_istream *, char *, size_t);
+
+#endif /* STREAMING_H */