Merge branch 'jc/stream-to-pack'
authorJunio C Hamano <gitster@pobox.com>
Sat, 17 Dec 2011 06:33:40 +0000 (22:33 -0800)
committerJunio C Hamano <gitster@pobox.com>
Sat, 17 Dec 2011 06:33:40 +0000 (22:33 -0800)
* jc/stream-to-pack:
bulk-checkin: replace fast-import based implementation
csum-file: introduce sha1file_checkpoint
finish_tmp_packfile(): a helper function
create_tmp_packfile(): a helper function
write_pack_header(): a helper function

Conflicts:
pack.h

16 files changed:
Makefile
builtin/add.c
builtin/pack-objects.c
bulk-checkin.c [new file with mode: 0644]
bulk-checkin.h [new file with mode: 0644]
cache.h
config.c
csum-file.c
csum-file.h
environment.c
fast-import.c
pack-write.c
pack.h
sha1_file.c
t/t1050-large.sh
zlib.c
index ed8232075e017b43519298083a6c678ea9c806d9..0464e1fc68a93e1876e54c625d73d471f6c62e71 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -511,6 +511,7 @@ LIB_H += argv-array.h
 LIB_H += attr.h
 LIB_H += blob.h
 LIB_H += builtin.h
+LIB_H += bulk-checkin.h
 LIB_H += cache.h
 LIB_H += cache-tree.h
 LIB_H += color.h
@@ -600,6 +601,7 @@ LIB_OBJS += base85.o
 LIB_OBJS += bisect.o
 LIB_OBJS += blob.o
 LIB_OBJS += branch.o
+LIB_OBJS += bulk-checkin.o
 LIB_OBJS += bundle.o
 LIB_OBJS += cache-tree.o
 LIB_OBJS += color.o
index c59b0c98fefefc413c8330715fffcc83142d5b2d..1c42900ff8c55a94ccfd1d214567d0f64d615412 100644 (file)
@@ -13,6 +13,7 @@
 #include "diff.h"
 #include "diffcore.h"
 #include "revision.h"
+#include "bulk-checkin.h"
 
 static const char * const builtin_add_usage[] = {
        "git add [options] [--] <filepattern>...",
@@ -458,11 +459,15 @@ int cmd_add(int argc, const char **argv, const char *prefix)
                free(seen);
        }
 
+       plug_bulk_checkin();
+
        exit_status |= add_files_to_cache(prefix, pathspec, flags);
 
        if (add_new_files)
                exit_status |= add_files(&dir, flags);
 
+       unplug_bulk_checkin();
+
  finish:
        if (active_cache_changed) {
                if (write_cache(newfd, active_cache, active_nr) ||
index b1895aaaa1520ef910504c3beee685f95e72ec6b..96c1680976fc653a1cd7a9dd81a567885a5b0050 100644 (file)
@@ -76,7 +76,7 @@ static struct pack_idx_option pack_idx_opts;
 static const char *base_name;
 static int progress = 1;
 static int window = 10;
-static unsigned long pack_size_limit, pack_size_limit_cfg;
+static unsigned long pack_size_limit;
 static int depth = 50;
 static int delta_search_threads;
 static int pack_to_stdout;
@@ -638,7 +638,6 @@ static void write_pack_file(void)
        uint32_t i = 0, j;
        struct sha1file *f;
        off_t offset;
-       struct pack_header hdr;
        uint32_t nr_remaining = nr_result;
        time_t last_mtime = 0;
        struct object_entry **write_order;
@@ -652,22 +651,14 @@ static void write_pack_file(void)
                unsigned char sha1[20];
                char *pack_tmp_name = NULL;
 
-               if (pack_to_stdout) {
+               if (pack_to_stdout)
                        f = sha1fd_throughput(1, "<stdout>", progress_state);
-               } else {
-                       char tmpname[PATH_MAX];
-                       int fd;
-                       fd = odb_mkstemp(tmpname, sizeof(tmpname),
-                                        "pack/tmp_pack_XXXXXX");
-                       pack_tmp_name = xstrdup(tmpname);
-                       f = sha1fd(fd, pack_tmp_name);
-               }
-
-               hdr.hdr_signature = htonl(PACK_SIGNATURE);
-               hdr.hdr_version = htonl(PACK_VERSION);
-               hdr.hdr_entries = htonl(nr_remaining);
-               sha1write(f, &hdr, sizeof(hdr));
-               offset = sizeof(hdr);
+               else
+                       f = create_tmp_packfile(&pack_tmp_name);
+
+               offset = write_pack_header(f, nr_remaining);
+               if (!offset)
+                       die_errno("unable to write pack header");
                nr_written = 0;
                for (; i < nr_objects; i++) {
                        struct object_entry *e = write_order[i];
@@ -693,20 +684,8 @@ static void write_pack_file(void)
 
                if (!pack_to_stdout) {
                        struct stat st;
-                       const char *idx_tmp_name;
                        char tmpname[PATH_MAX];
 
-                       idx_tmp_name = write_idx_file(NULL, written_list, nr_written,
-                                                     &pack_idx_opts, sha1);
-
-                       snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
-                                base_name, sha1_to_hex(sha1));
-                       free_pack_by_name(tmpname);
-                       if (adjust_shared_perm(pack_tmp_name))
-                               die_errno("unable to make temporary pack file readable");
-                       if (rename(pack_tmp_name, tmpname))
-                               die_errno("unable to rename temporary pack file");
-
                        /*
                         * Packs are runtime accessed in their mtime
                         * order since newer packs are more likely to contain
@@ -714,28 +693,27 @@ static void write_pack_file(void)
                         * packs then we should modify the mtime of later ones
                         * to preserve this property.
                         */
-                       if (stat(tmpname, &st) < 0) {
+                       if (stat(pack_tmp_name, &st) < 0) {
                                warning("failed to stat %s: %s",
-                                       tmpname, strerror(errno));
+                                       pack_tmp_name, strerror(errno));
                        } else if (!last_mtime) {
                                last_mtime = st.st_mtime;
                        } else {
                                struct utimbuf utb;
                                utb.actime = st.st_atime;
                                utb.modtime = --last_mtime;
-                               if (utime(tmpname, &utb) < 0)
+                               if (utime(pack_tmp_name, &utb) < 0)
                                        warning("failed utime() on %s: %s",
                                                tmpname, strerror(errno));
                        }
 
-                       snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
-                                base_name, sha1_to_hex(sha1));
-                       if (adjust_shared_perm(idx_tmp_name))
-                               die_errno("unable to make temporary index file readable");
-                       if (rename(idx_tmp_name, tmpname))
-                               die_errno("unable to rename temporary index file");
-
-                       free((void *) idx_tmp_name);
+                       /* Enough space for "-<sha-1>.pack"? */
+                       if (sizeof(tmpname) <= strlen(base_name) + 50)
+                               die("pack base name '%s' too long", base_name);
+                       snprintf(tmpname, sizeof(tmpname), "%s-", base_name);
+                       finish_tmp_packfile(tmpname, pack_tmp_name,
+                                           written_list, nr_written,
+                                           &pack_idx_opts, sha1);
                        free(pack_tmp_name);
                        puts(sha1_to_hex(sha1));
                }
@@ -2098,10 +2076,6 @@ static int git_pack_config(const char *k, const char *v, void *cb)
                            pack_idx_opts.version);
                return 0;
        }
-       if (!strcmp(k, "pack.packsizelimit")) {
-               pack_size_limit_cfg = git_config_ulong(k, v);
-               return 0;
-       }
        return git_default_config(k, v, cb);
 }
 
diff --git a/bulk-checkin.c b/bulk-checkin.c
new file mode 100644 (file)
index 0000000..6b0b6d4
--- /dev/null
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+#include "bulk-checkin.h"
+#include "csum-file.h"
+#include "pack.h"
+
+static int pack_compression_level = Z_DEFAULT_COMPRESSION;
+
+static struct bulk_checkin_state {
+       unsigned plugged:1;
+
+       char *pack_tmp_name;
+       struct sha1file *f;
+       off_t offset;
+       struct pack_idx_option pack_idx_opts;
+
+       struct pack_idx_entry **written;
+       uint32_t alloc_written;
+       uint32_t nr_written;
+} state;
+
+static void finish_bulk_checkin(struct bulk_checkin_state *state)
+{
+       unsigned char sha1[20];
+       char packname[PATH_MAX];
+       int i;
+
+       if (!state->f)
+               return;
+
+       if (state->nr_written == 0) {
+               close(state->f->fd);
+               unlink(state->pack_tmp_name);
+               goto clear_exit;
+       } else if (state->nr_written == 1) {
+               sha1close(state->f, sha1, CSUM_FSYNC);
+       } else {
+               int fd = sha1close(state->f, sha1, 0);
+               fixup_pack_header_footer(fd, sha1, state->pack_tmp_name,
+                                        state->nr_written, sha1,
+                                        state->offset);
+               close(fd);
+       }
+
+       sprintf(packname, "%s/pack/pack-", get_object_directory());
+       finish_tmp_packfile(packname, state->pack_tmp_name,
+                           state->written, state->nr_written,
+                           &state->pack_idx_opts, sha1);
+       for (i = 0; i < state->nr_written; i++)
+               free(state->written[i]);
+
+clear_exit:
+       free(state->written);
+       memset(state, 0, sizeof(*state));
+
+       /* Make objects we just wrote available to ourselves */
+       reprepare_packed_git();
+}
+
+static int already_written(struct bulk_checkin_state *state, unsigned char sha1[])
+{
+       int i;
+
+       /* The object may already exist in the repository */
+       if (has_sha1_file(sha1))
+               return 1;
+
+       /* Might want to keep the list sorted */
+       for (i = 0; i < state->nr_written; i++)
+               if (!hashcmp(state->written[i]->sha1, sha1))
+                       return 1;
+
+       /* This is a new object we need to keep */
+       return 0;
+}
+
+/*
+ * Read the contents from fd for size bytes, streaming it to the
+ * packfile in state while updating the hash in ctx. Signal a failure
+ * by returning a negative value when the resulting pack would exceed
+ * the pack size limit and this is not the first object in the pack,
+ * so that the caller can discard what we wrote from the current pack
+ * by truncating it and opening a new one. The caller will then call
+ * us again after rewinding the input fd.
+ *
+ * The already_hashed_to pointer is kept untouched by the caller to
+ * make sure we do not hash the same byte when we are called
+ * again. This way, the caller does not have to checkpoint its hash
+ * status before calling us just in case we ask it to call us again
+ * with a new pack.
+ */
+static int stream_to_pack(struct bulk_checkin_state *state,
+                         git_SHA_CTX *ctx, off_t *already_hashed_to,
+                         int fd, size_t size, enum object_type type,
+                         const char *path, unsigned flags)
+{
+       git_zstream s;
+       unsigned char obuf[16384];
+       unsigned hdrlen;
+       int status = Z_OK;
+       int write_object = (flags & HASH_WRITE_OBJECT);
+       off_t offset = 0;
+
+       memset(&s, 0, sizeof(s));
+       git_deflate_init(&s, pack_compression_level);
+
+       hdrlen = encode_in_pack_object_header(type, size, obuf);
+       s.next_out = obuf + hdrlen;
+       s.avail_out = sizeof(obuf) - hdrlen;
+
+       while (status != Z_STREAM_END) {
+               unsigned char ibuf[16384];
+
+               if (size && !s.avail_in) {
+                       ssize_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
+                       if (xread(fd, ibuf, rsize) != rsize)
+                               die("failed to read %d bytes from '%s'",
+                                   (int)rsize, path);
+                       offset += rsize;
+                       if (*already_hashed_to < offset) {
+                               size_t hsize = offset - *already_hashed_to;
+                               if (rsize < hsize)
+                                       hsize = rsize;
+                               if (hsize)
+                                       git_SHA1_Update(ctx, ibuf, hsize);
+                               *already_hashed_to = offset;
+                       }
+                       s.next_in = ibuf;
+                       s.avail_in = rsize;
+                       size -= rsize;
+               }
+
+               status = git_deflate(&s, size ? 0 : Z_FINISH);
+
+               if (!s.avail_out || status == Z_STREAM_END) {
+                       if (write_object) {
+                               size_t written = s.next_out - obuf;
+
+                               /* would we bust the size limit? */
+                               if (state->nr_written &&
+                                   pack_size_limit_cfg &&
+                                   pack_size_limit_cfg < state->offset + written) {
+                                       git_deflate_abort(&s);
+                                       return -1;
+                               }
+
+                               sha1write(state->f, obuf, written);
+                               state->offset += written;
+                       }
+                       s.next_out = obuf;
+                       s.avail_out = sizeof(obuf);
+               }
+
+               switch (status) {
+               case Z_OK:
+               case Z_BUF_ERROR:
+               case Z_STREAM_END:
+                       continue;
+               default:
+                       die("unexpected deflate failure: %d", status);
+               }
+       }
+       git_deflate_end(&s);
+       return 0;
+}
+
+/* Lazily create backing packfile for the state */
+static void prepare_to_stream(struct bulk_checkin_state *state,
+                             unsigned flags)
+{
+       if (!(flags & HASH_WRITE_OBJECT) || state->f)
+               return;
+
+       state->f = create_tmp_packfile(&state->pack_tmp_name);
+       reset_pack_idx_option(&state->pack_idx_opts);
+
+       /* Pretend we are going to write only one object */
+       state->offset = write_pack_header(state->f, 1);
+       if (!state->offset)
+               die_errno("unable to write pack header");
+}
+
+static int deflate_to_pack(struct bulk_checkin_state *state,
+                          unsigned char result_sha1[],
+                          int fd, size_t size,
+                          enum object_type type, const char *path,
+                          unsigned flags)
+{
+       off_t seekback, already_hashed_to;
+       git_SHA_CTX ctx;
+       unsigned char obuf[16384];
+       unsigned header_len;
+       struct sha1file_checkpoint checkpoint;
+       struct pack_idx_entry *idx = NULL;
+
+       seekback = lseek(fd, 0, SEEK_CUR);
+       if (seekback == (off_t) -1)
+               return error("cannot find the current offset");
+
+       header_len = sprintf((char *)obuf, "%s %" PRIuMAX,
+                            typename(type), (uintmax_t)size) + 1;
+       git_SHA1_Init(&ctx);
+       git_SHA1_Update(&ctx, obuf, header_len);
+
+       /* Note: idx is non-NULL when we are writing */
+       if ((flags & HASH_WRITE_OBJECT) != 0)
+               idx = xcalloc(1, sizeof(*idx));
+
+       already_hashed_to = 0;
+
+       while (1) {
+               prepare_to_stream(state, flags);
+               if (idx) {
+                       sha1file_checkpoint(state->f, &checkpoint);
+                       idx->offset = state->offset;
+                       crc32_begin(state->f);
+               }
+               if (!stream_to_pack(state, &ctx, &already_hashed_to,
+                                   fd, size, type, path, flags))
+                       break;
+               /*
+                * Writing this object to the current pack will make
+                * it too big; we need to truncate it, start a new
+                * pack, and write into it.
+                */
+               if (!idx)
+                       die("BUG: should not happen");
+               sha1file_truncate(state->f, &checkpoint);
+               state->offset = checkpoint.offset;
+               finish_bulk_checkin(state);
+               if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
+                       return error("cannot seek back");
+       }
+       git_SHA1_Final(result_sha1, &ctx);
+       if (!idx)
+               return 0;
+
+       idx->crc32 = crc32_end(state->f);
+       if (already_written(state, result_sha1)) {
+               sha1file_truncate(state->f, &checkpoint);
+               state->offset = checkpoint.offset;
+               free(idx);
+       } else {
+               hashcpy(idx->sha1, result_sha1);
+               ALLOC_GROW(state->written,
+                          state->nr_written + 1,
+                          state->alloc_written);
+               state->written[state->nr_written++] = idx;
+       }
+       return 0;
+}
+
+int index_bulk_checkin(unsigned char *sha1,
+                      int fd, size_t size, enum object_type type,
+                      const char *path, unsigned flags)
+{
+       int status = deflate_to_pack(&state, sha1, fd, size, type,
+                                    path, flags);
+       if (!state.plugged)
+               finish_bulk_checkin(&state);
+       return status;
+}
+
+void plug_bulk_checkin(void)
+{
+       state.plugged = 1;
+}
+
+void unplug_bulk_checkin(void)
+{
+       state.plugged = 0;
+       if (state.f)
+               finish_bulk_checkin(&state);
+}
diff --git a/bulk-checkin.h b/bulk-checkin.h
new file mode 100644 (file)
index 0000000..4f599f8
--- /dev/null
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+#ifndef BULK_CHECKIN_H
+#define BULK_CHECKIN_H
+
+#include "cache.h"
+
+extern int index_bulk_checkin(unsigned char sha1[],
+                             int fd, size_t size, enum object_type type,
+                             const char *path, unsigned flags);
+
+extern void plug_bulk_checkin(void);
+extern void unplug_bulk_checkin(void);
+
+#endif
diff --git a/cache.h b/cache.h
index 8c98d056674c9a426215bf93398fe922a1a90679..627fb6a084b3d3e60b10121928c59edb384b96c4 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -35,6 +35,7 @@ int git_inflate(git_zstream *, int flush);
 void git_deflate_init(git_zstream *, int level);
 void git_deflate_init_gzip(git_zstream *, int level);
 void git_deflate_end(git_zstream *);
+int git_deflate_abort(git_zstream *);
 int git_deflate_end_gently(git_zstream *);
 int git_deflate(git_zstream *, int flush);
 unsigned long git_deflate_bound(git_zstream *, unsigned long);
@@ -597,6 +598,7 @@ extern size_t packed_git_window_size;
 extern size_t packed_git_limit;
 extern size_t delta_base_cache_limit;
 extern unsigned long big_file_threshold;
+extern unsigned long pack_size_limit_cfg;
 extern int read_replace_refs;
 extern int fsync_object_files;
 extern int core_preload_index;
index 5ea101fb251d27eadac20c665a7f01fb210c20d1..40f9c6d10317ed47f7786e5c328df3ab6f167e7c 100644 (file)
--- a/config.c
+++ b/config.c
@@ -818,6 +818,10 @@ int git_default_config(const char *var, const char *value, void *dummy)
                return 0;
        }
 
+       if (!strcmp(var, "pack.packsizelimit")) {
+               pack_size_limit_cfg = git_config_ulong(var, value);
+               return 0;
+       }
        /* Add other config variables here and to Documentation/config.txt. */
        return 0;
 }
index fc97d6e04528b5c5b55fc211a462f3cb828f3d49..53f5375b6ca3368de6647cf5edcd7fb4dec79657 100644 (file)
@@ -158,6 +158,26 @@ struct sha1file *sha1fd_throughput(int fd, const char *name, struct progress *tp
        return f;
 }
 
+void sha1file_checkpoint(struct sha1file *f, struct sha1file_checkpoint *checkpoint)
+{
+       sha1flush(f);
+       checkpoint->offset = f->total;
+       checkpoint->ctx = f->ctx;
+}
+
+int sha1file_truncate(struct sha1file *f, struct sha1file_checkpoint *checkpoint)
+{
+       off_t offset = checkpoint->offset;
+
+       if (ftruncate(f->fd, offset) ||
+           lseek(f->fd, offset, SEEK_SET) != offset)
+               return -1;
+       f->total = offset;
+       f->ctx = checkpoint->ctx;
+       f->offset = 0; /* sha1flush() was called in checkpoint */
+       return 0;
+}
+
 void crc32_begin(struct sha1file *f)
 {
        f->crc32 = crc32(0, NULL, 0);
index 6a7967c6bf604076c7d68ce139f65f34df3bc30e..3b540bdc21d2edf8d3d6fc2b5e52cc840aa19395 100644 (file)
@@ -17,6 +17,15 @@ struct sha1file {
        unsigned char buffer[8192];
 };
 
+/* Checkpoint */
+struct sha1file_checkpoint {
+       off_t offset;
+       git_SHA_CTX ctx;
+};
+
+extern void sha1file_checkpoint(struct sha1file *, struct sha1file_checkpoint *);
+extern int sha1file_truncate(struct sha1file *, struct sha1file_checkpoint *);
+
 /* sha1close flags */
 #define CSUM_CLOSE     1
 #define CSUM_FSYNC     2
index 2c41d7d6cb66832197d69d49065a3d5b3bb2e5da..c93b8f44df0171a0f923546813d00e8b8e837af1 100644 (file)
@@ -62,6 +62,7 @@ int grafts_replace_parents = 1;
 int core_apply_sparse_checkout;
 int merge_log_config = -1;
 struct startup_info *startup_info;
+unsigned long pack_size_limit_cfg;
 
 /* Parallel index stat data preload? */
 int core_preload_index = 0;
index f4bfe0f6650fdb2666f031ca1f551134f872e098..350b2e9e107b8dadf129f00817b6ad3e85d9d973 100644 (file)
@@ -1143,17 +1143,11 @@ static int store_object(
        return 0;
 }
 
-static void truncate_pack(off_t to, git_SHA_CTX *ctx)
+static void truncate_pack(struct sha1file_checkpoint *checkpoint)
 {
-       if (ftruncate(pack_data->pack_fd, to)
-        || lseek(pack_data->pack_fd, to, SEEK_SET) != to)
+       if (sha1file_truncate(pack_file, checkpoint))
                die_errno("cannot truncate pack to skip duplicate");
-       pack_size = to;
-
-       /* yes this is a layering violation */
-       pack_file->total = to;
-       pack_file->offset = 0;
-       pack_file->ctx = *ctx;
+       pack_size = checkpoint->offset;
 }
 
 static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
@@ -1166,8 +1160,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
        unsigned long hdrlen;
        off_t offset;
        git_SHA_CTX c;
-       git_SHA_CTX pack_file_ctx;
        git_zstream s;
+       struct sha1file_checkpoint checkpoint;
        int status = Z_OK;
 
        /* Determine if we should auto-checkpoint. */
@@ -1175,11 +1169,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
                || (pack_size + 60 + len) < pack_size)
                cycle_packfile();
 
-       offset = pack_size;
-
-       /* preserve the pack_file SHA1 ctx in case we have to truncate later */
-       sha1flush(pack_file);
-       pack_file_ctx = pack_file->ctx;
+       sha1file_checkpoint(pack_file, &checkpoint);
+       offset = checkpoint.offset;
 
        hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
        if (out_sz <= hdrlen)
@@ -1245,14 +1236,14 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 
        if (e->idx.offset) {
                duplicate_count_by_type[OBJ_BLOB]++;
-               truncate_pack(offset, &pack_file_ctx);
+               truncate_pack(&checkpoint);
 
        } else if (find_sha1_pack(sha1, packed_git)) {
                e->type = OBJ_BLOB;
                e->pack_id = MAX_PACK_ID;
                e->idx.offset = 1; /* just not zero! */
                duplicate_count_by_type[OBJ_BLOB]++;
-               truncate_pack(offset, &pack_file_ctx);
+               truncate_pack(&checkpoint);
 
        } else {
                e->depth = 0;
index f84adde3eb3bb6f6d3f4a871167d6a07ef73ebd8..de2bd01414f07cb2d3fb9d212895ad0f1ea1ba26 100644 (file)
@@ -182,6 +182,18 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
        return index_name;
 }
 
+off_t write_pack_header(struct sha1file *f, uint32_t nr_entries)
+{
+       struct pack_header hdr;
+
+       hdr.hdr_signature = htonl(PACK_SIGNATURE);
+       hdr.hdr_version = htonl(PACK_VERSION);
+       hdr.hdr_entries = htonl(nr_entries);
+       if (sha1write(f, &hdr, sizeof(hdr)))
+               return 0;
+       return sizeof(hdr);
+}
+
 /*
  * Update pack header with object_count and compute new SHA1 for pack data
  * associated to pack_fd, and write that SHA1 at the end.  That new SHA1
@@ -320,3 +332,44 @@ int encode_in_pack_object_header(enum object_type type, uintmax_t size, unsigned
        *hdr = c;
        return n;
 }
+
+struct sha1file *create_tmp_packfile(char **pack_tmp_name)
+{
+       char tmpname[PATH_MAX];
+       int fd;
+
+       fd = odb_mkstemp(tmpname, sizeof(tmpname), "pack/tmp_pack_XXXXXX");
+       *pack_tmp_name = xstrdup(tmpname);
+       return sha1fd(fd, *pack_tmp_name);
+}
+
+void finish_tmp_packfile(char *name_buffer,
+                        const char *pack_tmp_name,
+                        struct pack_idx_entry **written_list,
+                        uint32_t nr_written,
+                        struct pack_idx_option *pack_idx_opts,
+                        unsigned char sha1[])
+{
+       const char *idx_tmp_name;
+       char *end_of_name_prefix = strrchr(name_buffer, 0);
+
+       if (adjust_shared_perm(pack_tmp_name))
+               die_errno("unable to make temporary pack file readable");
+
+       idx_tmp_name = write_idx_file(NULL, written_list, nr_written,
+                                     pack_idx_opts, sha1);
+       if (adjust_shared_perm(idx_tmp_name))
+               die_errno("unable to make temporary index file readable");
+
+       sprintf(end_of_name_prefix, "%s.pack", sha1_to_hex(sha1));
+       free_pack_by_name(name_buffer);
+
+       if (rename(pack_tmp_name, name_buffer))
+               die_errno("unable to rename temporary pack file");
+
+       sprintf(end_of_name_prefix, "%s.idx", sha1_to_hex(sha1));
+       if (rename(idx_tmp_name, name_buffer))
+               die_errno("unable to rename temporary index file");
+
+       free((void *)idx_tmp_name);
+}
diff --git a/pack.h b/pack.h
index a8d9b9f2fcc41bf56007cd48dcfcb94a7e00d3ca..aa6ee7d606f1d5336bff9a3067b44057c362e788 100644 (file)
--- a/pack.h
+++ b/pack.h
@@ -2,6 +2,7 @@
 #define PACK_H
 
 #include "object.h"
+#include "csum-file.h"
 
 /*
  * Packed object header
@@ -79,6 +80,7 @@ extern const char *write_idx_file(const char *index_name, struct pack_idx_entry
 extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr);
 extern int verify_pack_index(struct packed_git *);
 extern int verify_pack(struct packed_git *, verify_fn fn, struct progress *, uint32_t);
+extern off_t write_pack_header(struct sha1file *f, uint32_t);
 extern void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t);
 extern char *index_pack_lockfile(int fd);
 extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned char *);
@@ -87,4 +89,8 @@ extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned ch
 #define PH_ERROR_PACK_SIGNATURE        (-2)
 #define PH_ERROR_PROTOCOL      (-3)
 extern int read_pack_header(int fd, struct pack_header *);
+
+extern struct sha1file *create_tmp_packfile(char **pack_tmp_name);
+extern void finish_tmp_packfile(char *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, struct pack_idx_option *pack_idx_opts, unsigned char sha1[]);
+
 #endif
index 956422ba4a5df5f46caaf85f10e4c85321439272..f291f3f0f7d919c7d59b8b7dc3c3f2450a5a9f09 100644 (file)
@@ -18,6 +18,7 @@
 #include "refs.h"
 #include "pack-revindex.h"
 #include "sha1-lookup.h"
+#include "bulk-checkin.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -2680,10 +2681,8 @@ static int index_core(unsigned char *sha1, int fd, size_t size,
 }
 
 /*
- * This creates one packfile per large blob, because the caller
- * immediately wants the result sha1, and fast-import can report the
- * object name via marks mechanism only by closing the created
- * packfile.
+ * This creates one packfile per large blob unless bulk-checkin
+ * machinery is "plugged".
  *
  * This also bypasses the usual "convert-to-git" dance, and that is on
  * purpose. We could write a streaming version of the converting
@@ -2697,65 +2696,7 @@ static int index_stream(unsigned char *sha1, int fd, size_t size,
                        enum object_type type, const char *path,
                        unsigned flags)
 {
-       struct child_process fast_import;
-       char export_marks[512];
-       const char *argv[] = { "fast-import", "--quiet", export_marks, NULL };
-       char tmpfile[512];
-       char fast_import_cmd[512];
-       char buf[512];
-       int len, tmpfd;
-
-       strcpy(tmpfile, git_path("hashstream_XXXXXX"));
-       tmpfd = git_mkstemp_mode(tmpfile, 0600);
-       if (tmpfd < 0)
-               die_errno("cannot create tempfile: %s", tmpfile);
-       if (close(tmpfd))
-               die_errno("cannot close tempfile: %s", tmpfile);
-       sprintf(export_marks, "--export-marks=%s", tmpfile);
-
-       memset(&fast_import, 0, sizeof(fast_import));
-       fast_import.in = -1;
-       fast_import.argv = argv;
-       fast_import.git_cmd = 1;
-       if (start_command(&fast_import))
-               die_errno("index-stream: git fast-import failed");
-
-       len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n",
-                     (unsigned long) size);
-       write_or_whine(fast_import.in, fast_import_cmd, len,
-                      "index-stream: feeding fast-import");
-       while (size) {
-               char buf[10240];
-               size_t sz = size < sizeof(buf) ? size : sizeof(buf);
-               ssize_t actual;
-
-               actual = read_in_full(fd, buf, sz);
-               if (actual < 0)
-                       die_errno("index-stream: reading input");
-               if (write_in_full(fast_import.in, buf, actual) != actual)
-                       die_errno("index-stream: feeding fast-import");
-               size -= actual;
-       }
-       if (close(fast_import.in))
-               die_errno("index-stream: closing fast-import");
-       if (finish_command(&fast_import))
-               die_errno("index-stream: finishing fast-import");
-
-       tmpfd = open(tmpfile, O_RDONLY);
-       if (tmpfd < 0)
-               die_errno("index-stream: cannot open fast-import mark");
-       len = read(tmpfd, buf, sizeof(buf));
-       if (len < 0)
-               die_errno("index-stream: reading fast-import mark");
-       if (close(tmpfd) < 0)
-               die_errno("index-stream: closing fast-import mark");
-       if (unlink(tmpfile))
-               die_errno("index-stream: unlinking fast-import mark");
-       if (len != 44 ||
-           memcmp(":1 ", buf, 3) ||
-           get_sha1_hex(buf + 3, sha1))
-               die_errno("index-stream: unexpected fast-import mark: <%s>", buf);
-       return 0;
+       return index_bulk_checkin(sha1, fd, size, type, path, flags);
 }
 
 int index_fd(unsigned char *sha1, int fd, struct stat *st,
index deba111bd7c2d26f3bd38dbfa086cec209f81874..29d6024b7f1b55c09cbd7e9ed682a3e745c550d6 100755 (executable)
@@ -7,21 +7,97 @@ test_description='adding and checking out large blobs'
 
 test_expect_success setup '
        git config core.bigfilethreshold 200k &&
-       echo X | dd of=large bs=1k seek=2000
+       echo X | dd of=large1 bs=1k seek=2000 &&
+       echo X | dd of=large2 bs=1k seek=2000 &&
+       echo X | dd of=large3 bs=1k seek=2000 &&
+       echo Y | dd of=huge bs=1k seek=2500
 '
 
-test_expect_success 'add a large file' '
-       git add large &&
-       # make sure we got a packfile and no loose objects
-       test -f .git/objects/pack/pack-*.pack &&
-       test ! -f .git/objects/??/??????????????????????????????????????
+test_expect_success 'add a large file or two' '
+       git add large1 huge large2 &&
+       # make sure we got a single packfile and no loose objects
+       bad= count=0 idx= &&
+       for p in .git/objects/pack/pack-*.pack
+       do
+               count=$(( $count + 1 ))
+               if test -f "$p" && idx=${p%.pack}.idx && test -f "$idx"
+               then
+                       continue
+               fi
+               bad=t
+       done &&
+       test -z "$bad" &&
+       test $count = 1 &&
+       cnt=$(git show-index <"$idx" | wc -l) &&
+       test $cnt = 2 &&
+       for l in .git/objects/??/??????????????????????????????????????
+       do
+               test -f "$l" || continue
+               bad=t
+       done &&
+       test -z "$bad" &&
+
+       # attempt to add another copy of the same
+       git add large3 &&
+       bad= count=0 &&
+       for p in .git/objects/pack/pack-*.pack
+       do
+               count=$(( $count + 1 ))
+               if test -f "$p" && idx=${p%.pack}.idx && test -f "$idx"
+               then
+                       continue
+               fi
+               bad=t
+       done &&
+       test -z "$bad" &&
+       test $count = 1
 '
 
 test_expect_success 'checkout a large file' '
-       large=$(git rev-parse :large) &&
-       git update-index --add --cacheinfo 100644 $large another &&
+       large1=$(git rev-parse :large1) &&
+       git update-index --add --cacheinfo 100644 $large1 another &&
        git checkout another &&
-       cmp large another ;# this must not be test_cmp
+       cmp large1 another ;# this must not be test_cmp
+'
+
+test_expect_success 'packsize limit' '
+       test_create_repo mid &&
+       (
+               cd mid &&
+               git config core.bigfilethreshold 64k &&
+               git config pack.packsizelimit 256k &&
+
+               # mid1 and mid2 will fit within 256k limit but
+               # appending mid3 will bust the limit and will
+               # result in a separate packfile.
+               test-genrandom "a" $(( 66 * 1024 )) >mid1 &&
+               test-genrandom "b" $(( 80 * 1024 )) >mid2 &&
+               test-genrandom "c" $(( 128 * 1024 )) >mid3 &&
+               git add mid1 mid2 mid3 &&
+
+               count=0
+               for pi in .git/objects/pack/pack-*.idx
+               do
+                       test -f "$pi" && count=$(( $count + 1 ))
+               done &&
+               test $count = 2 &&
+
+               (
+                       git hash-object --stdin <mid1
+                       git hash-object --stdin <mid2
+                       git hash-object --stdin <mid3
+               ) |
+               sort >expect &&
+
+               for pi in .git/objects/pack/pack-*.idx
+               do
+                       git show-index <"$pi"
+               done |
+               sed -e "s/^[0-9]* \([0-9a-f]*\) .*/\1/" |
+               sort >actual &&
+
+               test_cmp expect actual
+       )
 '
 
 test_done
diff --git a/zlib.c b/zlib.c
index 3c63d480c75e9939fb3a047f595b032e9509d681..2b2c0c780e3fca6217c688298053a01aad724505 100644 (file)
--- a/zlib.c
+++ b/zlib.c
@@ -188,13 +188,20 @@ void git_deflate_init_gzip(git_zstream *strm, int level)
            strm->z.msg ? strm->z.msg : "no message");
 }
 
-void git_deflate_end(git_zstream *strm)
+int git_deflate_abort(git_zstream *strm)
 {
        int status;
 
        zlib_pre_call(strm);
        status = deflateEnd(&strm->z);
        zlib_post_call(strm);
+       return status;
+}
+
+void git_deflate_end(git_zstream *strm)
+{
+       int status = git_deflate_abort(strm);
+
        if (status == Z_OK)
                return;
        error("deflateEnd: %s (%s)", zerr_to_string(status),