Merge branch 'nh/empty-rebase'

diff --git a/fast-import.c b/fast-import.c

index fff285cd0f73ca2bfe5b76aa3d87a156ad7808ff..eed97c8fa9f3e1624f69443e28f76d995e589b34 100644 (file)
--- a/fast-import.c
+++ b/fast-import.c
@@ -170,8 +170,12 @@ Format of STDIN stream:
  #define DEPTH_BITS 13
  #define MAX_DEPTH ((1<<DEPTH_BITS)-1)
  
-struct object_entry
-{
+/*
+ * We abuse the setuid bit on directories to mean "do not delta".
+ */
+#define NO_DELTA S_ISUID
+
+struct object_entry {
         struct pack_idx_entry idx;
         struct object_entry *next;
         uint32_t type : TYPE_BITS,
@@ -179,16 +183,14 @@ struct object_entry
                 depth : DEPTH_BITS;
  };
  
-struct object_entry_pool
-{
+struct object_entry_pool {
         struct object_entry_pool *next_pool;
         struct object_entry *next_free;
         struct object_entry *end;
         struct object_entry entries[FLEX_ARRAY]; /* more */
  };
  
-struct mark_set
-{
+struct mark_set {
         union {
                 struct object_entry *marked[1024];
                 struct mark_set *sets[1024];
@@ -196,57 +198,49 @@ struct mark_set
         unsigned int shift;
  };
  
-struct last_object
-{
+struct last_object {
         struct strbuf data;
         off_t offset;
         unsigned int depth;
         unsigned no_swap : 1;
  };
  
-struct mem_pool
-{
+struct mem_pool {
         struct mem_pool *next_pool;
         char *next_free;
         char *end;
         uintmax_t space[FLEX_ARRAY]; /* more */
  };
  
-struct atom_str
-{
+struct atom_str {
         struct atom_str *next_atom;
         unsigned short str_len;
         char str_dat[FLEX_ARRAY]; /* more */
  };
  
  struct tree_content;
-struct tree_entry
-{
+struct tree_entry {
         struct tree_content *tree;
         struct atom_str *name;
-       struct tree_entry_ms
-       {
+       struct tree_entry_ms {
                 uint16_t mode;
                 unsigned char sha1[20];
         } versions[2];
  };
  
-struct tree_content
-{
+struct tree_content {
         unsigned int entry_capacity; /* must match avail_tree_content */
         unsigned int entry_count;
         unsigned int delta_depth;
         struct tree_entry *entries[FLEX_ARRAY]; /* more */
  };
  
-struct avail_tree_content
-{
+struct avail_tree_content {
         unsigned int entry_capacity; /* must match tree_content */
         struct avail_tree_content *next_avail;
  };
  
-struct branch
-{
+struct branch {
         struct branch *table_next_branch;
         struct branch *active_next_branch;
         const char *name;
@@ -258,16 +252,14 @@ struct branch
         unsigned char sha1[20];
  };
  
-struct tag
-{
+struct tag {
         struct tag *next_tag;
         const char *name;
         unsigned int pack_id;
         unsigned char sha1[20];
  };
  
-struct hash_list
-{
+struct hash_list {
         struct hash_list *next;
         unsigned char sha1[20];
  };
@@ -278,8 +270,7 @@ typedef enum {
         WHENSPEC_NOW
  } whenspec_type;
  
-struct recent_command
-{
+struct recent_command {
         struct recent_command *prev;
         struct recent_command *next;
         char *buf;
@@ -288,7 +279,6 @@ struct recent_command
  /* Configured limits on output */
  static unsigned long max_depth = 10;
  static off_t max_packsize;
-static uintmax_t big_file_threshold = 512 * 1024 * 1024;
  static int force_update;
  static int pack_compression_level = Z_DEFAULT_COMPRESSION;
  static int pack_compression_seen;
@@ -299,6 +289,7 @@ static uintmax_t marks_set_count;
  static uintmax_t object_count_by_type[1 << TYPE_BITS];
  static uintmax_t duplicate_count_by_type[1 << TYPE_BITS];
  static uintmax_t delta_count_by_type[1 << TYPE_BITS];
+static uintmax_t delta_count_attempts_by_type[1 << TYPE_BITS];
  static unsigned long object_count;
  static unsigned long branch_count;
  static unsigned long branch_load_count;
@@ -319,6 +310,7 @@ static unsigned int atom_cnt;
  static struct atom_str **atom_table;
  
  /* The .pack file being generated */
+static struct pack_idx_option pack_idx_opts;
  static unsigned int pack_id;
  static struct sha1file *pack_file;
  static struct packed_git *pack_data;
@@ -369,6 +361,7 @@ static unsigned int cmd_save = 100;
  static uintmax_t next_mark;
  static struct strbuf new_data = STRBUF_INIT;
  static int seen_data_command;
+static int require_explicit_termination;
  
  /* Signal handling */
  static volatile sig_atomic_t checkpoint_requested;
@@ -729,13 +722,8 @@ static struct branch *new_branch(const char *name)
  
         if (b)
                 die("Invalid attempt to create duplicate branch: %s", name);
-       switch (check_ref_format(name)) {
-       case 0: break; /* its valid */
-       case CHECK_REF_FORMAT_ONELEVEL:
-               break; /* valid, but too few '/', allow anyway */
-       default:
+       if (check_refname_format(name, REFNAME_ALLOW_ONELEVEL))
                 die("Branch name doesn't conform to GIT standards: %s", name);
-       }
  
         b = pool_calloc(1, sizeof(struct branch));
         b->name = pool_strdup(name);
@@ -867,16 +855,17 @@ static struct tree_content *dup_tree_content(struct tree_content *s)
  
  static void start_packfile(void)
  {
-       static char tmpfile[PATH_MAX];
+       static char tmp_file[PATH_MAX];
         struct packed_git *p;
         struct pack_header hdr;
         int pack_fd;
  
-       pack_fd = odb_mkstemp(tmpfile, sizeof(tmpfile),
+       pack_fd = odb_mkstemp(tmp_file, sizeof(tmp_file),
                               "pack/tmp_pack_XXXXXX");
-       p = xcalloc(1, sizeof(*p) + strlen(tmpfile) + 2);
-       strcpy(p->pack_name, tmpfile);
+       p = xcalloc(1, sizeof(*p) + strlen(tmp_file) + 2);
+       strcpy(p->pack_name, tmp_file);
         p->pack_fd = pack_fd;
+       p->do_not_close = 1;
         pack_file = sha1fd(pack_fd, p->pack_name);
  
         hdr.hdr_signature = htonl(PACK_SIGNATURE);
@@ -910,7 +899,7 @@ static const char *create_index(void)
         if (c != last)
                 die("internal consistency error creating the index");
  
-       tmpfile = write_idx_file(NULL, idx, object_count, pack_data->sha1);
+       tmpfile = write_idx_file(NULL, idx, object_count, &pack_idx_opts, pack_data->sha1);
         free(idx);
         return tmpfile;
  }
@@ -1031,7 +1020,7 @@ static int store_object(
         unsigned char sha1[20];
         unsigned long hdrlen, deltalen;
         git_SHA_CTX c;
-       z_stream s;
+       git_zstream s;
  
         hdrlen = sprintf((char *)hdr,"%s %lu", typename(type),
                 (unsigned long)dat->len) + 1;
@@ -1057,6 +1046,7 @@ static int store_object(
         }
  
         if (last && last->data.buf && last->depth < max_depth && dat->len > 20) {
+               delta_count_attempts_by_type[type]++;
                 delta = diff_delta(last->data.buf, last->data.len,
                         dat->buf, dat->len,
                         &deltalen, dat->len - 20);
@@ -1064,7 +1054,7 @@ static int store_object(
                 delta = NULL;
  
         memset(&s, 0, sizeof(s));
-       deflateInit(&s, pack_compression_level);
+       git_deflate_init(&s, pack_compression_level);
         if (delta) {
                 s.next_in = delta;
                 s.avail_in = deltalen;
@@ -1072,11 +1062,11 @@ static int store_object(
                 s.next_in = (void *)dat->buf;
                 s.avail_in = dat->len;
         }
-       s.avail_out = deflateBound(&s, s.avail_in);
+       s.avail_out = git_deflate_bound(&s, s.avail_in);
         s.next_out = out = xmalloc(s.avail_out);
-       while (deflate(&s, Z_FINISH) == Z_OK)
-               /* nothing */;
-       deflateEnd(&s);
+       while (git_deflate(&s, Z_FINISH) == Z_OK)
+               ; /* nothing */
+       git_deflate_end(&s);
  
         /* Determine if we should auto-checkpoint. */
         if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize)
@@ -1092,14 +1082,14 @@ static int store_object(
                         delta = NULL;
  
                         memset(&s, 0, sizeof(s));
-                       deflateInit(&s, pack_compression_level);
+                       git_deflate_init(&s, pack_compression_level);
                         s.next_in = (void *)dat->buf;
                         s.avail_in = dat->len;
-                       s.avail_out = deflateBound(&s, s.avail_in);
+                       s.avail_out = git_deflate_bound(&s, s.avail_in);
                         s.next_out = out = xrealloc(out, s.avail_out);
-                       while (deflate(&s, Z_FINISH) == Z_OK)
-                               /* nothing */;
-                       deflateEnd(&s);
+                       while (git_deflate(&s, Z_FINISH) == Z_OK)
+                               ; /* nothing */
+                       git_deflate_end(&s);
                 }
         }
  
@@ -1153,17 +1143,11 @@ static int store_object(
         return 0;
  }
  
-static void truncate_pack(off_t to, git_SHA_CTX *ctx)
+static void truncate_pack(struct sha1file_checkpoint *checkpoint)
  {
-       if (ftruncate(pack_data->pack_fd, to)
-        || lseek(pack_data->pack_fd, to, SEEK_SET) != to)
+       if (sha1file_truncate(pack_file, checkpoint))
                 die_errno("cannot truncate pack to skip duplicate");
-       pack_size = to;
-
-       /* yes this is a layering violation */
-       pack_file->total = to;
-       pack_file->offset = 0;
-       pack_file->ctx = *ctx;
+       pack_size = checkpoint->offset;
  }
  
  static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
@@ -1176,8 +1160,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
         unsigned long hdrlen;
         off_t offset;
         git_SHA_CTX c;
-       git_SHA_CTX pack_file_ctx;
-       z_stream s;
+       git_zstream s;
+       struct sha1file_checkpoint checkpoint;
         int status = Z_OK;
  
         /* Determine if we should auto-checkpoint. */
@@ -1185,11 +1169,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
                 || (pack_size + 60 + len) < pack_size)
                 cycle_packfile();
  
-       offset = pack_size;
-
-       /* preserve the pack_file SHA1 ctx in case we have to truncate later */
-       sha1flush(pack_file);
-       pack_file_ctx = pack_file->ctx;
+       sha1file_checkpoint(pack_file, &checkpoint);
+       offset = checkpoint.offset;
  
         hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
         if (out_sz <= hdrlen)
@@ -1201,7 +1182,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
         crc32_begin(pack_file);
  
         memset(&s, 0, sizeof(s));
-       deflateInit(&s, pack_compression_level);
+       git_deflate_init(&s, pack_compression_level);
  
         hdrlen = encode_in_pack_object_header(OBJ_BLOB, len, out_buf);
         if (out_sz <= hdrlen)
@@ -1223,7 +1204,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
                         len -= n;
                 }
  
-               status = deflate(&s, len ? 0 : Z_FINISH);
+               status = git_deflate(&s, len ? 0 : Z_FINISH);
  
                 if (!s.avail_out || status == Z_STREAM_END) {
                         size_t n = s.next_out - out_buf;
@@ -1242,7 +1223,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
                         die("unexpected deflate failure: %d", status);
                 }
         }
-       deflateEnd(&s);
+       git_deflate_end(&s);
         git_SHA1_Final(sha1, &c);
  
         if (sha1out)
@@ -1255,14 +1236,14 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
  
         if (e->idx.offset) {
                 duplicate_count_by_type[OBJ_BLOB]++;
-               truncate_pack(offset, &pack_file_ctx);
+               truncate_pack(&checkpoint);
  
         } else if (find_sha1_pack(sha1, packed_git)) {
                 e->type = OBJ_BLOB;
                 e->pack_id = MAX_PACK_ID;
                 e->idx.offset = 1; /* just not zero! */
                 duplicate_count_by_type[OBJ_BLOB]++;
-               truncate_pack(offset, &pack_file_ctx);
+               truncate_pack(&checkpoint);
  
         } else {
                 e->depth = 0;
@@ -1428,8 +1409,9 @@ static void mktree(struct tree_content *t, int v, struct strbuf *b)
                 struct tree_entry *e = t->entries[i];
                 if (!e->versions[v].mode)
                         continue;
-               strbuf_addf(b, "%o %s%c", (unsigned int)e->versions[v].mode,
-                                       e->name->str_dat, '\0');
+               strbuf_addf(b, "%o %s%c",
+                       (unsigned int)(e->versions[v].mode & ~NO_DELTA),
+                       e->name->str_dat, '\0');
                 strbuf_add(b, e->versions[v].sha1, 20);
         }
  }
@@ -1439,7 +1421,7 @@ static void store_tree(struct tree_entry *root)
         struct tree_content *t = root->tree;
         unsigned int i, j, del;
         struct last_object lo = { STRBUF_INIT, 0, 0, /* no_swap */ 1 };
-       struct object_entry *le;
+       struct object_entry *le = NULL;
  
         if (!is_null_sha1(root->versions[1].sha1))
                 return;
@@ -1449,7 +1431,8 @@ static void store_tree(struct tree_entry *root)
                         store_tree(t->entries[i]);
         }
  
-       le = find_object(root->versions[0].sha1);
+       if (!(root->versions[0].mode & NO_DELTA))
+               le = find_object(root->versions[0].sha1);
         if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) {
                 mktree(t, 0, &old_tree);
                 lo.data = old_tree;
@@ -1483,6 +1466,7 @@ static void tree_content_replace(
  {
         if (!S_ISDIR(mode))
                 die("Root cannot be a non-directory");
+       hashclr(root->versions[0].sha1);
         hashcpy(root->versions[1].sha1, sha1);
         if (root->tree)
                 release_tree_content_recursive(root->tree);
@@ -1527,6 +1511,23 @@ static int tree_content_set(
                                 if (e->tree)
                                         release_tree_content_recursive(e->tree);
                                 e->tree = subtree;
+
+                               /*
+                                * We need to leave e->versions[0].sha1 alone
+                                * to avoid modifying the preimage tree used
+                                * when writing out the parent directory.
+                                * But after replacing the subdir with a
+                                * completely different one, it's not a good
+                                * delta base any more, and besides, we've
+                                * thrown away the tree entries needed to
+                                * make a delta against it.
+                                *
+                                * So let's just explicitly disable deltas
+                                * for the subtree.
+                                */
+                               if (S_ISDIR(e->versions[0].mode))
+                                       e->versions[0].mode |= NO_DELTA;
+
                                 hashclr(root->versions[1].sha1);
                                 return 1;
                         }
@@ -1640,6 +1641,8 @@ static int tree_content_get(
                 n = slash1 - p;
         else
                 n = strlen(p);
+       if (!n)
+               die("Empty path component found in input");
  
         if (!root->tree)
                 load_tree(root);
@@ -1981,32 +1984,41 @@ static int validate_raw_date(const char *src, char *result, int maxlen)
  
  static char *parse_ident(const char *buf)
  {
-       const char *gt;
+       const char *ltgt;
         size_t name_len;
         char *ident;
  
-       gt = strrchr(buf, '>');
-       if (!gt)
+       /* ensure there is a space delimiter even if there is no name */
+       if (*buf == '<')
+               --buf;
+
+       ltgt = buf + strcspn(buf, "<>");
+       if (*ltgt != '<')
+               die("Missing < in ident string: %s", buf);
+       if (ltgt != buf && ltgt[-1] != ' ')
+               die("Missing space before < in ident string: %s", buf);
+       ltgt = ltgt + 1 + strcspn(ltgt + 1, "<>");
+       if (*ltgt != '>')
                 die("Missing > in ident string: %s", buf);
-       gt++;
-       if (*gt != ' ')
+       ltgt++;
+       if (*ltgt != ' ')
                 die("Missing space after > in ident string: %s", buf);
-       gt++;
-       name_len = gt - buf;
+       ltgt++;
+       name_len = ltgt - buf;
         ident = xmalloc(name_len + 24);
         strncpy(ident, buf, name_len);
  
         switch (whenspec) {
         case WHENSPEC_RAW:
-               if (validate_raw_date(gt, ident + name_len, 24) < 0)
-                       die("Invalid raw date \"%s\" in ident: %s", gt, buf);
+               if (validate_raw_date(ltgt, ident + name_len, 24) < 0)
+                       die("Invalid raw date \"%s\" in ident: %s", ltgt, buf);
                 break;
         case WHENSPEC_RFC2822:
-               if (parse_date(gt, ident + name_len, 24) < 0)
-                       die("Invalid rfc2822 date \"%s\" in ident: %s", gt, buf);
+               if (parse_date(ltgt, ident + name_len, 24) < 0)
+                       die("Invalid rfc2822 date \"%s\" in ident: %s", ltgt, buf);
                 break;
         case WHENSPEC_NOW:
-               if (strcmp("now", gt))
+               if (strcmp("now", ltgt))
                         die("Date in ident must be 'now': %s", buf);
                 datestamp(ident + name_len, 24);
                 break;
@@ -2154,6 +2166,11 @@ static uintmax_t do_change_note_fanout(
  
                 if (tmp_hex_sha1_len == 40 && !get_sha1_hex(hex_sha1, sha1)) {
                         /* This is a note entry */
+                       if (fanout == 0xff) {
+                               /* Counting mode, no rename */
+                               num_notes++;
+                               continue;
+                       }
                         construct_path_with_fanout(hex_sha1, fanout, realpath);
                         if (!strcmp(fullpath, realpath)) {
                                 /* Note entry is in correct location */
@@ -2190,6 +2207,59 @@ static uintmax_t change_note_fanout(struct tree_entry *root,
         return do_change_note_fanout(root, root, hex_sha1, 0, path, 0, fanout);
  }
  
+/*
+ * Given a pointer into a string, parse a mark reference:
+ *
+ *   idnum ::= ':' bigint;
+ *
+ * Return the mark and store the first character after it in *endptr.
+ *
+ * Die if no value can be parsed after the ':'.  The character after
+ * the value is not checked here; that is left to the caller.
+ */
+static uintmax_t parse_mark_ref(const char *p, char **endptr)
+{
+       uintmax_t mark;
+
+       assert(*p == ':');
+       p++;
+       mark = strtoumax(p, endptr, 10);
+       if (*endptr == p)
+               die("No value after ':' in mark: %s", command_buf.buf);
+       return mark;
+}
+
+/*
+ * Parse the mark reference and return its value; die unless the
+ * value is immediately followed by the end of the string.
+ */
+static uintmax_t parse_mark_ref_eol(const char *p)
+{
+       char *end;
+       uintmax_t mark;
+
+       mark = parse_mark_ref(p, &end);
+       if (*end != '\0')
+               die("Garbage after mark: %s", command_buf.buf);
+       return mark;
+}
+
+/*
+ * Parse the mark reference, demanding a trailing space.  Return the
+ * mark and advance *p so that it points at that space.
+ */
+static uintmax_t parse_mark_ref_space(const char **p)
+{
+       uintmax_t mark;
+       char *end;
+
+       mark = parse_mark_ref(*p, &end);
+       if (*end != ' ')
+               die("Missing space after mark: %s", command_buf.buf);
+       *p = end;
+       return mark;
+}
+
  static void file_change_m(struct branch *b)
  {
         const char *p = command_buf.buf + 2;
@@ -2218,21 +2288,21 @@ static void file_change_m(struct branch *b)
         }
  
         if (*p == ':') {
-               char *x;
-               oe = find_mark(strtoumax(p + 1, &x, 10));
+               oe = find_mark(parse_mark_ref_space(&p));
                 hashcpy(sha1, oe->idx.sha1);
-               p = x;
-       } else if (!prefixcmp(p, "inline")) {
+       } else if (!prefixcmp(p, "inline ")) {
                 inline_data = 1;
-               p += 6;
+               p += strlen("inline");  /* advance to space */
         } else {
                 if (get_sha1_hex(p, sha1))
-                       die("Invalid SHA1: %s", command_buf.buf);
+                       die("Invalid dataref: %s", command_buf.buf);
                 oe = find_object(sha1);
                 p += 40;
+               if (*p != ' ')
+                       die("Missing space after SHA1: %s", command_buf.buf);
         }
-       if (*p++ != ' ')
-               die("Missing space after SHA1: %s", command_buf.buf);
+       assert(*p == ' ');
+       p++;  /* skip space */
  
         strbuf_reset(&uq);
         if (!unquote_c_style(&uq, p, &endp)) {
@@ -2360,7 +2430,7 @@ static void file_change_cr(struct branch *b, int rename)
                 leaf.tree);
  }
  
-static void note_change_n(struct branch *b, unsigned char old_fanout)
+static void note_change_n(struct branch *b, unsigned char *old_fanout)
  {
         const char *p = command_buf.buf + 2;
         static struct strbuf uq = STRBUF_INIT;
@@ -2371,30 +2441,49 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
         uint16_t inline_data = 0;
         unsigned char new_fanout;
  
+       /*
+        * When loading a branch, we don't traverse its tree to count the real
+        * number of notes (too expensive to do this for all non-note refs).
+        * This means that recently loaded notes refs might incorrectly have
+        * b->num_notes == 0, and consequently, old_fanout might be wrong.
+        *
+        * Fix this by traversing the tree and counting the number of notes
+        * when b->num_notes == 0. If the notes tree is truly empty, the
+        * calculation should not take long.
+        */
+       if (b->num_notes == 0 && *old_fanout == 0) {
+               /* Invoke change_note_fanout() in "counting mode". */
+               b->num_notes = change_note_fanout(&b->branch_tree, 0xff);
+               *old_fanout = convert_num_notes_to_fanout(b->num_notes);
+       }
+
+       /* Now parse the notemodify command. */
         /* <dataref> or 'inline' */
         if (*p == ':') {
-               char *x;
-               oe = find_mark(strtoumax(p + 1, &x, 10));
+               oe = find_mark(parse_mark_ref_space(&p));
                 hashcpy(sha1, oe->idx.sha1);
-               p = x;
-       } else if (!prefixcmp(p, "inline")) {
+       } else if (!prefixcmp(p, "inline ")) {
                 inline_data = 1;
-               p += 6;
+               p += strlen("inline");  /* advance to space */
         } else {
                 if (get_sha1_hex(p, sha1))
-                       die("Invalid SHA1: %s", command_buf.buf);
+                       die("Invalid dataref: %s", command_buf.buf);
                 oe = find_object(sha1);
                 p += 40;
+               if (*p != ' ')
+                       die("Missing space after SHA1: %s", command_buf.buf);
         }
-       if (*p++ != ' ')
-               die("Missing space after SHA1: %s", command_buf.buf);
+       assert(*p == ' ');
+       p++;  /* skip space */
  
         /* <committish> */
         s = lookup_branch(p);
         if (s) {
+               if (is_null_sha1(s->sha1))
+                       die("Can't add a note on empty branch.");
                 hashcpy(commit_sha1, s->sha1);
         } else if (*p == ':') {
-               uintmax_t commit_mark = strtoumax(p + 1, NULL, 10);
+               uintmax_t commit_mark = parse_mark_ref_eol(p);
                 struct object_entry *commit_oe = find_mark(commit_mark);
                 if (commit_oe->type != OBJ_COMMIT)
                         die("Mark :%" PRIuMAX " not a commit", commit_mark);
@@ -2429,7 +2518,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
                             typename(type), command_buf.buf);
         }
  
-       construct_path_with_fanout(sha1_to_hex(commit_sha1), old_fanout, path);
+       construct_path_with_fanout(sha1_to_hex(commit_sha1), *old_fanout, path);
         if (tree_content_remove(&b->branch_tree, path, NULL))
                 b->num_notes--;
  
@@ -2501,7 +2590,7 @@ static int parse_from(struct branch *b)
                 hashcpy(b->branch_tree.versions[0].sha1, t);
                 hashcpy(b->branch_tree.versions[1].sha1, t);
         } else if (*from == ':') {
-               uintmax_t idnum = strtoumax(from + 1, NULL, 10);
+               uintmax_t idnum = parse_mark_ref_eol(from);
                 struct object_entry *oe = find_mark(idnum);
                 if (oe->type != OBJ_COMMIT)
                         die("Mark :%" PRIuMAX " not a commit", idnum);
@@ -2536,7 +2625,7 @@ static struct hash_list *parse_merge(unsigned int *count)
                 if (s)
                         hashcpy(n->sha1, s->sha1);
                 else if (*from == ':') {
-                       uintmax_t idnum = strtoumax(from + 1, NULL, 10);
+                       uintmax_t idnum = parse_mark_ref_eol(from);
                         struct object_entry *oe = find_mark(idnum);
                         if (oe->type != OBJ_COMMIT)
                                 die("Mark :%" PRIuMAX " not a commit", idnum);
@@ -2616,7 +2705,7 @@ static void parse_new_commit(void)
                 else if (!prefixcmp(command_buf.buf, "C "))
                         file_change_cr(b, 0);
                 else if (!prefixcmp(command_buf.buf, "N "))
-                       note_change_n(b, prev_fanout);
+                       note_change_n(b, &prev_fanout);
                 else if (!strcmp("deleteall", command_buf.buf))
                         file_change_deleteall(b);
                 else if (!prefixcmp(command_buf.buf, "ls "))
@@ -2678,7 +2767,7 @@ static void parse_new_tag(void)
         /* Obtain the new tag name from the rest of our command */
         sp = strchr(command_buf.buf, ' ') + 1;
         t = pool_alloc(sizeof(struct tag));
-       t->next_tag = NULL;
+       memset(t, 0, sizeof(struct tag));
         t->name = pool_strdup(sp);
         if (last_tag)
                 last_tag->next_tag = t;
@@ -2693,22 +2782,24 @@ static void parse_new_tag(void)
         from = strchr(command_buf.buf, ' ') + 1;
         s = lookup_branch(from);
         if (s) {
+               if (is_null_sha1(s->sha1))
+                       die("Can't tag an empty branch.");
                 hashcpy(sha1, s->sha1);
                 type = OBJ_COMMIT;
         } else if (*from == ':') {
                 struct object_entry *oe;
-               from_mark = strtoumax(from + 1, NULL, 10);
+               from_mark = parse_mark_ref_eol(from);
                 oe = find_mark(from_mark);
                 type = oe->type;
                 hashcpy(sha1, oe->idx.sha1);
         } else if (!get_sha1(from, sha1)) {
-               unsigned long size;
-               char *buf;
-
-               buf = read_sha1_file(sha1, &type, &size);
-               if (!buf || size < 46)
-                       die("Not a valid commit: %s", from);
-               free(buf);
+               struct object_entry *oe = find_object(sha1);
+               if (!oe) {
+                       type = sha1_object_info(sha1, NULL);
+                       if (type < 0)
+                               die("Not a valid object: %s", from);
+               } else
+                       type = oe->type;
         } else
                 die("Invalid ref name or SHA1 expression: %s", from);
         read_next_command();
@@ -2812,7 +2903,12 @@ static void cat_blob(struct object_entry *oe, unsigned char sha1[20])
         strbuf_release(&line);
         cat_blob_write(buf, size);
         cat_blob_write("\n", 1);
-       free(buf);
+       if (oe && oe->pack_id == pack_id) {
+               last_blob.offset = oe->idx.offset;
+               strbuf_attach(&last_blob.data, buf, size, size);
+               last_blob.depth = oe->depth;
+       } else
+               free(buf);
  }
  
  static void parse_cat_blob(void)
@@ -2824,18 +2920,13 @@ static void parse_cat_blob(void)
         /* cat-blob SP <object> LF */
         p = command_buf.buf + strlen("cat-blob ");
         if (*p == ':') {
-               char *x;
-               oe = find_mark(strtoumax(p + 1, &x, 10));
-               if (x == p + 1)
-                       die("Invalid mark: %s", command_buf.buf);
+               oe = find_mark(parse_mark_ref_eol(p));
                 if (!oe)
                         die("Unknown mark: %s", command_buf.buf);
-               if (*x)
-                       die("Garbage after mark: %s", command_buf.buf);
                 hashcpy(sha1, oe->idx.sha1);
         } else {
                 if (get_sha1_hex(p, sha1))
-                       die("Invalid SHA1: %s", command_buf.buf);
+                       die("Invalid dataref: %s", command_buf.buf);
                 if (p[40])
                         die("Garbage after SHA1: %s", command_buf.buf);
                 oe = find_object(sha1);
@@ -2848,7 +2939,7 @@ static struct object_entry *dereference(struct object_entry *oe,
                                         unsigned char sha1[20])
  {
         unsigned long size;
-       void *buf = NULL;
+       char *buf = NULL;
         if (!oe) {
                 enum object_type type = sha1_object_info(sha1, NULL);
                 if (type < 0)
@@ -2901,17 +2992,13 @@ static struct object_entry *parse_treeish_dataref(const char **p)
         struct object_entry *e;
  
         if (**p == ':') {       /* <mark> */
-               char *endptr;
-               e = find_mark(strtoumax(*p + 1, &endptr, 10));
-               if (endptr == *p + 1)
-                       die("Invalid mark: %s", command_buf.buf);
+               e = find_mark(parse_mark_ref_space(p));
                 if (!e)
                         die("Unknown mark: %s", command_buf.buf);
-               *p = endptr;
                 hashcpy(sha1, e->idx.sha1);
         } else {        /* <sha1> */
                 if (get_sha1_hex(*p, sha1))
-                       die("Invalid SHA1: %s", command_buf.buf);
+                       die("Invalid dataref: %s", command_buf.buf);
                 e = find_object(sha1);
                 *p += 40;
         }
@@ -2941,7 +3028,7 @@ static void print_ls(int mode, const unsigned char *sha1, const char *path)
                 /* mode SP type SP object_name TAB path LF */
                 strbuf_reset(&line);
                 strbuf_addf(&line, "%06o %s %s\t",
-                               mode, type, sha1_to_hex(sha1));
+                               mode & ~NO_DELTA, type, sha1_to_hex(sha1));
                 quote_c_style(path, &line, NULL, 0);
                 strbuf_addch(&line, '\n');
         }
@@ -2952,7 +3039,7 @@ static void parse_ls(struct branch *b)
  {
         const char *p;
         struct tree_entry *root = NULL;
-       struct tree_entry leaf = {0};
+       struct tree_entry leaf = {NULL};
  
         /* ls SP (<treeish> SP)? <path> */
         p = command_buf.buf + strlen("ls ");
@@ -3151,11 +3238,13 @@ static int parse_one_feature(const char *feature, int from_stream)
                 option_export_marks(feature + 13);
         } else if (!strcmp(feature, "cat-blob")) {
                 ; /* Don't die - this feature is supported */
-       } else if (!prefixcmp(feature, "relative-marks")) {
+       } else if (!strcmp(feature, "relative-marks")) {
                 relative_marks_paths = 1;
-       } else if (!prefixcmp(feature, "no-relative-marks")) {
+       } else if (!strcmp(feature, "no-relative-marks")) {
                 relative_marks_paths = 0;
-       } else if (!prefixcmp(feature, "force")) {
+       } else if (!strcmp(feature, "done")) {
+               require_explicit_termination = 1;
+       } else if (!strcmp(feature, "force")) {
                 force_update = 1;
         } else if (!strcmp(feature, "notes") || !strcmp(feature, "ls")) {
                 ; /* do nothing; we have the feature */
@@ -3211,20 +3300,16 @@ static int git_pack_config(const char *k, const char *v, void *cb)
                 return 0;
         }
         if (!strcmp(k, "pack.indexversion")) {
-               pack_idx_default_version = git_config_int(k, v);
-               if (pack_idx_default_version > 2)
+               pack_idx_opts.version = git_config_int(k, v);
+               if (pack_idx_opts.version > 2)
                         die("bad pack.indexversion=%"PRIu32,
-                           pack_idx_default_version);
+                           pack_idx_opts.version);
                 return 0;
         }
         if (!strcmp(k, "pack.packsizelimit")) {
                 max_packsize = git_config_ulong(k, v);
                 return 0;
         }
-       if (!strcmp(k, "core.bigfilethreshold")) {
-               long n = git_config_int(k, v);
-               big_file_threshold = 0 < n ? n : 0;
-       }
         return git_default_config(k, v, cb);
  }
  
@@ -3268,10 +3353,13 @@ int main(int argc, const char **argv)
  
         git_extract_argv0_path(argv[0]);
  
+       git_setup_gettext();
+
         if (argc == 2 && !strcmp(argv[1], "-h"))
                 usage(fast_import_usage);
  
         setup_git_directory();
+       reset_pack_idx_option(&pack_idx_opts);
         git_config(git_pack_config, NULL);
         if (!pack_compression_seen && core_compression_seen)
                 pack_compression_level = core_compression_level;
@@ -3308,6 +3396,8 @@ int main(int argc, const char **argv)
                         parse_reset_branch();
                 else if (!strcmp("checkpoint", command_buf.buf))
                         parse_checkpoint();
+               else if (!strcmp("done", command_buf.buf))
+                       break;
                 else if (!prefixcmp(command_buf.buf, "progress "))
                         parse_progress();
                 else if (!prefixcmp(command_buf.buf, "feature "))
@@ -3327,6 +3417,9 @@ int main(int argc, const char **argv)
         if (!seen_data_command)
                 parse_argv();
  
+       if (require_explicit_termination && feof(stdin))
+               die("stream ends early");
+
         end_packfile();
  
         dump_branches();
@@ -3348,10 +3441,10 @@ int main(int argc, const char **argv)
                 fprintf(stderr, "---------------------------------------------------------------------\n");
                 fprintf(stderr, "Alloc'd objects: %10" PRIuMAX "\n", alloc_count);
                 fprintf(stderr, "Total objects:   %10" PRIuMAX " (%10" PRIuMAX " duplicates                  )\n", total_count, duplicate_count);
-               fprintf(stderr, "      blobs  :   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB]);
-               fprintf(stderr, "      trees  :   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE]);
-               fprintf(stderr, "      commits:   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT]);
-               fprintf(stderr, "      tags   :   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG], delta_count_by_type[OBJ_TAG]);
+               fprintf(stderr, "      blobs  :   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas of %10" PRIuMAX" attempts)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB], delta_count_attempts_by_type[OBJ_BLOB]);
+               fprintf(stderr, "      trees  :   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas of %10" PRIuMAX" attempts)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE], delta_count_attempts_by_type[OBJ_TREE]);
+               fprintf(stderr, "      commits:   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas of %10" PRIuMAX" attempts)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT], delta_count_attempts_by_type[OBJ_COMMIT]);
+               fprintf(stderr, "      tags   :   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas of %10" PRIuMAX" attempts)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG], delta_count_by_type[OBJ_TAG], delta_count_attempts_by_type[OBJ_TAG]);
                 fprintf(stderr, "Total branches:  %10lu (%10lu loads     )\n", branch_count, branch_load_count);
                 fprintf(stderr, "      marks:     %10" PRIuMAX " (%10" PRIuMAX " unique    )\n", (((uintmax_t)1) << marks->shift) * 1024, marks_set_count);
                 fprintf(stderr, "      atoms:     %10u\n", atom_cnt);