run-command.c: fix broken list iteration in clear_child_for_cleanup
[gitweb.git] / fast-import.c
index 7cc22625e5b73d2778566a154bce94316311b0c8..6cd19e580ba710d03682fcaff997fa7e0e555691 100644 (file)
@@ -170,6 +170,11 @@ Format of STDIN stream:
 #define DEPTH_BITS 13
 #define MAX_DEPTH ((1<<DEPTH_BITS)-1)
 
+/*
+ * We abuse the setuid bit on directories to mean "do not delta".
+ */
+#define NO_DELTA S_ISUID
+
 struct object_entry {
        struct pack_idx_entry idx;
        struct object_entry *next;
@@ -284,6 +289,7 @@ static uintmax_t marks_set_count;
 static uintmax_t object_count_by_type[1 << TYPE_BITS];
 static uintmax_t duplicate_count_by_type[1 << TYPE_BITS];
 static uintmax_t delta_count_by_type[1 << TYPE_BITS];
+static uintmax_t delta_count_attempts_by_type[1 << TYPE_BITS];
 static unsigned long object_count;
 static unsigned long branch_count;
 static unsigned long branch_load_count;
@@ -716,13 +722,8 @@ static struct branch *new_branch(const char *name)
 
        if (b)
                die("Invalid attempt to create duplicate branch: %s", name);
-       switch (check_ref_format(name)) {
-       case 0: break; /* its valid */
-       case CHECK_REF_FORMAT_ONELEVEL:
-               break; /* valid, but too few '/', allow anyway */
-       default:
+       if (check_refname_format(name, REFNAME_ALLOW_ONELEVEL))
                die("Branch name doesn't conform to GIT standards: %s", name);
-       }
 
        b = pool_calloc(1, sizeof(struct branch));
        b->name = pool_strdup(name);
@@ -854,15 +855,15 @@ static struct tree_content *dup_tree_content(struct tree_content *s)
 
 static void start_packfile(void)
 {
-       static char tmpfile[PATH_MAX];
+       static char tmp_file[PATH_MAX];
        struct packed_git *p;
        struct pack_header hdr;
        int pack_fd;
 
-       pack_fd = odb_mkstemp(tmpfile, sizeof(tmpfile),
+       pack_fd = odb_mkstemp(tmp_file, sizeof(tmp_file),
                              "pack/tmp_pack_XXXXXX");
-       p = xcalloc(1, sizeof(*p) + strlen(tmpfile) + 2);
-       strcpy(p->pack_name, tmpfile);
+       p = xcalloc(1, sizeof(*p) + strlen(tmp_file) + 2);
+       strcpy(p->pack_name, tmp_file);
        p->pack_fd = pack_fd;
        p->do_not_close = 1;
        pack_file = sha1fd(pack_fd, p->pack_name);
@@ -1045,6 +1046,7 @@ static int store_object(
        }
 
        if (last && last->data.buf && last->depth < max_depth && dat->len > 20) {
+               delta_count_attempts_by_type[type]++;
                delta = diff_delta(last->data.buf, last->data.len,
                        dat->buf, dat->len,
                        &deltalen, dat->len - 20);
@@ -1141,17 +1143,11 @@ static int store_object(
        return 0;
 }
 
-static void truncate_pack(off_t to, git_SHA_CTX *ctx)
+static void truncate_pack(struct sha1file_checkpoint *checkpoint)
 {
-       if (ftruncate(pack_data->pack_fd, to)
-        || lseek(pack_data->pack_fd, to, SEEK_SET) != to)
+       if (sha1file_truncate(pack_file, checkpoint))
                die_errno("cannot truncate pack to skip duplicate");
-       pack_size = to;
-
-       /* yes this is a layering violation */
-       pack_file->total = to;
-       pack_file->offset = 0;
-       pack_file->ctx = *ctx;
+       pack_size = checkpoint->offset;
 }
 
 static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
@@ -1164,8 +1160,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
        unsigned long hdrlen;
        off_t offset;
        git_SHA_CTX c;
-       git_SHA_CTX pack_file_ctx;
        git_zstream s;
+       struct sha1file_checkpoint checkpoint;
        int status = Z_OK;
 
        /* Determine if we should auto-checkpoint. */
@@ -1173,11 +1169,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
                || (pack_size + 60 + len) < pack_size)
                cycle_packfile();
 
-       offset = pack_size;
-
-       /* preserve the pack_file SHA1 ctx in case we have to truncate later */
-       sha1flush(pack_file);
-       pack_file_ctx = pack_file->ctx;
+       sha1file_checkpoint(pack_file, &checkpoint);
+       offset = checkpoint.offset;
 
        hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
        if (out_sz <= hdrlen)
@@ -1243,14 +1236,14 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 
        if (e->idx.offset) {
                duplicate_count_by_type[OBJ_BLOB]++;
-               truncate_pack(offset, &pack_file_ctx);
+               truncate_pack(&checkpoint);
 
        } else if (find_sha1_pack(sha1, packed_git)) {
                e->type = OBJ_BLOB;
                e->pack_id = MAX_PACK_ID;
                e->idx.offset = 1; /* just not zero! */
                duplicate_count_by_type[OBJ_BLOB]++;
-               truncate_pack(offset, &pack_file_ctx);
+               truncate_pack(&checkpoint);
 
        } else {
                e->depth = 0;
@@ -1416,8 +1409,9 @@ static void mktree(struct tree_content *t, int v, struct strbuf *b)
                struct tree_entry *e = t->entries[i];
                if (!e->versions[v].mode)
                        continue;
-               strbuf_addf(b, "%o %s%c", (unsigned int)e->versions[v].mode,
-                                       e->name->str_dat, '\0');
+               strbuf_addf(b, "%o %s%c",
+                       (unsigned int)(e->versions[v].mode & ~NO_DELTA),
+                       e->name->str_dat, '\0');
                strbuf_add(b, e->versions[v].sha1, 20);
        }
 }
@@ -1427,7 +1421,7 @@ static void store_tree(struct tree_entry *root)
        struct tree_content *t = root->tree;
        unsigned int i, j, del;
        struct last_object lo = { STRBUF_INIT, 0, 0, /* no_swap */ 1 };
-       struct object_entry *le;
+       struct object_entry *le = NULL;
 
        if (!is_null_sha1(root->versions[1].sha1))
                return;
@@ -1437,7 +1431,8 @@ static void store_tree(struct tree_entry *root)
                        store_tree(t->entries[i]);
        }
 
-       le = find_object(root->versions[0].sha1);
+       if (!(root->versions[0].mode & NO_DELTA))
+               le = find_object(root->versions[0].sha1);
        if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) {
                mktree(t, 0, &old_tree);
                lo.data = old_tree;
@@ -1471,6 +1466,7 @@ static void tree_content_replace(
 {
        if (!S_ISDIR(mode))
                die("Root cannot be a non-directory");
+       hashclr(root->versions[0].sha1);
        hashcpy(root->versions[1].sha1, sha1);
        if (root->tree)
                release_tree_content_recursive(root->tree);
@@ -1515,6 +1511,23 @@ static int tree_content_set(
                                if (e->tree)
                                        release_tree_content_recursive(e->tree);
                                e->tree = subtree;
+
+                               /*
+                                * We need to leave e->versions[0].sha1 alone
+                                * to avoid modifying the preimage tree used
+                                * when writing out the parent directory.
+                                * But after replacing the subdir with a
+                                * completely different one, it's not a good
+                                * delta base any more, and besides, we've
+                                * thrown away the tree entries needed to
+                                * make a delta against it.
+                                *
+                                * So let's just explicitly disable deltas
+                                * for the subtree.
+                                */
+                               if (S_ISDIR(e->versions[0].mode))
+                                       e->versions[0].mode |= NO_DELTA;
+
                                hashclr(root->versions[1].sha1);
                                return 1;
                        }
@@ -1969,32 +1982,41 @@ static int validate_raw_date(const char *src, char *result, int maxlen)
 
 static char *parse_ident(const char *buf)
 {
-       const char *gt;
+       const char *ltgt;
        size_t name_len;
        char *ident;
 
-       gt = strrchr(buf, '>');
-       if (!gt)
+       /* ensure there is a space delimiter even if there is no name */
+       if (*buf == '<')
+               --buf;
+
+       ltgt = buf + strcspn(buf, "<>");
+       if (*ltgt != '<')
+               die("Missing < in ident string: %s", buf);
+       if (ltgt != buf && ltgt[-1] != ' ')
+               die("Missing space before < in ident string: %s", buf);
+       ltgt = ltgt + 1 + strcspn(ltgt + 1, "<>");
+       if (*ltgt != '>')
                die("Missing > in ident string: %s", buf);
-       gt++;
-       if (*gt != ' ')
+       ltgt++;
+       if (*ltgt != ' ')
                die("Missing space after > in ident string: %s", buf);
-       gt++;
-       name_len = gt - buf;
+       ltgt++;
+       name_len = ltgt - buf;
        ident = xmalloc(name_len + 24);
        strncpy(ident, buf, name_len);
 
        switch (whenspec) {
        case WHENSPEC_RAW:
-               if (validate_raw_date(gt, ident + name_len, 24) < 0)
-                       die("Invalid raw date \"%s\" in ident: %s", gt, buf);
+               if (validate_raw_date(ltgt, ident + name_len, 24) < 0)
+                       die("Invalid raw date \"%s\" in ident: %s", ltgt, buf);
                break;
        case WHENSPEC_RFC2822:
-               if (parse_date(gt, ident + name_len, 24) < 0)
-                       die("Invalid rfc2822 date \"%s\" in ident: %s", gt, buf);
+               if (parse_date(ltgt, ident + name_len, 24) < 0)
+                       die("Invalid rfc2822 date \"%s\" in ident: %s", ltgt, buf);
                break;
        case WHENSPEC_NOW:
-               if (strcmp("now", gt))
+               if (strcmp("now", ltgt))
                        die("Date in ident must be 'now': %s", buf);
                datestamp(ident + name_len, 24);
                break;
@@ -2142,6 +2164,11 @@ static uintmax_t do_change_note_fanout(
 
                if (tmp_hex_sha1_len == 40 && !get_sha1_hex(hex_sha1, sha1)) {
                        /* This is a note entry */
+                       if (fanout == 0xff) {
+                               /* Counting mode, no rename */
+                               num_notes++;
+                               continue;
+                       }
                        construct_path_with_fanout(hex_sha1, fanout, realpath);
                        if (!strcmp(fullpath, realpath)) {
                                /* Note entry is in correct location */
@@ -2348,7 +2375,7 @@ static void file_change_cr(struct branch *b, int rename)
                leaf.tree);
 }
 
-static void note_change_n(struct branch *b, unsigned char old_fanout)
+static void note_change_n(struct branch *b, unsigned char *old_fanout)
 {
        const char *p = command_buf.buf + 2;
        static struct strbuf uq = STRBUF_INIT;
@@ -2359,6 +2386,23 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
        uint16_t inline_data = 0;
        unsigned char new_fanout;
 
+       /*
+        * When loading a branch, we don't traverse its tree to count the real
+        * number of notes (too expensive to do this for all non-note refs).
+        * This means that recently loaded notes refs might incorrectly have
+        * b->num_notes == 0, and consequently, old_fanout might be wrong.
+        *
+        * Fix this by traversing the tree and counting the number of notes
+        * when b->num_notes == 0. If the notes tree is truly empty, the
+        * calculation should not take long.
+        */
+       if (b->num_notes == 0 && *old_fanout == 0) {
+               /* Invoke change_note_fanout() in "counting mode". */
+               b->num_notes = change_note_fanout(&b->branch_tree, 0xff);
+               *old_fanout = convert_num_notes_to_fanout(b->num_notes);
+       }
+
+       /* Now parse the notemodify command. */
        /* <dataref> or 'inline' */
        if (*p == ':') {
                char *x;
@@ -2380,6 +2424,8 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
        /* <committish> */
        s = lookup_branch(p);
        if (s) {
+               if (is_null_sha1(s->sha1))
+                       die("Can't add a note on empty branch.");
                hashcpy(commit_sha1, s->sha1);
        } else if (*p == ':') {
                uintmax_t commit_mark = strtoumax(p + 1, NULL, 10);
@@ -2417,7 +2463,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
                            typename(type), command_buf.buf);
        }
 
-       construct_path_with_fanout(sha1_to_hex(commit_sha1), old_fanout, path);
+       construct_path_with_fanout(sha1_to_hex(commit_sha1), *old_fanout, path);
        if (tree_content_remove(&b->branch_tree, path, NULL))
                b->num_notes--;
 
@@ -2604,7 +2650,7 @@ static void parse_new_commit(void)
                else if (!prefixcmp(command_buf.buf, "C "))
                        file_change_cr(b, 0);
                else if (!prefixcmp(command_buf.buf, "N "))
-                       note_change_n(b, prev_fanout);
+                       note_change_n(b, &prev_fanout);
                else if (!strcmp("deleteall", command_buf.buf))
                        file_change_deleteall(b);
                else if (!prefixcmp(command_buf.buf, "ls "))
@@ -2681,6 +2727,8 @@ static void parse_new_tag(void)
        from = strchr(command_buf.buf, ' ') + 1;
        s = lookup_branch(from);
        if (s) {
+               if (is_null_sha1(s->sha1))
+                       die("Can't tag an empty branch.");
                hashcpy(sha1, s->sha1);
                type = OBJ_COMMIT;
        } else if (*from == ':') {
@@ -2690,13 +2738,13 @@ static void parse_new_tag(void)
                type = oe->type;
                hashcpy(sha1, oe->idx.sha1);
        } else if (!get_sha1(from, sha1)) {
-               unsigned long size;
-               char *buf;
-
-               buf = read_sha1_file(sha1, &type, &size);
-               if (!buf || size < 46)
-                       die("Not a valid commit: %s", from);
-               free(buf);
+               struct object_entry *oe = find_object(sha1);
+               if (!oe) {
+                       type = sha1_object_info(sha1, NULL);
+                       if (type < 0)
+                               die("Not a valid object: %s", from);
+               } else
+                       type = oe->type;
        } else
                die("Invalid ref name or SHA1 expression: %s", from);
        read_next_command();
@@ -2800,7 +2848,12 @@ static void cat_blob(struct object_entry *oe, unsigned char sha1[20])
        strbuf_release(&line);
        cat_blob_write(buf, size);
        cat_blob_write("\n", 1);
-       free(buf);
+       if (oe && oe->pack_id == pack_id) {
+               last_blob.offset = oe->idx.offset;
+               strbuf_attach(&last_blob.data, buf, size, size);
+               last_blob.depth = oe->depth;
+       } else
+               free(buf);
 }
 
 static void parse_cat_blob(void)
@@ -2929,7 +2982,7 @@ static void print_ls(int mode, const unsigned char *sha1, const char *path)
                /* mode SP type SP object_name TAB path LF */
                strbuf_reset(&line);
                strbuf_addf(&line, "%06o %s %s\t",
-                               mode, type, sha1_to_hex(sha1));
+                               mode & ~NO_DELTA, type, sha1_to_hex(sha1));
                quote_c_style(path, &line, NULL, 0);
                strbuf_addch(&line, '\n');
        }
@@ -3252,6 +3305,8 @@ int main(int argc, const char **argv)
 
        git_extract_argv0_path(argv[0]);
 
+       git_setup_gettext();
+
        if (argc == 2 && !strcmp(argv[1], "-h"))
                usage(fast_import_usage);
 
@@ -3338,10 +3393,10 @@ int main(int argc, const char **argv)
                fprintf(stderr, "---------------------------------------------------------------------\n");
                fprintf(stderr, "Alloc'd objects: %10" PRIuMAX "\n", alloc_count);
                fprintf(stderr, "Total objects:   %10" PRIuMAX " (%10" PRIuMAX " duplicates                  )\n", total_count, duplicate_count);
-               fprintf(stderr, "      blobs  :   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB]);
-               fprintf(stderr, "      trees  :   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE]);
-               fprintf(stderr, "      commits:   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT]);
-               fprintf(stderr, "      tags   :   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG], delta_count_by_type[OBJ_TAG]);
+               fprintf(stderr, "      blobs  :   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas of %10" PRIuMAX" attempts)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB], delta_count_attempts_by_type[OBJ_BLOB]);
+               fprintf(stderr, "      trees  :   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas of %10" PRIuMAX" attempts)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE], delta_count_attempts_by_type[OBJ_TREE]);
+               fprintf(stderr, "      commits:   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas of %10" PRIuMAX" attempts)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT], delta_count_attempts_by_type[OBJ_COMMIT]);
+               fprintf(stderr, "      tags   :   %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas of %10" PRIuMAX" attempts)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG], delta_count_by_type[OBJ_TAG], delta_count_attempts_by_type[OBJ_TAG]);
                fprintf(stderr, "Total branches:  %10lu (%10lu loads     )\n", branch_count, branch_load_count);
                fprintf(stderr, "      marks:     %10" PRIuMAX " (%10" PRIuMAX " unique    )\n", (((uintmax_t)1) << marks->shift) * 1024, marks_set_count);
                fprintf(stderr, "      atoms:     %10u\n", atom_cnt);