Merge branch 'di/fast-import-deltified-tree'
author Junio C Hamano <gitster@pobox.com>
Mon, 29 Aug 2011 04:18:47 +0000 (21:18 -0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 29 Aug 2011 04:18:47 +0000 (21:18 -0700)
* di/fast-import-deltified-tree:
  fast-import: prevent producing bad delta
  fast-import: add a test for tree delta base corruption

1  2 
fast-import.c
t/t9300-fast-import.sh
diff --combined fast-import.c
index 6d491b92fefe5845fe53bf6995baa077e78a93be,6dad9ff4db81bbdda0e44f2c59bb3dcc9f9fb585..016d2456f6cb0f00d0ab80fb6fb038cee200a2c6
@@@ -170,6 -170,11 +170,11 @@@ Format of STDIN stream
  #define DEPTH_BITS 13
  #define MAX_DEPTH ((1<<DEPTH_BITS)-1)
  
+ /*
+  * We abuse the setuid bit on directories to mean "do not delta".
+  */
+ #define NO_DELTA S_ISUID
  struct object_entry {
        struct pack_idx_entry idx;
        struct object_entry *next;
@@@ -304,7 -309,6 +309,7 @@@ static unsigned int atom_cnt
  static struct atom_str **atom_table;
  
  /* The .pack file being generated */
 +static struct pack_idx_option pack_idx_opts;
  static unsigned int pack_id;
  static struct sha1file *pack_file;
  static struct packed_git *pack_data;
@@@ -355,7 -359,6 +360,7 @@@ static unsigned int cmd_save = 100
  static uintmax_t next_mark;
  static struct strbuf new_data = STRBUF_INIT;
  static int seen_data_command;
 +static int require_explicit_termination;
  
  /* Signal handling */
  static volatile sig_atomic_t checkpoint_requested;
@@@ -898,7 -901,7 +903,7 @@@ static const char *create_index(void
        if (c != last)
                die("internal consistency error creating the index");
  
 -      tmpfile = write_idx_file(NULL, idx, object_count, pack_data->sha1);
 +      tmpfile = write_idx_file(NULL, idx, object_count, &pack_idx_opts, pack_data->sha1);
        free(idx);
        return tmpfile;
  }
@@@ -1019,7 -1022,7 +1024,7 @@@ static int store_object
        unsigned char sha1[20];
        unsigned long hdrlen, deltalen;
        git_SHA_CTX c;
 -      z_stream s;
 +      git_zstream s;
  
        hdrlen = sprintf((char *)hdr,"%s %lu", typename(type),
                (unsigned long)dat->len) + 1;
                delta = NULL;
  
        memset(&s, 0, sizeof(s));
 -      deflateInit(&s, pack_compression_level);
 +      git_deflate_init(&s, pack_compression_level);
        if (delta) {
                s.next_in = delta;
                s.avail_in = deltalen;
                s.next_in = (void *)dat->buf;
                s.avail_in = dat->len;
        }
 -      s.avail_out = deflateBound(&s, s.avail_in);
 +      s.avail_out = git_deflate_bound(&s, s.avail_in);
        s.next_out = out = xmalloc(s.avail_out);
 -      while (deflate(&s, Z_FINISH) == Z_OK)
 -              /* nothing */;
 -      deflateEnd(&s);
 +      while (git_deflate(&s, Z_FINISH) == Z_OK)
 +              ; /* nothing */
 +      git_deflate_end(&s);
  
        /* Determine if we should auto-checkpoint. */
        if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize)
                        delta = NULL;
  
                        memset(&s, 0, sizeof(s));
 -                      deflateInit(&s, pack_compression_level);
 +                      git_deflate_init(&s, pack_compression_level);
                        s.next_in = (void *)dat->buf;
                        s.avail_in = dat->len;
 -                      s.avail_out = deflateBound(&s, s.avail_in);
 +                      s.avail_out = git_deflate_bound(&s, s.avail_in);
                        s.next_out = out = xrealloc(out, s.avail_out);
 -                      while (deflate(&s, Z_FINISH) == Z_OK)
 -                              /* nothing */;
 -                      deflateEnd(&s);
 +                      while (git_deflate(&s, Z_FINISH) == Z_OK)
 +                              ; /* nothing */
 +                      git_deflate_end(&s);
                }
        }
  
@@@ -1165,7 -1168,7 +1170,7 @@@ static void stream_blob(uintmax_t len, 
        off_t offset;
        git_SHA_CTX c;
        git_SHA_CTX pack_file_ctx;
 -      z_stream s;
 +      git_zstream s;
        int status = Z_OK;
  
        /* Determine if we should auto-checkpoint. */
        crc32_begin(pack_file);
  
        memset(&s, 0, sizeof(s));
 -      deflateInit(&s, pack_compression_level);
 +      git_deflate_init(&s, pack_compression_level);
  
        hdrlen = encode_in_pack_object_header(OBJ_BLOB, len, out_buf);
        if (out_sz <= hdrlen)
                        len -= n;
                }
  
 -              status = deflate(&s, len ? 0 : Z_FINISH);
 +              status = git_deflate(&s, len ? 0 : Z_FINISH);
  
                if (!s.avail_out || status == Z_STREAM_END) {
                        size_t n = s.next_out - out_buf;
                        die("unexpected deflate failure: %d", status);
                }
        }
 -      deflateEnd(&s);
 +      git_deflate_end(&s);
        git_SHA1_Final(sha1, &c);
  
        if (sha1out)
@@@ -1416,8 -1419,9 +1421,9 @@@ static void mktree(struct tree_content 
                struct tree_entry *e = t->entries[i];
                if (!e->versions[v].mode)
                        continue;
-               strbuf_addf(b, "%o %s%c", (unsigned int)e->versions[v].mode,
-                                       e->name->str_dat, '\0');
+               strbuf_addf(b, "%o %s%c",
+                       (unsigned int)(e->versions[v].mode & ~NO_DELTA),
+                       e->name->str_dat, '\0');
                strbuf_add(b, e->versions[v].sha1, 20);
        }
  }
@@@ -1427,7 -1431,7 +1433,7 @@@ static void store_tree(struct tree_entr
        struct tree_content *t = root->tree;
        unsigned int i, j, del;
        struct last_object lo = { STRBUF_INIT, 0, 0, /* no_swap */ 1 };
-       struct object_entry *le;
+       struct object_entry *le = NULL;
  
        if (!is_null_sha1(root->versions[1].sha1))
                return;
                        store_tree(t->entries[i]);
        }
  
-       le = find_object(root->versions[0].sha1);
+       if (!(root->versions[0].mode & NO_DELTA))
+               le = find_object(root->versions[0].sha1);
        if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) {
                mktree(t, 0, &old_tree);
                lo.data = old_tree;
@@@ -1471,6 -1476,7 +1478,7 @@@ static void tree_content_replace
  {
        if (!S_ISDIR(mode))
                die("Root cannot be a non-directory");
+       hashclr(root->versions[0].sha1);
        hashcpy(root->versions[1].sha1, sha1);
        if (root->tree)
                release_tree_content_recursive(root->tree);
@@@ -1515,6 -1521,23 +1523,23 @@@ static int tree_content_set
                                if (e->tree)
                                        release_tree_content_recursive(e->tree);
                                e->tree = subtree;
+                               /*
+                                * We need to leave e->versions[0].sha1 alone
+                                * to avoid modifying the preimage tree used
+                                * when writing out the parent directory.
+                                * But after replacing the subdir with a
+                                * completely different one, it's not a good
+                                * delta base any more, and besides, we've
+                                * thrown away the tree entries needed to
+                                * make a delta against it.
+                                *
+                                * So let's just explicitly disable deltas
+                                * for the subtree.
+                                */
+                               if (S_ISDIR(e->versions[0].mode))
+                                       e->versions[0].mode |= NO_DELTA;
                                hashclr(root->versions[1].sha1);
                                return 1;
                        }
@@@ -1969,41 -1992,32 +1994,41 @@@ static int validate_raw_date(const cha
  
  static char *parse_ident(const char *buf)
  {
 -      const char *gt;
 +      const char *ltgt;
        size_t name_len;
        char *ident;
  
 -      gt = strrchr(buf, '>');
 -      if (!gt)
 +      /* ensure there is a space delimiter even if there is no name */
 +      if (*buf == '<')
 +              --buf;
 +
 +      ltgt = buf + strcspn(buf, "<>");
 +      if (*ltgt != '<')
 +              die("Missing < in ident string: %s", buf);
 +      if (ltgt != buf && ltgt[-1] != ' ')
 +              die("Missing space before < in ident string: %s", buf);
 +      ltgt = ltgt + 1 + strcspn(ltgt + 1, "<>");
 +      if (*ltgt != '>')
                die("Missing > in ident string: %s", buf);
 -      gt++;
 -      if (*gt != ' ')
 +      ltgt++;
 +      if (*ltgt != ' ')
                die("Missing space after > in ident string: %s", buf);
 -      gt++;
 -      name_len = gt - buf;
 +      ltgt++;
 +      name_len = ltgt - buf;
        ident = xmalloc(name_len + 24);
        strncpy(ident, buf, name_len);
  
        switch (whenspec) {
        case WHENSPEC_RAW:
 -              if (validate_raw_date(gt, ident + name_len, 24) < 0)
 -                      die("Invalid raw date \"%s\" in ident: %s", gt, buf);
 +              if (validate_raw_date(ltgt, ident + name_len, 24) < 0)
 +                      die("Invalid raw date \"%s\" in ident: %s", ltgt, buf);
                break;
        case WHENSPEC_RFC2822:
 -              if (parse_date(gt, ident + name_len, 24) < 0)
 -                      die("Invalid rfc2822 date \"%s\" in ident: %s", gt, buf);
 +              if (parse_date(ltgt, ident + name_len, 24) < 0)
 +                      die("Invalid rfc2822 date \"%s\" in ident: %s", ltgt, buf);
                break;
        case WHENSPEC_NOW:
 -              if (strcmp("now", gt))
 +              if (strcmp("now", ltgt))
                        die("Date in ident must be 'now': %s", buf);
                datestamp(ident + name_len, 24);
                break;
@@@ -2938,7 -2952,7 +2963,7 @@@ static void print_ls(int mode, const un
                /* mode SP type SP object_name TAB path LF */
                strbuf_reset(&line);
                strbuf_addf(&line, "%06o %s %s\t",
-                               mode, type, sha1_to_hex(sha1));
+                               mode & ~NO_DELTA, type, sha1_to_hex(sha1));
                quote_c_style(path, &line, NULL, 0);
                strbuf_addch(&line, '\n');
        }
@@@ -3150,8 -3164,6 +3175,8 @@@ static int parse_one_feature(const cha
                relative_marks_paths = 1;
        } else if (!strcmp(feature, "no-relative-marks")) {
                relative_marks_paths = 0;
 +      } else if (!strcmp(feature, "done")) {
 +              require_explicit_termination = 1;
        } else if (!strcmp(feature, "force")) {
                force_update = 1;
        } else if (!strcmp(feature, "notes") || !strcmp(feature, "ls")) {
@@@ -3208,10 -3220,10 +3233,10 @@@ static int git_pack_config(const char *
                return 0;
        }
        if (!strcmp(k, "pack.indexversion")) {
 -              pack_idx_default_version = git_config_int(k, v);
 -              if (pack_idx_default_version > 2)
 +              pack_idx_opts.version = git_config_int(k, v);
 +              if (pack_idx_opts.version > 2)
                        die("bad pack.indexversion=%"PRIu32,
 -                          pack_idx_default_version);
 +                          pack_idx_opts.version);
                return 0;
        }
        if (!strcmp(k, "pack.packsizelimit")) {
@@@ -3265,7 -3277,6 +3290,7 @@@ int main(int argc, const char **argv
                usage(fast_import_usage);
  
        setup_git_directory();
 +      reset_pack_idx_option(&pack_idx_opts);
        git_config(git_pack_config, NULL);
        if (!pack_compression_seen && core_compression_seen)
                pack_compression_level = core_compression_level;
                        parse_reset_branch();
                else if (!strcmp("checkpoint", command_buf.buf))
                        parse_checkpoint();
 +              else if (!strcmp("done", command_buf.buf))
 +                      break;
                else if (!prefixcmp(command_buf.buf, "progress "))
                        parse_progress();
                else if (!prefixcmp(command_buf.buf, "feature "))
        if (!seen_data_command)
                parse_argv();
  
 +      if (require_explicit_termination && feof(stdin))
 +              die("stream ends early");
 +
        end_packfile();
  
        dump_branches();
diff --combined t/t9300-fast-import.sh
index 4ef7624646eff9a770f05840df8bf1684a73747e,b33bc8246de0ee2c86656315bf49a3dcc0ab4b97..dbe109963e74ed1819811c3a8df69140903eb185
@@@ -324,105 -324,6 +324,105 @@@ test_expect_success 
         test `git rev-parse master` = `git rev-parse TEMP_TAG^`'
  rm -f .git/TEMP_TAG
  
 +git gc 2>/dev/null >/dev/null
 +git prune 2>/dev/null >/dev/null
 +
 +cat >input <<INPUT_END
 +commit refs/heads/empty-committer-1
 +committer  <> $GIT_COMMITTER_DATE
 +data <<COMMIT
 +empty commit
 +COMMIT
 +INPUT_END
 +test_expect_success 'B: accept empty committer' '
 +      git fast-import <input &&
 +      out=$(git fsck) &&
 +      echo "$out" &&
 +      test -z "$out"
 +'
 +git update-ref -d refs/heads/empty-committer-1 || true
 +
 +git gc 2>/dev/null >/dev/null
 +git prune 2>/dev/null >/dev/null
 +
 +cat >input <<INPUT_END
 +commit refs/heads/empty-committer-2
 +committer <a@b.com> $GIT_COMMITTER_DATE
 +data <<COMMIT
 +empty commit
 +COMMIT
 +INPUT_END
 +test_expect_success 'B: accept and fixup committer with no name' '
 +      git fast-import <input &&
 +      out=$(git fsck) &&
 +      echo "$out" &&
 +      test -z "$out"
 +'
 +git update-ref -d refs/heads/empty-committer-2 || true
 +
 +git gc 2>/dev/null >/dev/null
 +git prune 2>/dev/null >/dev/null
 +
 +cat >input <<INPUT_END
 +commit refs/heads/invalid-committer
 +committer Name email> $GIT_COMMITTER_DATE
 +data <<COMMIT
 +empty commit
 +COMMIT
 +INPUT_END
 +test_expect_success 'B: fail on invalid committer (1)' '
 +      test_must_fail git fast-import <input
 +'
 +git update-ref -d refs/heads/invalid-committer || true
 +
 +cat >input <<INPUT_END
 +commit refs/heads/invalid-committer
 +committer Name <e<mail> $GIT_COMMITTER_DATE
 +data <<COMMIT
 +empty commit
 +COMMIT
 +INPUT_END
 +test_expect_success 'B: fail on invalid committer (2)' '
 +      test_must_fail git fast-import <input
 +'
 +git update-ref -d refs/heads/invalid-committer || true
 +
 +cat >input <<INPUT_END
 +commit refs/heads/invalid-committer
 +committer Name <email>> $GIT_COMMITTER_DATE
 +data <<COMMIT
 +empty commit
 +COMMIT
 +INPUT_END
 +test_expect_success 'B: fail on invalid committer (3)' '
 +      test_must_fail git fast-import <input
 +'
 +git update-ref -d refs/heads/invalid-committer || true
 +
 +cat >input <<INPUT_END
 +commit refs/heads/invalid-committer
 +committer Name <email $GIT_COMMITTER_DATE
 +data <<COMMIT
 +empty commit
 +COMMIT
 +INPUT_END
 +test_expect_success 'B: fail on invalid committer (4)' '
 +      test_must_fail git fast-import <input
 +'
 +git update-ref -d refs/heads/invalid-committer || true
 +
 +cat >input <<INPUT_END
 +commit refs/heads/invalid-committer
 +committer Name<email> $GIT_COMMITTER_DATE
 +data <<COMMIT
 +empty commit
 +COMMIT
 +INPUT_END
 +test_expect_success 'B: fail on invalid committer (5)' '
 +      test_must_fail git fast-import <input
 +'
 +git update-ref -d refs/heads/invalid-committer || true
 +
  ###
  ### series C
  ###
@@@ -833,6 -734,47 +833,47 @@@ test_expect_success 
         git diff-tree --abbrev --raw L^ L >output &&
         test_cmp expect output'
  
+ cat >input <<INPUT_END
+ blob
+ mark :1
+ data <<EOF
+ the data
+ EOF
+ commit refs/heads/L2
+ committer C O Mitter <committer@example.com> 1112912473 -0700
+ data <<COMMIT
+ init L2
+ COMMIT
+ M 644 :1 a/b/c
+ M 644 :1 a/b/d
+ M 644 :1 a/e/f
+ commit refs/heads/L2
+ committer C O Mitter <committer@example.com> 1112912473 -0700
+ data <<COMMIT
+ update L2
+ COMMIT
+ C a g
+ C a/e g/b
+ M 644 :1 g/b/h
+ INPUT_END
+ cat <<EOF >expect
+ g/b/f
+ g/b/h
+ EOF
+ test_expect_success \
+     'L: nested tree copy does not corrupt deltas' \
+       'git fast-import <input &&
+       git ls-tree L2 g/b/ >tmp &&
+       cat tmp | cut -f 2 >actual &&
+       test_cmp expect actual &&
+       git fsck `git rev-parse L2`'
+ git update-ref -d refs/heads/L2
  ###
  ### series M
  ###
@@@ -1981,53 -1923,6 +2022,53 @@@ test_expect_success 'R: --import-marks-
        test_cmp expect io.marks
  '
  
 +test_expect_success 'R: feature import-marks-if-exists' '
 +      rm -f io.marks &&
 +      >expect &&
 +
 +      git fast-import --export-marks=io.marks <<-\EOF &&
 +      feature import-marks-if-exists=not_io.marks
 +      EOF
 +      test_cmp expect io.marks &&
 +
 +      blob=$(echo hi | git hash-object --stdin) &&
 +
 +      echo ":1 $blob" >io.marks &&
 +      echo ":1 $blob" >expect &&
 +      echo ":2 $blob" >>expect &&
 +
 +      git fast-import --export-marks=io.marks <<-\EOF &&
 +      feature import-marks-if-exists=io.marks
 +      blob
 +      mark :2
 +      data 3
 +      hi
 +
 +      EOF
 +      test_cmp expect io.marks &&
 +
 +      echo ":3 $blob" >>expect &&
 +
 +      git fast-import --import-marks=io.marks \
 +                      --export-marks=io.marks <<-\EOF &&
 +      feature import-marks-if-exists=not_io.marks
 +      blob
 +      mark :3
 +      data 3
 +      hi
 +
 +      EOF
 +      test_cmp expect io.marks &&
 +
 +      >expect &&
 +
 +      git fast-import --import-marks-if-exists=not_io.marks \
 +                      --export-marks=io.marks <<-\EOF
 +      feature import-marks-if-exists=io.marks
 +      EOF
 +      test_cmp expect io.marks
 +'
 +
  cat >input << EOF
  feature import-marks=marks.out
  feature export-marks=marks.new
@@@ -2343,48 -2238,6 +2384,48 @@@ test_expect_success 'R: quiet option re
      test_cmp empty output
  '
  
 +test_expect_success 'R: feature done means terminating "done" is mandatory' '
 +      echo feature done | test_must_fail git fast-import &&
 +      test_must_fail git fast-import --done </dev/null
 +'
 +
 +test_expect_success 'R: terminating "done" with trailing gibberish is ok' '
 +      git fast-import <<-\EOF &&
 +      feature done
 +      done
 +      trailing gibberish
 +      EOF
 +      git fast-import <<-\EOF
 +      done
 +      more trailing gibberish
 +      EOF
 +'
 +
 +test_expect_success 'R: terminating "done" within commit' '
 +      cat >expect <<-\EOF &&
 +      OBJID
 +      :000000 100644 OBJID OBJID A    hello.c
 +      :000000 100644 OBJID OBJID A    hello2.c
 +      EOF
 +      git fast-import <<-EOF &&
 +      commit refs/heads/done-ends
 +      committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
 +      data <<EOT
 +      Commit terminated by "done" command
 +      EOT
 +      M 100644 inline hello.c
 +      data <<EOT
 +      Hello, world.
 +      EOT
 +      C hello.c hello2.c
 +      done
 +      EOF
 +      git rev-list done-ends |
 +      git diff-tree -r --stdin --root --always |
 +      sed -e "s/$_x40/OBJID/g" >actual &&
 +      test_cmp expect actual
 +'
 +
  cat >input <<EOF
  option git non-existing-option
  EOF