Merge branch 'rj/platform-pread-may-be-thread-unsafe'
authorJunio C Hamano <gitster@pobox.com>
Mon, 9 Jul 2012 16:00:45 +0000 (09:00 -0700)
committerJunio C Hamano <gitster@pobox.com>
Mon, 9 Jul 2012 16:00:45 +0000 (09:00 -0700)
On Cygwin, the platform pread(3) is not thread safe, just like our
own compat/ emulation, and cannot be used in the index-pack program.

* rj/platform-pread-may-be-thread-unsafe:
index-pack: Disable threading on cygwin

1  2 
Makefile
builtin/index-pack.c
diff --combined Makefile
index cba9f7788ddd8e9e9c405f331df6f7a3e7d02c54,67d761e83adad8e3332b22fbe753e0cdb81b1a83..ae62a50bc5f5b10bf66754ebfd857c964553c3f9
+++ b/Makefile
@@@ -158,6 -158,9 +158,9 @@@ all:
  # Define NO_PREAD if you have a problem with pread() system call (e.g.
  # cygwin1.dll before v1.5.22).
  #
+ # Define NO_THREAD_SAFE_PREAD if your pread() implementation is not
+ # thread-safe. (e.g. compat/pread.c or cygwin)
+ #
  # Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is
  # generally faster on your platform than accessing the working directory.
  #
  # Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks
  # field that counts the on-disk footprint in 512-byte blocks.
  #
 -# Define ASCIIDOC7 if you want to format documentation with AsciiDoc 7
 -#
  # Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72
  # (not v1.73 or v1.71).
  #
  # the diff algorithm.  It gives a nice speedup if your processor has
  # fast unaligned word loads.  Does NOT work on big-endian systems!
  # Enabled by default on x86_64.
 +#
 +# Define GIT_USER_AGENT if you want to change how git identifies itself during
 +# network interactions.  The default is "git/$(GIT_VERSION)".
 +#
 +# Define DEFAULT_HELP_FORMAT to "man", "info" or "html"
 +# (defaults to "man") if you want to have a different default when
 +# "git help" is called without a parameter specifying the format.
  
  GIT-VERSION-FILE: FORCE
        @$(SHELL_PATH) ./GIT-VERSION-GEN
@@@ -804,7 -802,6 +807,7 @@@ LIB_OBJS += usage.
  LIB_OBJS += userdiff.o
  LIB_OBJS += utf8.o
  LIB_OBJS += varint.o
 +LIB_OBJS += version.o
  LIB_OBJS += walker.o
  LIB_OBJS += wrapper.o
  LIB_OBJS += write_or_die.o
@@@ -910,8 -907,6 +913,8 @@@ BUILTIN_OBJS += builtin/write-tree.
  GITLIBS = $(LIB_FILE) $(XDIFF_LIB)
  EXTLIBS =
  
 +GIT_USER_AGENT = git/$(GIT_VERSION)
 +
  #
  # Platform specific tweaks
  #
@@@ -1059,6 -1054,7 +1062,7 @@@ ifeq ($(uname_O),Cygwin
                NO_IPV6 = YesPlease
                OLD_ICONV = UnfortunatelyYes
        endif
+       NO_THREAD_SAFE_PREAD = YesPlease
        NEEDS_LIBICONV = YesPlease
        NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes
        NO_TRUSTABLE_FILEMODE = UnfortunatelyYes
@@@ -1244,7 -1240,6 +1248,7 @@@ ifeq ($(uname_S),Windows
        BLK_SHA1 = YesPlease
        NO_POSIX_GOODIES = UnfortunatelyYes
        NATIVE_CRLF = YesPlease
 +      DEFAULT_HELP_FORMAT = html
  
        CC = compat/vcbuild/scripts/clink.pl
        AR = compat/vcbuild/scripts/lib.pl
@@@ -1668,6 -1663,10 +1672,10 @@@ endi
  ifdef NO_PREAD
        COMPAT_CFLAGS += -DNO_PREAD
        COMPAT_OBJS += compat/pread.o
+       NO_THREAD_SAFE_PREAD = YesPlease
+ endif
+ ifdef NO_THREAD_SAFE_PREAD
+       BASIC_CFLAGS += -DNO_THREAD_SAFE_PREAD
  endif
  ifdef NO_FAST_WORKING_DIRECTORY
        BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY
@@@ -1843,6 -1842,10 +1851,6 @@@ ifndef 
  endif
  endif
  
 -ifdef ASCIIDOC7
 -      export ASCIIDOC7
 -endif
 -
  ifdef NO_INSTALL_HARDLINKS
        export NO_INSTALL_HARDLINKS
  endif
@@@ -1920,15 -1923,6 +1928,15 @@@ SHELL_PATH_CQ_SQ = $(subst ','\'',$(SHE
  BASIC_CFLAGS += -DSHELL_PATH='$(SHELL_PATH_CQ_SQ)'
  endif
  
 +GIT_USER_AGENT_SQ = $(subst ','\'',$(GIT_USER_AGENT))
 +GIT_USER_AGENT_CQ = "$(subst ",\",$(subst \,\\,$(GIT_USER_AGENT)))"
 +GIT_USER_AGENT_CQ_SQ = $(subst ','\'',$(GIT_USER_AGENT_CQ))
 +BASIC_CFLAGS += -DGIT_USER_AGENT='$(GIT_USER_AGENT_CQ_SQ)'
 +
 +ifdef DEFAULT_HELP_FORMAT
 +BASIC_CFLAGS += -DDEFAULT_HELP_FORMAT='"$(DEFAULT_HELP_FORMAT)"'
 +endif
 +
  ALL_CFLAGS += $(BASIC_CFLAGS)
  ALL_LDFLAGS += $(BASIC_LDFLAGS)
  
@@@ -1976,7 -1970,7 +1984,7 @@@ strip: $(PROGRAMS) git$
        $(STRIP) $(STRIP_OPTS) $(PROGRAMS) git$X
  
  git.o: common-cmds.h
 -git.sp git.s git.o: EXTRA_CPPFLAGS = -DGIT_VERSION='"$(GIT_VERSION)"' \
 +git.sp git.s git.o: EXTRA_CPPFLAGS = \
        '-DGIT_HTML_PATH="$(htmldir_SQ)"' \
        '-DGIT_MAN_PATH="$(mandir_SQ)"' \
        '-DGIT_INFO_PATH="$(infodir_SQ)"'
@@@ -1993,9 -1987,6 +2001,9 @@@ builtin/help.sp builtin/help.s builtin/
        '-DGIT_MAN_PATH="$(mandir_SQ)"' \
        '-DGIT_INFO_PATH="$(infodir_SQ)"'
  
 +version.sp version.s version.o: EXTRA_CPPFLAGS = \
 +      '-DGIT_VERSION="$(GIT_VERSION)"'
 +
  $(BUILT_INS): git$X
        $(QUIET_BUILT_IN)$(RM) $@ && \
        ln git$X $@ 2>/dev/null || \
@@@ -2013,7 -2004,6 +2021,7 @@@ sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|
      -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
      -e 's|@@DIFF@@|$(DIFF_SQ)|' \
      -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \
 +    -e 's|@@GIT_USER_AGENT@@|$(GIT_USER_AGENT_SQ)|g' \
      -e 's|@@LOCALEDIR@@|$(localedir_SQ)|g' \
      -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
      -e 's/@@USE_GETTEXT_SCHEME@@/$(USE_GETTEXT_SCHEME)/g' \
@@@ -2107,7 -2097,7 +2115,7 @@@ configure: configure.a
        $(RM) $<+
  
  # These can record GIT_VERSION
 -git.o git.spec http.o \
 +version.o git.spec \
        $(patsubst %.sh,%,$(SCRIPT_SH)) \
        $(patsubst %.perl,%,$(SCRIPT_PERL)) \
        : GIT-VERSION-FILE
@@@ -2277,6 -2267,9 +2285,6 @@@ attr.sp attr.s attr.o: EXTRA_CPPFLAGS 
  gettext.sp gettext.s gettext.o: EXTRA_CPPFLAGS = \
        -DGIT_LOCALE_PATH='"$(localedir_SQ)"'
  
 -http.sp http.s http.o: EXTRA_CPPFLAGS = \
 -      -DGIT_HTTP_USER_AGENT='"git/$(GIT_VERSION)"'
 -
  ifdef NO_EXPAT
  http-walker.sp http-walker.s http-walker.o: EXTRA_CPPFLAGS = -DNO_EXPAT
  endif
diff --combined builtin/index-pack.c
index 8b5c1eb33e18cdec0f74e246e501f50461757e9a,470547835ca41888817b85dc4b7ed39273ede784..5a0372ab08e7d7fe8381356669702b5e5838f78f
@@@ -9,7 -9,6 +9,7 @@@
  #include "progress.h"
  #include "fsck.h"
  #include "exec_cmd.h"
 +#include "streaming.h"
  #include "thread-utils.h"
  
  static const char index_pack_usage[] =
@@@ -40,8 -39,8 +40,8 @@@ struct base_data 
        int ofs_first, ofs_last;
  };
  
- #if !defined(NO_PTHREADS) && defined(NO_PREAD)
- /* NO_PREAD uses compat/pread.c, which is not thread-safe. Disable threading. */
+ #if !defined(NO_PTHREADS) && defined(NO_THREAD_SAFE_PREAD)
+ /* pread() emulation is not thread-safe. Disable threading. */
  #define NO_PTHREADS
  #endif
  
@@@ -385,62 -384,30 +385,62 @@@ static void unlink_base_data(struct bas
        free_base_data(c);
  }
  
 -static void *unpack_entry_data(unsigned long offset, unsigned long size)
 +static int is_delta_type(enum object_type type)
 +{
 +      return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
 +}
 +
 +static void *unpack_entry_data(unsigned long offset, unsigned long size,
 +                             enum object_type type, unsigned char *sha1)
  {
 +      static char fixed_buf[8192];
        int status;
        git_zstream stream;
 -      void *buf = xmalloc(size);
 +      void *buf;
 +      git_SHA_CTX c;
 +      char hdr[32];
 +      int hdrlen;
 +
 +      if (!is_delta_type(type)) {
 +              hdrlen = sprintf(hdr, "%s %lu", typename(type), size) + 1;
 +              git_SHA1_Init(&c);
 +              git_SHA1_Update(&c, hdr, hdrlen);
 +      } else
 +              sha1 = NULL;
 +      if (type == OBJ_BLOB && size > big_file_threshold)
 +              buf = fixed_buf;
 +      else
 +              buf = xmalloc(size);
  
        memset(&stream, 0, sizeof(stream));
        git_inflate_init(&stream);
        stream.next_out = buf;
 -      stream.avail_out = size;
 +      stream.avail_out = buf == fixed_buf ? sizeof(fixed_buf) : size;
  
        do {
 +              unsigned char *last_out = stream.next_out;
                stream.next_in = fill(1);
                stream.avail_in = input_len;
                status = git_inflate(&stream, 0);
                use(input_len - stream.avail_in);
 +              if (sha1)
 +                      git_SHA1_Update(&c, last_out, stream.next_out - last_out);
 +              if (buf == fixed_buf) {
 +                      stream.next_out = buf;
 +                      stream.avail_out = sizeof(fixed_buf);
 +              }
        } while (status == Z_OK);
        if (stream.total_out != size || status != Z_STREAM_END)
                bad_object(offset, _("inflate returned %d"), status);
        git_inflate_end(&stream);
 -      return buf;
 +      if (sha1)
 +              git_SHA1_Final(sha1, &c);
 +      return buf == fixed_buf ? NULL : buf;
  }
  
 -static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
 +static void *unpack_raw_entry(struct object_entry *obj,
 +                            union delta_base *delta_base,
 +                            unsigned char *sha1)
  {
        unsigned char *p;
        unsigned long size, c;
        }
        obj->hdr_size = consumed_bytes - obj->idx.offset;
  
 -      data = unpack_entry_data(obj->idx.offset, obj->size);
 +      data = unpack_entry_data(obj->idx.offset, obj->size, obj->type, sha1);
        obj->idx.crc32 = input_crc32;
        return data;
  }
  
 -static void *get_data_from_pack(struct object_entry *obj)
 +static void *unpack_data(struct object_entry *obj,
 +                       int (*consume)(const unsigned char *, unsigned long, void *),
 +                       void *cb_data)
  {
        off_t from = obj[0].idx.offset + obj[0].hdr_size;
        unsigned long len = obj[1].idx.offset - from;
        git_zstream stream;
        int status;
  
 -      data = xmalloc(obj->size);
 +      data = xmalloc(consume ? 64*1024 : obj->size);
        inbuf = xmalloc((len < 64*1024) ? len : 64*1024);
  
        memset(&stream, 0, sizeof(stream));
        git_inflate_init(&stream);
        stream.next_out = data;
 -      stream.avail_out = obj->size;
 +      stream.avail_out = consume ? 64*1024 : obj->size;
  
        do {
 +              unsigned char *last_out = stream.next_out;
                ssize_t n = (len < 64*1024) ? len : 64*1024;
                n = pread(pack_fd, inbuf, n, from);
                if (n < 0)
                stream.next_in = inbuf;
                stream.avail_in = n;
                status = git_inflate(&stream, 0);
 +              if (consume) {
 +                      if (consume(last_out, stream.next_out - last_out, cb_data)) {
 +                              free(inbuf);
 +                              free(data);
 +                              return NULL;
 +                      }
 +                      stream.next_out = data;
 +                      stream.avail_out = 64*1024;
 +              }
        } while (len && status == Z_OK && !stream.avail_in);
  
        /* This has been inflated OK when first encountered, so... */
  
        git_inflate_end(&stream);
        free(inbuf);
 +      if (consume) {
 +              free(data);
 +              data = NULL;
 +      }
        return data;
  }
  
 +static void *get_data_from_pack(struct object_entry *obj)
 +{
 +      return unpack_data(obj, NULL, NULL);
 +}
 +
  static int compare_delta_bases(const union delta_base *base1,
                               const union delta_base *base2,
                               enum object_type type1,
@@@ -622,102 -568,25 +622,102 @@@ static void find_delta_children(const u
        *last_index = last;
  }
  
 -static void sha1_object(const void *data, unsigned long size,
 -                      enum object_type type, unsigned char *sha1)
 +struct compare_data {
 +      struct object_entry *entry;
 +      struct git_istream *st;
 +      unsigned char *buf;
 +      unsigned long buf_size;
 +};
 +
 +static int compare_objects(const unsigned char *buf, unsigned long size,
 +                         void *cb_data)
  {
 -      hash_sha1_file(data, size, typename(type), sha1);
 +      struct compare_data *data = cb_data;
 +
 +      if (data->buf_size < size) {
 +              free(data->buf);
 +              data->buf = xmalloc(size);
 +              data->buf_size = size;
 +      }
 +
 +      while (size) {
 +              ssize_t len = read_istream(data->st, data->buf, size);
 +              if (len == 0)
 +                      die(_("SHA1 COLLISION FOUND WITH %s !"),
 +                          sha1_to_hex(data->entry->idx.sha1));
 +              if (len < 0)
 +                      die(_("unable to read %s"),
 +                          sha1_to_hex(data->entry->idx.sha1));
 +              if (memcmp(buf, data->buf, len))
 +                      die(_("SHA1 COLLISION FOUND WITH %s !"),
 +                          sha1_to_hex(data->entry->idx.sha1));
 +              size -= len;
 +              buf += len;
 +      }
 +      return 0;
 +}
 +
 +static int check_collison(struct object_entry *entry)
 +{
 +      struct compare_data data;
 +      enum object_type type;
 +      unsigned long size;
 +
 +      if (entry->size <= big_file_threshold || entry->type != OBJ_BLOB)
 +              return -1;
 +
 +      memset(&data, 0, sizeof(data));
 +      data.entry = entry;
 +      data.st = open_istream(entry->idx.sha1, &type, &size, NULL);
 +      if (!data.st)
 +              return -1;
 +      if (size != entry->size || type != entry->type)
 +              die(_("SHA1 COLLISION FOUND WITH %s !"),
 +                  sha1_to_hex(entry->idx.sha1));
 +      unpack_data(entry, compare_objects, &data);
 +      close_istream(data.st);
 +      free(data.buf);
 +      return 0;
 +}
 +
 +static void sha1_object(const void *data, struct object_entry *obj_entry,
 +                      unsigned long size, enum object_type type,
 +                      const unsigned char *sha1)
 +{
 +      void *new_data = NULL;
 +      int collision_test_needed;
 +
 +      assert(data || obj_entry);
 +
        read_lock();
 -      if (has_sha1_file(sha1)) {
 +      collision_test_needed = has_sha1_file(sha1);
 +      read_unlock();
 +
 +      if (collision_test_needed && !data) {
 +              read_lock();
 +              if (!check_collison(obj_entry))
 +                      collision_test_needed = 0;
 +              read_unlock();
 +      }
 +      if (collision_test_needed) {
                void *has_data;
                enum object_type has_type;
                unsigned long has_size;
 +              read_lock();
 +              has_type = sha1_object_info(sha1, &has_size);
 +              if (has_type != type || has_size != size)
 +                      die(_("SHA1 COLLISION FOUND WITH %s !"), sha1_to_hex(sha1));
                has_data = read_sha1_file(sha1, &has_type, &has_size);
                read_unlock();
 +              if (!data)
 +                      data = new_data = get_data_from_pack(obj_entry);
                if (!has_data)
                        die(_("cannot read existing object %s"), sha1_to_hex(sha1));
                if (size != has_size || type != has_type ||
                    memcmp(data, has_data, size) != 0)
                        die(_("SHA1 COLLISION FOUND WITH %s !"), sha1_to_hex(sha1));
                free(has_data);
 -      } else
 -              read_unlock();
 +      }
  
        if (strict) {
                read_lock();
                        int eaten;
                        void *buf = (void *) data;
  
 +                      if (!buf)
 +                              buf = new_data = get_data_from_pack(obj_entry);
 +
                        /*
                         * we do not need to free the memory here, as the
                         * buf is deleted by the caller.
                }
                read_unlock();
        }
 -}
  
 -static int is_delta_type(enum object_type type)
 -{
 -      return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
 +      free(new_data);
  }
  
  /*
@@@ -842,9 -711,7 +842,9 @@@ static void resolve_delta(struct object
        free(delta_data);
        if (!result->data)
                bad_object(delta_obj->idx.offset, _("failed to apply delta"));
 -      sha1_object(result->data, result->size, delta_obj->real_type,
 +      hash_sha1_file(result->data, result->size,
 +                     typename(delta_obj->real_type), delta_obj->idx.sha1);
 +      sha1_object(result->data, NULL, result->size, delta_obj->real_type,
                    delta_obj->idx.sha1);
        counter_lock();
        nr_resolved_deltas++;
@@@ -974,7 -841,7 +974,7 @@@ static void *threaded_second_pass(void 
   */
  static void parse_pack_objects(unsigned char *sha1)
  {
 -      int i;
 +      int i, nr_delays = 0;
        struct delta_entry *delta = deltas;
        struct stat st;
  
                                nr_objects);
        for (i = 0; i < nr_objects; i++) {
                struct object_entry *obj = &objects[i];
 -              void *data = unpack_raw_entry(obj, &delta->base);
 +              void *data = unpack_raw_entry(obj, &delta->base, obj->idx.sha1);
                obj->real_type = obj->type;
                if (is_delta_type(obj->type)) {
                        nr_deltas++;
                        delta->obj_no = i;
                        delta++;
 +              } else if (!data) {
 +                      /* large blobs, check later */
 +                      obj->real_type = OBJ_BAD;
 +                      nr_delays++;
                } else
 -                      sha1_object(data, obj->size, obj->type, obj->idx.sha1);
 +                      sha1_object(data, NULL, obj->size, obj->type, obj->idx.sha1);
                free(data);
                display_progress(progress, i+1);
        }
        if (S_ISREG(st.st_mode) &&
                        lseek(input_fd, 0, SEEK_CUR) - input_len != st.st_size)
                die(_("pack has junk at the end"));
 +
 +      for (i = 0; i < nr_objects; i++) {
 +              struct object_entry *obj = &objects[i];
 +              if (obj->real_type != OBJ_BAD)
 +                      continue;
 +              obj->real_type = obj->type;
 +              sha1_object(NULL, obj, obj->size, obj->type, obj->idx.sha1);
 +              nr_delays--;
 +      }
 +      if (nr_delays)
 +              die(_("confusion beyond insanity in parse_pack_objects()"));
  }
  
  /*