Merge branch 'jh/resize-convert-scratch-buffer'
author Junio C Hamano <gitster@pobox.com>
Tue, 9 Apr 2019 17:14:22 +0000 (02:14 +0900)
committer Junio C Hamano <gitster@pobox.com>
Tue, 9 Apr 2019 17:14:22 +0000 (02:14 +0900)
When the "clean" filter can reduce the size of a huge file in the
working tree down to a small "token" (a la Git LFS), there is no
point in allocating a huge scratch area upfront, but the buffer is
sized based on the original file size. The convert mechanism now
allocates only a minimal buffer up front and grows it as it receives
the output from the clean filter process.

* jh/resize-convert-scratch-buffer:
convert: avoid malloc of original file size

1  2 
convert.c
diff --combined convert.c
index 5d0307fc1004f215c48a6f653e21da41bee71baa,bdaec3411dfb93b3da6b78a0fac625d1fd3d4733..94ff8376492257782a1af5b3d2851ee8724c2edd
+++ b/convert.c
@@@ -1,3 -1,4 +1,3 @@@
 -#define NO_THE_INDEX_COMPATIBILITY_MACROS
  #include "cache.h"
  #include "config.h"
  #include "object-store.h"
@@@ -91,7 -92,7 +91,7 @@@ static void gather_stats(const char *bu
   * The same heuristics as diff.c::mmfile_is_binary()
   * We treat files with bare CR as binary
   */
 -static int convert_is_binary(unsigned long size, const struct text_stat *stats)
 +static int convert_is_binary(const struct text_stat *stats)
  {
        if (stats->lonecr)
                return 1;
@@@ -109,7 -110,7 +109,7 @@@ static unsigned int gather_convert_stat
        if (!data || !size)
                return 0;
        gather_stats(data, size, &stats);
 -      if (convert_is_binary(size, &stats))
 +      if (convert_is_binary(&stats))
                ret |= CONVERT_STAT_BITS_BIN;
        if (stats.crlf)
                ret |= CONVERT_STAT_BITS_TXT_CRLF;
@@@ -244,7 -245,7 +244,7 @@@ static int has_crlf_in_index(const stru
        return has_crlf;
  }
  
 -static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
 +static int will_convert_lf_to_crlf(struct text_stat *stats,
                                   enum crlf_action crlf_action)
  {
        if (output_eol(crlf_action) != EOL_CRLF)
                if (stats->lonecr || stats->crlf)
                        return 0;
  
 -              if (convert_is_binary(len, stats))
 +              if (convert_is_binary(stats))
                        return 0;
        }
        return 1;
@@@ -526,7 -527,7 +526,7 @@@ static int crlf_to_git(const struct ind
        convert_crlf_into_lf = !!stats.crlf;
  
        if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF) {
 -              if (convert_is_binary(len, &stats))
 +              if (convert_is_binary(&stats))
                        return 0;
                /*
                 * If the file in the index has any CR in it, do not
                        new_stats.crlf = 0;
                }
                /* simulate "git checkout" */
 -              if (will_convert_lf_to_crlf(len, &new_stats, crlf_action)) {
 +              if (will_convert_lf_to_crlf(&new_stats, crlf_action)) {
                        new_stats.crlf += new_stats.lonelf;
                        new_stats.lonelf = 0;
                }
        return 1;
  }
  
 -static int crlf_to_worktree(const char *path, const char *src, size_t len,
 +static int crlf_to_worktree(const char *src, size_t len,
                            struct strbuf *buf, enum crlf_action crlf_action)
  {
        char *to_free = NULL;
                return 0;
  
        gather_stats(src, len, &stats);
 -      if (!will_convert_lf_to_crlf(len, &stats, crlf_action))
 +      if (!will_convert_lf_to_crlf(&stats, crlf_action))
                return 0;
  
        /* are we "faking" in place editing ? */
@@@ -704,7 -705,7 +704,7 @@@ static int filter_buffer_or_fd(int in, 
  }
  
  static int apply_single_file_filter(const char *path, const char *src, size_t len, int fd,
 -                        struct strbuf *dst, const char *cmd)
 +                                  struct strbuf *dst, const char *cmd)
  {
        /*
         * Create a pipeline to have the command filter the buffer's
        if (start_async(&async))
                return 0;       /* error was already reported */
  
-       if (strbuf_read(&nbuf, async.out, len) < 0) {
+       if (strbuf_read(&nbuf, async.out, 0) < 0) {
                err = error(_("read from external filter '%s' failed"), cmd);
        }
        if (close(async.out)) {
@@@ -777,8 -778,7 +777,8 @@@ static int start_multi_file_filter_fn(s
  
  static void handle_filter_error(const struct strbuf *filter_status,
                                struct cmd2process *entry,
 -                              const unsigned int wanted_capability) {
 +                              const unsigned int wanted_capability)
 +{
        if (!strcmp(filter_status->buf, "error"))
                ; /* The filter signaled a problem with the file. */
        else if (!strcmp(filter_status->buf, "abort") && wanted_capability) {
@@@ -1090,8 -1090,8 +1090,8 @@@ static int count_ident(const char *cp, 
        return cnt;
  }
  
 -static int ident_to_git(const char *path, const char *src, size_t len,
 -                        struct strbuf *buf, int ident)
 +static int ident_to_git(const char *src, size_t len,
 +                      struct strbuf *buf, int ident)
  {
        char *dst, *dollar;
  
        return 1;
  }
  
 -static int ident_to_worktree(const char *path, const char *src, size_t len,
 -                             struct strbuf *buf, int ident)
 +static int ident_to_worktree(const char *src, size_t len,
 +                           struct strbuf *buf, int ident)
  {
        struct object_id oid;
        char *to_free = NULL, *dollar, *spc;
@@@ -1415,7 -1415,7 +1415,7 @@@ int convert_to_git(const struct index_s
                        len = dst->len;
                }
        }
 -      return ret | ident_to_git(path, src, len, dst, ca.ident);
 +      return ret | ident_to_git(src, len, dst, ca.ident);
  }
  
  void convert_to_git_filter_fd(const struct index_state *istate,
  
        encode_to_git(path, dst->buf, dst->len, dst, ca.working_tree_encoding, conv_flags);
        crlf_to_git(istate, path, dst->buf, dst->len, dst, ca.crlf_action, conv_flags);
 -      ident_to_git(path, dst->buf, dst->len, dst, ca.ident);
 +      ident_to_git(dst->buf, dst->len, dst, ca.ident);
  }
  
  static int convert_to_working_tree_internal(const struct index_state *istate,
  
        convert_attrs(istate, &ca, path);
  
 -      ret |= ident_to_worktree(path, src, len, dst, ca.ident);
 +      ret |= ident_to_worktree(src, len, dst, ca.ident);
        if (ret) {
                src = dst->buf;
                len = dst->len;
         * support smudge).  The filters might expect CRLFs.
         */
        if ((ca.drv && (ca.drv->smudge || ca.drv->process)) || !normalizing) {
 -              ret |= crlf_to_worktree(path, src, len, dst, ca.crlf_action);
 +              ret |= crlf_to_worktree(src, len, dst, ca.crlf_action);
                if (ret) {
                        src = dst->buf;
                        len = dst->len;