Merge branch 'jc/same-encoding'

author Junio C Hamano <gitster@pobox.com>
Thu, 15 Nov 2012 18:24:05 +0000 (10:24 -0800)

committer Junio C Hamano <gitster@pobox.com>
Thu, 15 Nov 2012 18:24:05 +0000 (10:24 -0800)
author: Junio C Hamano <gitster@pobox.com>
Thu, 15 Nov 2012 18:24:05 +0000 (10:24 -0800)
committer: Junio C Hamano <gitster@pobox.com>
Thu, 15 Nov 2012 18:24:05 +0000 (10:24 -0800)
diff --combined builtin/mailinfo.c

index da231400b327b86a18a054075f6aee4749846932,90b158d4f5c5b22a9493877dea6b08d568d637f9..24a772d8e1b7355a58088d784fdc18cd54302b7d
--- 1/builtin/mailinfo.c
--- 2/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@@ -477,13 -477,37 +477,14 @@@ static struct strbuf *decode_b_segment(
         return out;
   }
   
- -/*
- - * When there is no known charset, guess.
- - *
- - * Right now we assume that if the target is UTF-8 (the default),
- - * and it already looks like UTF-8 (which includes US-ASCII as its
- - * subset, of course) then that is what it is and there is nothing
- - * to do.
- - *
- - * Otherwise, we default to assuming it is Latin1 for historical
- - * reasons.
- - */
- -static const char *guess_charset(const struct strbuf *line, const char *target_charset)
- -{
- -      if (is_encoding_utf8(target_charset)) {
- -              if (is_utf8(line->buf))
- -                      return NULL;
- -      }
- -      return "ISO8859-1";
- -}
- -
   static void convert_to_utf8(struct strbuf *line, const char *charset)
   {
         char *out;
   
- -      if (!charset || !*charset) {
- -              charset = guess_charset(line, metainfo_charset);
- -              if (!charset)
- -                      return;
- -      }
+ +      if (!charset || !*charset)
+ +              return;
-       if (!strcasecmp(metainfo_charset, charset))
+ 
+       if (same_encoding(metainfo_charset, charset))
                 return;
         out = reencode_string(line->buf, metainfo_charset, charset);
         if (!out)
diff --combined notes.c

index bc454e1eab2b0e1d264cee7bb1f84bdb3bb14237,e48f6604d8c4b6dc3954335f718ff10dad4c68cd..ee8f01f1d5c1dcb39c40eee443eddaedde380d20
--- 1/notes.c
--- 2/notes.c
+++ b/notes.c
@@@ -1196,18 -1196,8 +1196,18 @@@ void free_notes(struct notes_tree *t
         memset(t, 0, sizeof(struct notes_tree));
   }
   
- -void format_note(struct notes_tree *t, const unsigned char *object_sha1,
- -              struct strbuf *sb, const char *output_encoding, int flags)
+ +/*
+ + * Fill the given strbuf with the notes associated with the given object.
+ + *
+ + * If the given notes_tree structure is not initialized, it will be auto-
+ + * initialized to the default value (see documentation for init_notes() above).
+ + * If the given notes_tree is NULL, the internal/default notes_tree will be
+ + * used instead.
+ + *
+ + * 'flags' is a bitwise combination of the flags for format_display_notes.
+ + */
+ +static void format_note(struct notes_tree *t, const unsigned char *object_sha1,
+ +                      struct strbuf *sb, const char *output_encoding, int flags)
   {
         static const char utf8[] = "utf-8";
         const unsigned char *sha1;
@@@ -1231,7 -1221,7 +1231,7 @@@
         }
   
         if (output_encoding && *output_encoding &&
-                       strcmp(utf8, output_encoding)) {
+           !is_encoding_utf8(output_encoding)) {
                 char *reencoded = reencode_string(msg, output_encoding, utf8);
                 if (reencoded) {
                         free(msg);
diff --combined pretty.c

index 413e7587b6f67326a68a7679ca5496cd1249fbcd,e87fe9fec33babf783852f4a4abf536f579e5767..dba682828c2e005b71c0ccbb325fb666915984bc
--- 1/pretty.c
--- 2/pretty.c
+++ b/pretty.c
@@@ -231,7 -231,7 +231,7 @@@ static int is_rfc822_special(char ch
         }
   }
   
- -static int has_rfc822_specials(const char *s, int len)
+ +static int needs_rfc822_quoting(const char *s, int len)
   {
         int i;
         for (i = 0; i < len; i++)
@@@ -240,17 -240,6 +240,17 @@@
         return 0;
   }
   
+ +static int last_line_length(struct strbuf *sb)
+ +{
+ +      int i;
+ +
+ +      /* How many bytes are already used on the last line? */
+ +      for (i = sb->len - 1; i >= 0; i--)
+ +              if (sb->buf[i] == '\n')
+ +                      break;
+ +      return sb->len - (i + 1);
+ +}
+ +
   static void add_rfc822_quoted(struct strbuf *out, const char *s, int len)
   {
         int i;
@@@ -272,110 -261,57 +272,110 @@@
         strbuf_addch(out, '"');
   }
   
- -static int is_rfc2047_special(char ch)
+ +enum rfc2047_type {
+ +      RFC2047_SUBJECT,
+ +      RFC2047_ADDRESS,
+ +};
+ +
+ +static int is_rfc2047_special(char ch, enum rfc2047_type type)
   {
- -      return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
+ +      /*
+ +       * rfc2047, section 4.2:
+ +       *
+ +       *    8-bit values which correspond to printable ASCII characters other
+ +       *    than "=", "?", and "_" (underscore), MAY be represented as those
+ +       *    characters.  (But see section 5 for restrictions.)  In
+ +       *    particular, SPACE and TAB MUST NOT be represented as themselves
+ +       *    within encoded words.
+ +       */
+ +
+ +      /*
+ +       * rule out non-ASCII characters and non-printable characters (the
+ +       * non-ASCII check should be redundant as isprint() is not localized
+ +       * and only knows about ASCII, but be defensive about that)
+ +       */
+ +      if (non_ascii(ch) || !isprint(ch))
+ +              return 1;
+ +
+ +      /*
+ +       * rule out special printable characters (' ' should be the only
+ +       * whitespace character considered printable, but be defensive and use
+ +       * isspace())
+ +       */
+ +      if (isspace(ch) || ch == '=' || ch == '?' || ch == '_')
+ +              return 1;
+ +
+ +      /*
+ +       * rfc2047, section 5.3:
+ +       *
+ +       *    As a replacement for a 'word' entity within a 'phrase', for example,
+ +       *    one that precedes an address in a From, To, or Cc header.  The ABNF
+ +       *    definition for 'phrase' from RFC 822 thus becomes:
+ +       *
+ +       *    phrase = 1*( encoded-word / word )
+ +       *
+ +       *    In this case the set of characters that may be used in a "Q"-encoded
+ +       *    'encoded-word' is restricted to: <upper and lower case ASCII
+ +       *    letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
+ +       *    (underscore, ASCII 95.)>.  An 'encoded-word' that appears within a
+ +       *    'phrase' MUST be separated from any adjacent 'word', 'text' or
+ +       *    'special' by 'linear-white-space'.
+ +       */
+ +
+ +      if (type != RFC2047_ADDRESS)
+ +              return 0;
+ +
+ +      /* '=' and '_' are special cases and have been checked above */
+ +      return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/');
   }
   
- -static void add_rfc2047(struct strbuf *sb, const char *line, int len,
- -                     const char *encoding)
+ +static int needs_rfc2047_encoding(const char *line, int len,
+ +                                enum rfc2047_type type)
   {
- -      static const int max_length = 78; /* per rfc2822 */
         int i;
- -      int line_len;
- -
- -      /* How many bytes are already used on the current line? */
- -      for (i = sb->len - 1; i >= 0; i--)
- -              if (sb->buf[i] == '\n')
- -                      break;
- -      line_len = sb->len - (i+1);
   
         for (i = 0; i < len; i++) {
                 int ch = line[i];
                 if (non_ascii(ch) || ch == '\n')
- -                      goto needquote;
+ +                      return 1;
                 if ((i + 1 < len) && (ch == '=' && line[i+1] == '?'))
- -                      goto needquote;
+ +                      return 1;
         }
- -      strbuf_add_wrapped_bytes(sb, line, len, 0, 1, max_length - line_len);
- -      return;
   
- -needquote:
+ +      return 0;
+ +}
+ +
+ +static void add_rfc2047(struct strbuf *sb, const char *line, int len,
+ +                     const char *encoding, enum rfc2047_type type)
+ +{
+ +      static const int max_encoded_length = 76; /* per rfc2047 */
+ +      int i;
+ +      int line_len = last_line_length(sb);
+ +
         strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
         strbuf_addf(sb, "=?%s?q?", encoding);
         line_len += strlen(encoding) + 5; /* 5 for =??q? */
         for (i = 0; i < len; i++) {
                 unsigned ch = line[i] & 0xFF;
+ +              int is_special = is_rfc2047_special(ch, type);
+ +
+ +              /*
+ +               * According to RFC 2047, we could encode the special character
+ +               * ' ' (space) with '_' (underscore) for readability. But many
+ +               * programs do not understand this and just leave the
+ +               * underscore in place. Thus, we do nothing special here, which
+ +               * causes ' ' to be encoded as '=20', avoiding this problem.
+ +               */
   
- -              if (line_len >= max_length - 2) {
+ +              if (line_len + 2 + (is_special ? 3 : 1) > max_encoded_length) {
                         strbuf_addf(sb, "?=\n =?%s?q?", encoding);
                         line_len = strlen(encoding) + 5 + 1; /* =??q? plus SP */
                 }
   
- -              /*
- -               * We encode ' ' using '=20' even though rfc2047
- -               * allows using '_' for readability.  Unfortunately,
- -               * many programs do not understand this and just
- -               * leave the underscore in place.
- -               */
- -              if (is_rfc2047_special(ch) || ch == ' ' || ch == '\n') {
+ +              if (is_special) {
                         strbuf_addf(sb, "=%02X", ch);
                         line_len += 3;
- -              }
- -              else {
+ +              } else {
                         strbuf_addch(sb, ch);
                         line_len++;
                 }
@@@ -387,7 -323,6 +387,7 @@@ void pp_user_info(const struct pretty_p
                   const char *what, struct strbuf *sb,
                   const char *line, const char *encoding)
   {
+ +      int max_length = 78; /* per rfc2822 */
         char *date;
         int namelen;
         unsigned long time;
@@@ -405,27 -340,25 +405,27 @@@
         if (pp->fmt == CMIT_FMT_EMAIL) {
                 char *name_tail = strchr(line, '<');
                 int display_name_length;
- -              int final_line;
                 if (!name_tail)
                         return;
                 while (line < name_tail && isspace(name_tail[-1]))
                         name_tail--;
                 display_name_length = name_tail - line;
                 strbuf_addstr(sb, "From: ");
- -              if (!has_rfc822_specials(line, display_name_length)) {
- -                      add_rfc2047(sb, line, display_name_length, encoding);
- -              } else {
+ +              if (needs_rfc2047_encoding(line, display_name_length, RFC2047_ADDRESS)) {
+ +                      add_rfc2047(sb, line, display_name_length,
+ +                                              encoding, RFC2047_ADDRESS);
+ +                      max_length = 76; /* per rfc2047 */
+ +              } else if (needs_rfc822_quoting(line, display_name_length)) {
                         struct strbuf quoted = STRBUF_INIT;
                         add_rfc822_quoted(&quoted, line, display_name_length);
- -                      add_rfc2047(sb, quoted.buf, quoted.len, encoding);
+ +                      strbuf_add_wrapped_bytes(sb, quoted.buf, quoted.len,
+ +                                                      -6, 1, max_length);
                         strbuf_release(&quoted);
+ +              } else {
+ +                      strbuf_add_wrapped_bytes(sb, line, display_name_length,
+ +                                                      -6, 1, max_length);
                 }
- -              for (final_line = 0; final_line < sb->len; final_line++)
- -                      if (sb->buf[sb->len - final_line - 1] == '\n')
- -                              break;
- -              if (namelen - display_name_length + final_line > 78) {
+ +              if (namelen - display_name_length + last_line_length(sb) > max_length) {
                         strbuf_addch(sb, '\n');
                         if (!isspace(name_tail[0]))
                                 strbuf_addch(sb, ' ');
@@@ -571,7 -504,7 +571,7 @@@ char *logmsg_reencode(const struct comm
                 return NULL;
         encoding = get_header(commit, "encoding");
         use_encoding = encoding ? encoding : utf8;
-       if (!strcmp(use_encoding, output_encoding))
+       if (same_encoding(use_encoding, output_encoding))
                 if (encoding) /* we'll strip encoding header later */
                         out = xstrdup(commit->buffer);
                 else
@@@ -1345,7 -1278,6 +1345,7 @@@ void pp_title_line(const struct pretty_
                    const char *encoding,
                    int need_8bit_cte)
   {
+ +      static const int max_length = 78; /* per rfc2822 */
         struct strbuf title;
   
         strbuf_init(&title, 80);
@@@ -1355,12 -1287,7 +1355,12 @@@
         strbuf_grow(sb, title.len + 1024);
         if (pp->subject) {
                 strbuf_addstr(sb, pp->subject);
- -              add_rfc2047(sb, title.buf, title.len, encoding);
+ +              if (needs_rfc2047_encoding(title.buf, title.len, RFC2047_SUBJECT))
+ +                      add_rfc2047(sb, title.buf, title.len,
+ +                                              encoding, RFC2047_SUBJECT);
+ +              else
+ +                      strbuf_add_wrapped_bytes(sb, title.buf, title.len,
+ +                                       -last_line_length(sb), 1, max_length);
         } else {
                 strbuf_addbuf(sb, &title);
         }
diff --combined sequencer.c

index be0cb8b1076dc29bee5a77fbbde155514723c6a8,f2f5b137eafe822249bbcb173d1db5118a85dd04..22604902aa4c4dd146f562c4841c344a773a9bda
--- 1/sequencer.c
--- 2/sequencer.c
+++ b/sequencer.c
@@@ -17,9 -17,7 +17,9 @@@
   
   #define GIT_REFLOG_ACTION "GIT_REFLOG_ACTION"
   
- -void remove_sequencer_state(void)
+ +const char sign_off_header[] = "Signed-off-by: ";
+ +
+ +static void remove_sequencer_state(void)
   {
         struct strbuf seq_dir = STRBUF_INIT;
   
@@@ -60,7 -58,7 +60,7 @@@ static int get_message(struct commit *c
   
         out->reencoded_message = NULL;
         out->message = commit->buffer;
-       if (strcmp(encoding, git_commit_encoding))
+       if (same_encoding(encoding, git_commit_encoding))
                 out->reencoded_message = reencode_string(commit->buffer,
                                         git_commit_encoding, encoding);
         if (out->reencoded_message)
@@@ -191,7 -189,7 +191,7 @@@ static int fast_forward_to(const unsign
         struct ref_lock *ref_lock;
   
         read_cache();
- -      if (checkout_fast_forward(from, to))
+ +      if (checkout_fast_forward(from, to, 1))
                 exit(1); /* the callee should have complained already */
         ref_lock = lock_any_ref_for_update("HEAD", from, 0);
         return write_ref_sha1(ref_lock, to, "cherry-pick");
@@@ -235,9 -233,6 +235,9 @@@ static int do_recursive_merge(struct co
                 die(_("%s: Unable to write new index file"), action_name(opts));
         rollback_lock_file(&index_lock);
   
+ +      if (opts->signoff)
+ +              append_signoff(msgbuf, 0);
+ +
         if (!clean) {
                 int i;
                 strbuf_addstr(msgbuf, "\nConflicts:\n");
@@@ -316,9 -311,6 +316,9 @@@ static int run_git_commit(const char *d
         if (allow_empty)
                 argv_array_push(&array, "--allow-empty");
   
+ +      if (opts->allow_empty_message)
+ +              argv_array_push(&array, "--allow-empty-message");
+ +
         rc = run_command_v_opt(array.argv, RUN_GIT_CMD);
         argv_array_clear(&array);
         return rc;
@@@ -1016,63 -1008,3 +1016,63 @@@ int sequencer_pick_revisions(struct rep
         save_opts(opts);
         return pick_commits(todo_list, opts);
   }
+ +
+ +static int ends_rfc2822_footer(struct strbuf *sb, int ignore_footer)
+ +{
+ +      int ch;
+ +      int hit = 0;
+ +      int i, j, k;
+ +      int len = sb->len - ignore_footer;
+ +      int first = 1;
+ +      const char *buf = sb->buf;
+ +
+ +      for (i = len - 1; i > 0; i--) {
+ +              if (hit && buf[i] == '\n')
+ +                      break;
+ +              hit = (buf[i] == '\n');
+ +      }
+ +
+ +      while (i < len - 1 && buf[i] == '\n')
+ +              i++;
+ +
+ +      for (; i < len; i = k) {
+ +              for (k = i; k < len && buf[k] != '\n'; k++)
+ +                      ; /* do nothing */
+ +              k++;
+ +
+ +              if ((buf[k] == ' ' || buf[k] == '\t') && !first)
+ +                      continue;
+ +
+ +              first = 0;
+ +
+ +              for (j = 0; i + j < len; j++) {
+ +                      ch = buf[i + j];
+ +                      if (ch == ':')
+ +                              break;
+ +                      if (isalnum(ch) ||
+ +                          (ch == '-'))
+ +                              continue;
+ +                      return 0;
+ +              }
+ +      }
+ +      return 1;
+ +}
+ +
+ +void append_signoff(struct strbuf *msgbuf, int ignore_footer)
+ +{
+ +      struct strbuf sob = STRBUF_INIT;
+ +      int i;
+ +
+ +      strbuf_addstr(&sob, sign_off_header);
+ +      strbuf_addstr(&sob, fmt_name(getenv("GIT_COMMITTER_NAME"),
+ +                              getenv("GIT_COMMITTER_EMAIL")));
+ +      strbuf_addch(&sob, '\n');
+ +      for (i = msgbuf->len - 1 - ignore_footer; i > 0 && msgbuf->buf[i - 1] != '\n'; i--)
+ +              ; /* do nothing */
+ +      if (prefixcmp(msgbuf->buf + i, sob.buf)) {
+ +              if (!i || !ends_rfc2822_footer(msgbuf, ignore_footer))
+ +                      strbuf_splice(msgbuf, msgbuf->len - ignore_footer, 0, "\n", 1);
+ +              strbuf_splice(msgbuf, msgbuf->len - ignore_footer, 0, sob.buf, sob.len);
+ +      }
+ +      strbuf_release(&sob);
+ +}
diff --combined utf8.c

index 28791a7c3174924967182d54c8b4a7f9600c87bf,6a52834576e23b15de7a38ef407fdb41179dbe4a..5c61bbe1131e7bbdd939c8b815bd5222b872e3fb
--- 1/utf8.c
--- 2/utf8.c
+++ b/utf8.c
@@@ -353,7 -353,7 +353,7 @@@ retry
   
                 c = *text;
                 if (!c || isspace(c)) {
- -                      if (w < width || !space) {
+ +                      if (w <= width || !space) {
                                 const char *start = bol;
                                 if (!c && text == start)
                                         return w;
@@@ -423,6 -423,13 +423,13 @@@ int is_encoding_utf8(const char *name
         return 0;
   }
   
+ int same_encoding(const char *src, const char *dst)
+ {
+       if (is_encoding_utf8(src) && is_encoding_utf8(dst))
+               return 1;
+       return !strcasecmp(src, dst);
+ }
+ 
   /*
    * Given a buffer and its encoding, return it re-encoded
    * with iconv.  If the conversion fails, returns NULL.
author	Junio C Hamano <gitster@pobox.com>
author	Thu, 15 Nov 2012 18:24:05 +0000 (10:24 -0800)
committer	Junio C Hamano <gitster@pobox.com>
committer	Thu, 15 Nov 2012 18:24:05 +0000 (10:24 -0800)
		1	2
builtin/mailinfo.c	patch \|	diff1 \|	diff2 \|	blob \| history
notes.c	patch \|	diff1 \|	diff2 \|	blob \| history
pretty.c	patch \|	diff1 \|	diff2 \|	blob \| history
sequencer.c	patch \|	diff1 \|	diff2 \|	blob \| history
utf8.c	patch \|	diff1 \|	diff2 \|	blob \| history