grep/pcre: prepare locale-dependent tables for icase matching
author Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Sat, 25 Jun 2016 05:22:33 +0000 (07:22 +0200)
committer Junio C Hamano <gitster@pobox.com>
Fri, 1 Jul 2016 19:44:57 +0000 (12:44 -0700)
The default tables are usually built with C locale and only suitable
for LANG=C or similar. This should make case insensitive search work
correctly for all single-byte charsets.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
grep.c
grep.h
t/t7813-grep-icase-iso.sh [new file with mode: 0755]
diff --git a/grep.c b/grep.c
index 6325cafe73fe695d5bb04bc979d79917669255f4..af920c45425cddc2778cdf627c28adf6e7138fb7 100644 (file)
--- a/grep.c
+++ b/grep.c
@@ -324,11 +324,14 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
        int erroffset;
        int options = PCRE_MULTILINE;
 
-       if (opt->ignore_case)
+       if (opt->ignore_case) {
+               if (has_non_ascii(p->pattern))
+                       p->pcre_tables = pcre_maketables();
                options |= PCRE_CASELESS;
+       }
 
        p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
-                       NULL);
+                                     p->pcre_tables);
        if (!p->pcre_regexp)
                compile_regexp_failed(p, error);
 
@@ -362,6 +365,7 @@ static void free_pcre_regexp(struct grep_pat *p)
 {
        pcre_free(p->pcre_regexp);
        pcre_free(p->pcre_extra_info);
+       pcre_free((void *)p->pcre_tables);
 }
 #else /* !USE_LIBPCRE */
 static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
diff --git a/grep.h b/grep.h
index 95f197a8d9bfc2a264530d17fcfa90f68dfa840d..cee4357b1738ed145cc06090e891147a7b4e9420 100644 (file)
--- a/grep.h
+++ b/grep.h
@@ -48,6 +48,7 @@ struct grep_pat {
        regex_t regexp;
        pcre *pcre_regexp;
        pcre_extra *pcre_extra_info;
+       const unsigned char *pcre_tables;
        kwset_t kws;
        unsigned fixed:1;
        unsigned ignore_case:1;
diff --git a/t/t7813-grep-icase-iso.sh b/t/t7813-grep-icase-iso.sh
new file mode 100755 (executable)
index 0000000..efef7fb
--- /dev/null
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+test_description='grep icase on non-English locales'
+
+. ./lib-gettext.sh
+
+test_expect_success GETTEXT_ISO_LOCALE 'setup' '
+       printf "TILRAUN: Halló Heimur!" >file &&
+       git add file &&
+       LC_ALL="$is_IS_iso_locale" &&
+       export LC_ALL
+'
+
+test_expect_success GETTEXT_ISO_LOCALE,LIBPCRE 'grep pcre string' '
+       git grep --perl-regexp -i "TILRAUN: H.lló Heimur!" &&
+       git grep --perl-regexp -i "TILRAUN: H.LLÓ HEIMUR!"
+'
+
+test_done