/config.mak.autogen
/config.mak.append
/configure
-/unicode
/tags
/TAGS
/cscope*
--- /dev/null
+uniset/
+UnicodeData.txt
+EastAsianWidth.txt
--- /dev/null
+TL;DR: Run update_unicode.sh after the publication of a new Unicode
+standard and commit the resulting unicode_widths.h file.
+
+The long version
+================
+
+The Git source code ships the file unicode_widths.h which contains
+tables of zero and double width Unicode code points, respectively.
+These tables are generated using update_unicode.sh in this directory.
+update_unicode.sh itself uses a third-party tool, uniset, to query two
+Unicode data files for the interesting code points.
+
+On first run, update_unicode.sh clones uniset from Github and builds it.
+This requires a current-ish version of autoconf (2.69 works per December
+2016).
+
+On each run, update_unicode.sh checks whether more recent Unicode data
+files are available from the Unicode consortium, and rebuilds the header
+unicode_widths.h with the new data. The new header can then be
+committed.
--- /dev/null
+#!/bin/sh
+#See http://www.unicode.org/reports/tr44/
+#
+#Me Enclosing_Mark an enclosing combining mark
+#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
+#Cf Format a format control character
+#
+cd "$(dirname "$0")"
+UNICODEWIDTH_H=$(git rev-parse --show-toplevel)/unicode_width.h
+(
+ if ! test -f UnicodeData.txt; then
+ wget http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
+ fi &&
+ if ! test -f EastAsianWidth.txt; then
+ wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
+ fi &&
+ if ! test -d uniset; then
+ git clone https://github.com/depp/uniset.git
+ fi &&
+ (
+ cd uniset &&
+ if ! test -x uniset; then
+ autoreconf -i &&
+ ./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
+ fi &&
+ make
+ ) &&
+ UNICODE_DIR=. && export UNICODE_DIR &&
+ cat >$UNICODEWIDTH_H <<-EOF
+ static const struct interval zero_width[] = {
+ $(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
+ grep -v plane)
+ };
+ static const struct interval double_width[] = {
+ $(uniset/uniset --32 eaw:F,W)
+ };
+ EOF
+)
+++ /dev/null
-#!/bin/sh
-#See http://www.unicode.org/reports/tr44/
-#
-#Me Enclosing_Mark an enclosing combining mark
-#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
-#Cf Format a format control character
-#
-UNICODEWIDTH_H=../unicode_width.h
-if ! test -d unicode; then
- mkdir unicode
-fi &&
-( cd unicode &&
- if ! test -f UnicodeData.txt; then
- wget http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
- fi &&
- if ! test -f EastAsianWidth.txt; then
- wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
- fi &&
- if ! test -d uniset; then
- git clone https://github.com/depp/uniset.git
- fi &&
- (
- cd uniset &&
- if ! test -x uniset; then
- autoreconf -i &&
- ./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
- fi &&
- make
- ) &&
- UNICODE_DIR=. && export UNICODE_DIR &&
- cat >$UNICODEWIDTH_H <<-EOF
- static const struct interval zero_width[] = {
- $(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
- grep -v plane)
- };
- static const struct interval double_width[] = {
- $(uniset/uniset --32 eaw:F,W)
- };
- EOF
-)