APPROVE COMMIT
NOTE: This patch has been committed
# HG changeset patch
# User Aidan Kehoe <kehoea(a)parhasard.net>
# Date 1507320562 -3600
# Fri Oct 06 21:09:22 2017 +0100
# Node ID e3925877abce049cee4c83fcfd8a78a875ee813b
# Parent 19876e056bb7630f1854719c8b037a1f2fb691e8
Drop Ben's attempt to make Big5 into one charset, unicode-internal.
src/ChangeLog addition:
2017-10-06 Aidan Kehoe <kehoea(a)parhasard.net>
* lisp.h:
* mule-charset.c:
* mule-charset.c (syms_of_mule_charset):
* mule-charset.c (complex_vars_of_mule_charset):
* mule-charset.c (init_mule_charset):
* mule-coding.c:
* mule-coding.c (byte_big5_two_byte_1_p):
* mule-coding.c (big5_decode):
* mule-coding.c (big5_encode):
* mule-coding.c (decode_big5_char):
* mule-coding.c (big5_char_to_fake_codepoint):
* mule-coding.c (Fencode_big5_char):
* mule-coding.c (init_mule_coding):
* text.c:
* unicode.c (verify_load_unicode_args):
Remove the attempt to implement Big5 as one charset under
unicode-internal; that attempt made unicode-internal builds
incompatible with byte-code compiled by 21.4 and even source code,
e.g. leim/quail/Punct-B5.el.
lisp/ChangeLog addition:
2017-10-06 Aidan Kehoe <kehoea(a)parhasard.net>
* mule/chinese.el:
* mule/chinese.el (for):
* mule/chinese.el (chinese-big5/list):
* mule/chinese.el (find-charset):
* mule/mule-category.el (predefined-category-list):
* mule/mule-charset.el:
Remove Ben's attempt under unicode-internal to make Big5 into one
charset. This is incompatible with byte-compiled code from 21.4,
which uses the hackish chinese-big5-1, chinese-big5-2 charsets
with ISO-2022 escapes to encode these characters.
If at some point in the future we use UTF-8 for compiled
code, this could be revisited.
diff -r 19876e056bb7 -r e3925877abce lisp/ChangeLog
--- a/lisp/ChangeLog Thu Oct 05 00:08:17 2017 +0100
+++ b/lisp/ChangeLog Fri Oct 06 21:09:22 2017 +0100
@@ -1,3 +1,18 @@
+2017-10-06 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * mule/chinese.el:
+ * mule/chinese.el (for):
+ * mule/chinese.el (chinese-big5/list):
+ * mule/chinese.el (find-charset):
+ * mule/mule-category.el (predefined-category-list):
+ * mule/mule-charset.el:
+ Remove Ben's attempt under unicode-internal to make Big5 into one
+ charset. This is incompatible with byte-compiled code from 21.4,
+ which uses the hackish chinese-big5-1, chinese-big5-2 charsets
+ with ISO-2022 escapes to encode these characters.
+ If at some point in the future we use UTF-8 for compiled
+ code, this could be revisited.
+
2017-09-30 Aidan Kehoe <kehoea(a)parhasard.net>
* mule/thai.el:
diff -r 19876e056bb7 -r e3925877abce lisp/mule/chinese.el
--- a/lisp/mule/chinese.el Thu Oct 05 00:08:17 2017 +0100
+++ b/lisp/mule/chinese.el Fri Oct 06 21:09:22 2017 +0100
@@ -37,11 +37,8 @@
;; Setup auto-fill-chars for charsets that should invoke auto-filling.
;; SPACE and NEWLINE are already set.
-(loop for l in `(chinese-gb2312
- ,@(if (find-charset 'chinese-big5-1)
- '(chinese-big5-1 chinese-big5-2)
- '(chinese-big5)))
- do (put-char-table l t auto-fill-chars))
+(loop for l in '(chinese-gb2312 chinese-big5-1 chinese-big5-2)
+ do (put-char-table l t auto-fill-chars))
;; CNS11643 Plane3 thru Plane7
;; These represent more and more obscure Chinese characters.
@@ -111,11 +108,9 @@
:list '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
chinese-cns11643-7))
-(define-charset-tag 'chinese-big5/list
- :list (if (featurep 'unicode-internal) '(chinese-big5)
- '(chinese-big5-1 chinese-big5-2)))
-(define-charset-tag 'chinese-gb/list
- :list '(chinese-gb2312 chinese-sisheng))
+(define-charset-tag 'chinese-big5/list :list '(chinese-big5-1 chinese-big5-2))
+
+(define-charset-tag 'chinese-gb/list :list '(chinese-gb2312 chinese-sisheng))
(define-charset-tag 'chinese-gb-env/list
:list '(chinese-gb/list chinese-big5/list chinese-cns/list chinese/language))
@@ -302,13 +297,8 @@
(define-coding-system-alias 'cn-big5 'big5)
-;; Need to use `compiled-when' because otherwise we will get an error when
-;; compiling this file under Unicode-internal because (charset-id
-;; 'chinese-big5-2) gets evaluated at compile time and fails.
-(compiled-when (find-charset 'chinese-big5-1)
- ;; Big5 font requires special encoding. But under Unicode-internal we
- ;; have one single charset `chinese-big5', with no special encoding
- ;; needed.
+(when (find-charset 'chinese-big5-1)
+ ;; Big5 font requires special encoding.
(define-ccl-program ccl-encode-big5-font
`(0
;; In: R0:chinese-big5-1 or chinese-big5-2
diff -r 19876e056bb7 -r e3925877abce lisp/mule/mule-category.el
--- a/lisp/mule/mule-category.el Thu Oct 05 00:08:17 2017 +0100
+++ b/lisp/mule/mule-category.el Fri Oct 06 21:09:22 2017 +0100
@@ -238,10 +238,8 @@
;;much better category definitions.
;;(chinese-cns11643-1 ?t "Chinese Taiwan (CNS or Big5) 2-byte character set")
(chinese-cns11643-2 ?t "Chinese Taiwan (CNS or Big5) 2-byte character set")
- ,@(if (find-charset 'chinese-big5-1)
- '((chinese-big5-1 ?t)
- (chinese-big5-2 ?t))
- '((chinese-big5 ?t)))
+ (chinese-big5-1 ?t)
+ (chinese-big5-2 ?t)
(korean-ksc5601 ?h "Hangul (Korean) 2-byte character set")
(jit-ucs-charset-0 ?J "Just-in-time-allocated Unicode character")
)
diff -r 19876e056bb7 -r e3925877abce lisp/mule/mule-charset.el
--- a/lisp/mule/mule-charset.el Thu Oct 05 00:08:17 2017 +0100
+++ b/lisp/mule/mule-charset.el Fri Oct 06 21:09:22 2017 +0100
@@ -1254,73 +1254,64 @@
tags (cns kanji traditional-chinese chinese/language)
))
-(if (featurep 'unicode-internal)
- ;; We can support Big5 directly.
- (make-charset
- 'chinese-big5
- "Big5 (Chinese traditional)"
- '(dimension 2
- ;; Big5 claims to be a 94x157 charset, but with gaps in the middle.
- ;; In particular, the rows are (theoretically) indexed from A1 - FE
- ;; and the columns from 40 - 7E and A1 - FE. In fact, there are gaps
- ;; in the rows as well (rows C7 and C8 are missing, as well as rows
- ;; FA - FE), but that appears to be due to accident -- i.e. they just
- ;; ran out of chars and/or wanted to make room for expansion. Note
- ;; also that the gap at C7 and C8 is due to the Level-1/Level-2
- ;; division of Big5 (see below). The 94 rows are those between
- ;; A1 and FE, inclusive. The 157 columns count the sum of the columns
- ;; in each disjoint set. For us, we need to use the size of the range
- ;; [40, FE], which is 191.
- chars (94 191)
- offset (161 64)
- short-name "Chinese traditional (Big5)"
- long-name "Chinese traditional (Big5)"
- registries ["big5.eten-0"]
- unicode-map ("unicode/unicode-consortium/EASTASIA/OBSOLETE/BIG5.TXT")
- tags (kanji traditional-chinese chinese/language)
- ))
- ;; Old Mule situation; we can only handle up to 96x96 charsets.
- ;; So we split it into two charsets. According to Ken Lunde's CJKV
- ;; book, Big5 itself is split into "Big Five Level 1" (rows A1-C6)
- ;; and "Big Five Level 2" (rows C9-F9), with the latter containing
- ;; less used characters. We split the same way then coerce the
- ;; result into a 94x94 block.
- (make-charset
- 'chinese-big5-1
- "Frequently used part (A141-C67F) of Big5 (Chinese traditional)"
- '(dimension 2
- chars 94
- final ?0
- graphic 0
- short-name "Chinese traditional (Big5), L1"
- long-name "Chinese traditional (Big5) (Level-1) A141-C67F"
- registries ["big5.eten-0"]
- ;; no unicode map, see chinese-big5-2
- tags (kanji traditional-chinese chinese/language)
- ))
- (make-charset
- 'chinese-big5-2
- "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
- '(dimension 2
- chars 94
- final ?1
- graphic 0
- short-name "Chinese traditional (Big5), L2"
- long-name "Chinese traditional (Big5) (Level-2) C940-FEFE"
- registries ["big5.eten-0"]
- ;; HACK HACK HACK! The `big5' special flag tells the internal code
- ;; in Fload_unicode_mapping_table() to take codepoints out of the
- ;; Big5 table, convert them to a codepoint in a "fake" chinese-big5-1
- ;; or chinese-big5-2, and store appropriately. Hence, it actually
- ;; ignores the name of the charset on which the property is set and
- ;; always stores in the "right" place. Correspondingly, we must set
- ;; the property on big5-2, not 1, so that both charsets will be
- ;; created by the time we initialize the map.
- unicode-map ("unicode/unicode-consortium/EASTASIA/OBSOLETE/BIG5.TXT"
- nil nil nil big5)
- tags (kanji traditional-chinese chinese/language)
- ))
- )
+;; Big5 claims to be a 94x157 charset, but with gaps in the middle.
+;; In particular, the rows are (theoretically) indexed from A1 - FE
+;; and the columns from 40 - 7E and A1 - FE. In fact, there are gaps
+;; in the rows as well (rows C7 and C8 are missing, as well as rows
+;; FA - FE), but that appears to be due to accident -- i.e. they just
+;; ran out of chars and/or wanted to make room for expansion. Note
+;; also that the gap at C7 and C8 is due to the Level-1/Level-2
+;; division of Big5 (see below). The 94 rows are those between
+;; A1 and FE, inclusive. The 157 columns count the sum of the columns
+;; in each disjoint set. For us, we need to use the size of the range
+;; [40, FE], which is 191.
+
+;; Old Mule situation; we can only handle up to 96x96 charsets.
+;; So we split it into two charsets. According to Ken Lunde's CJKV
+;; book, Big5 itself is split into "Big Five Level 1" (rows A1-C6)
+;; and "Big Five Level 2" (rows C9-F9), with the latter containing
+;; less used characters. We split the same way then coerce the
+;; result into a 94x94 block.
+
+;; Since escape-quoted, used for byte-compiled files and auto-saves even on
+;; unicode-internal, is ISO 2022-based, it's not practical to switch, as Ben
+;; attempted to do, to a single Big5 charset for unicode-internal; doing this
+;; would mean byte-compiled code from 21.4 would error on 21.5.
+
+(make-charset
+ 'chinese-big5-1
+ "Frequently used part (A141-C67F) of Big5 (Chinese traditional)"
+ '(dimension 2
+ chars 94
+ final ?0
+ graphic 0
+ short-name "Chinese traditional (Big5), L1"
+ long-name "Chinese traditional (Big5) (Level-1) A141-C67F"
+ registries ["big5.eten-0"]
+ ;; no unicode map, see chinese-big5-2
+ tags (kanji traditional-chinese chinese/language)))
+
+(make-charset
+ 'chinese-big5-2
+ "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
+ '(dimension 2
+ chars 94
+ final ?1
+ graphic 0
+ short-name "Chinese traditional (Big5), L2"
+ long-name "Chinese traditional (Big5) (Level-2) C940-FEFE"
+ registries ["big5.eten-0"]
+ ;; HACK HACK HACK! The `big5' special flag tells the internal code
+ ;; in Fload_unicode_mapping_table() to take codepoints out of the
+ ;; Big5 table, convert them to a codepoint in a "fake" chinese-big5-1
+ ;; or chinese-big5-2, and store appropriately. Hence, it actually
+ ;; ignores the name of the charset on which the property is set and
+ ;; always stores in the "right" place. Correspondingly, we must set
+ ;; the property on big5-2, not 1, so that both charsets will be
+ ;; created by the time we initialize the map.
+ unicode-map ("unicode/unicode-consortium/EASTASIA/OBSOLETE/BIG5.TXT"
+ nil nil nil big5)
+ tags (kanji traditional-chinese chinese/language)))
;;;;;;;;;;;;;;;;;;;;; Korean ;;;;;;;;;;;;;;;;;;;;
diff -r 19876e056bb7 -r e3925877abce src/ChangeLog
--- a/src/ChangeLog Thu Oct 05 00:08:17 2017 +0100
+++ b/src/ChangeLog Fri Oct 06 21:09:22 2017 +0100
@@ -1,3 +1,25 @@
+2017-10-06 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * lisp.h:
+ * mule-charset.c:
+ * mule-charset.c (syms_of_mule_charset):
+ * mule-charset.c (complex_vars_of_mule_charset):
+ * mule-charset.c (init_mule_charset):
+ * mule-coding.c:
+ * mule-coding.c (byte_big5_two_byte_1_p):
+ * mule-coding.c (big5_decode):
+ * mule-coding.c (big5_encode):
+ * mule-coding.c (decode_big5_char):
+ * mule-coding.c (big5_char_to_fake_codepoint):
+ * mule-coding.c (Fencode_big5_char):
+ * mule-coding.c (init_mule_coding):
+ * text.c:
+ * unicode.c (verify_load_unicode_args):
+ Remove the attempt to implement Big5 as one charset under
+ unicode-internal; that attempt made unicode-internal builds
+ incompatible with byte-code compiled by 21.4 and even source code,
+ e.g. leim/quail/Punct-B5.el.
+
2017-10-04 Aidan Kehoe <kehoea(a)parhasard.net>
* text.c (old_mule_ichar_to_unicode):
diff -r 19876e056bb7 -r e3925877abce src/lisp.h
--- a/src/lisp.h Thu Oct 05 00:08:17 2017 +0100
+++ b/src/lisp.h Fri Oct 06 21:09:22 2017 +0100
@@ -5824,12 +5824,10 @@
Qchinese_cns11643_1,
Qchinese_cns11643_2,
#ifdef UNICODE_INTERNAL
- Qchinese_big5,
Qjapanese_shift_jis,
-#else /* not UNICODE_INTERNAL */
+#endif /* UNICODE_INTERNAL */
Qchinese_big5_1,
Qchinese_big5_2,
-#endif /* UNICODE_INTERNAL */
Qchinese_sisheng,
Qcomposite;
diff -r 19876e056bb7 -r e3925877abce src/mule-charset.c
--- a/src/mule-charset.c Thu Oct 05 00:08:17 2017 +0100
+++ b/src/mule-charset.c Fri Oct 06 21:09:22 2017 +0100
@@ -60,12 +60,10 @@
Lisp_Object Vcharset_japanese_jisx0212;
Lisp_Object Vcharset_chinese_cns11643_1;
Lisp_Object Vcharset_chinese_cns11643_2;
-#ifdef UNICODE_INTERNAL
-Lisp_Object Vcharset_chinese_big5;
-Lisp_Object Vcharset_japanese_shift_jis;
-#else
Lisp_Object Vcharset_chinese_big5_1;
Lisp_Object Vcharset_chinese_big5_2;
+#ifdef UNICODE_INTERNAL
+Lisp_Object Vcharset_japanese_shift_jis;
#endif /* UNICODE_INTERNAL */
Lisp_Object Vcharset_composite;
@@ -197,12 +195,10 @@
Qjapanese_jisx0212,
Qchinese_cns11643_1,
Qchinese_cns11643_2,
-#ifdef UNICODE_INTERNAL
- Qchinese_big5,
- Qjapanese_shift_jis,
-#else /* not UNICODE_INTERNAL */
Qchinese_big5_1,
Qchinese_big5_2,
+#ifdef UNICODE_INTERNAL
+ Qjapanese_shift_jis,
#endif /* UNICODE_INTERNAL */
Qchinese_sisheng,
Qcomposite;
@@ -1838,12 +1834,10 @@
DEFSYMBOL (Qjapanese_jisx0212);
DEFSYMBOL (Qchinese_cns11643_1);
DEFSYMBOL (Qchinese_cns11643_2);
-#ifdef UNICODE_INTERNAL
- DEFSYMBOL (Qchinese_big5);
- DEFSYMBOL (Qjapanese_shift_jis);
-#else /* not UNICODE_INTERNAL */
DEFSYMBOL (Qchinese_big5_1);
DEFSYMBOL (Qchinese_big5_2);
+#ifdef UNICODE_INTERNAL
+ DEFSYMBOL (Qjapanese_shift_jis);
#endif /* UNICODE_INTERNAL */
DEFSYMBOL (Qcomposite);
@@ -2021,17 +2015,14 @@
Vcharset_chinese_cns11643_1 = Qnil;
staticpro (&Vcharset_chinese_cns11643_2);
Vcharset_chinese_cns11643_2 = Qnil;
-#ifdef UNICODE_INTERNAL
- /* We can support Shift-JIS and Big5 directly.*/
- staticpro (&Vcharset_japanese_shift_jis);
- Vcharset_japanese_shift_jis = Qnil;
- staticpro (&Vcharset_chinese_big5);
- Vcharset_chinese_big5 = Qnil;
-#else
staticpro (&Vcharset_chinese_big5_1);
Vcharset_chinese_big5_1 = Qnil;
staticpro (&Vcharset_chinese_big5_2);
Vcharset_chinese_big5_2 = Qnil;
+#ifdef UNICODE_INTERNAL
+ /* We can support Shift-JIS directly.*/
+ staticpro (&Vcharset_japanese_shift_jis);
+ Vcharset_japanese_shift_jis = Qnil;
#endif /* UNICODE_INTERNAL */
#ifdef ENABLE_COMPOSITE_CHARS
@@ -2144,12 +2135,10 @@
Vcharset_japanese_jisx0212 = Fget_charset (Qjapanese_jisx0212);
Vcharset_chinese_cns11643_1 = Fget_charset (Qchinese_cns11643_1);
Vcharset_chinese_cns11643_2 = Fget_charset (Qchinese_cns11643_2);
-#ifdef UNICODE_INTERNAL
- /* We can support Shift-JIS and Big5 directly.*/
- Vcharset_japanese_shift_jis = Fget_charset (Qjapanese_shift_jis);
- Vcharset_chinese_big5 = Fget_charset (Qchinese_big5);
-#else
Vcharset_chinese_big5_1 = Fget_charset (Qchinese_big5_1);
Vcharset_chinese_big5_2 = Fget_charset (Qchinese_big5_2);
+#ifdef UNICODE_INTERNAL
+ /* We can support Shift-JIS directly.*/
+ Vcharset_japanese_shift_jis = Fget_charset (Qjapanese_shift_jis);
#endif /* UNICODE_INTERNAL */
}
diff -r 19876e056bb7 -r e3925877abce src/mule-coding.c
--- a/src/mule-coding.c Thu Oct 05 00:08:17 2017 +0100
+++ b/src/mule-coding.c Fri Oct 06 21:09:22 2017 +0100
@@ -898,8 +898,6 @@
/* BIG5 (used for Mandarin in Taiwan). */
DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (big5);
-#ifndef UNICODE_INTERNAL
-
/* BIG5 is a coding system encoding two character sets: ASCII and
Big5. An ASCII character is encoded as is. Big5 is a two-byte
character set and is encoded in two-byte.
@@ -991,8 +989,6 @@
b2 += b2 < 0x3F ? 0x40 : 0x62; \
} while (0)
-#endif /* not UNICODE_INTERNAL */
-
inline static int
byte_big5_two_byte_1_p (int c)
{
@@ -1024,17 +1020,11 @@
/* Previous character was first byte of Big5 char. */
if (byte_big5_two_byte_2_p (c))
{
-#ifdef UNICODE_INTERNAL
- non_ascii_charset_codepoint_to_dynarr
- (Vcharset_chinese_big5, data->ch, c, dst,
- CONVERR_USE_PRIVATE);
-#else /* not UNICODE_INTERNAL */
Lisp_Object charset;
int b1, b2;
DECODE_BIG5 (data->ch, c, charset, b1, b2);
non_ascii_charset_codepoint_to_dynarr
(charset, b1, b2, dst, CONVERR_USE_PRIVATE);
-#endif /* UNICODE_INTERNAL */
}
else
{
@@ -1084,13 +1074,6 @@
}
ichar_to_charset_codepoint
(ich, Vbig5_precedence, &charset, &c1, &c2, CONVERR_FAIL);
-#ifdef UNICODE_INTERNAL
- if (EQ (charset, Vcharset_chinese_big5))
- {
- Dynarr_add (dst, c1);
- Dynarr_add (dst, c2);
- }
-#else /* not UNICODE_INTERNAL */
if (EQ (charset, Vcharset_chinese_big5_1) ||
EQ (charset, Vcharset_chinese_big5_2))
{
@@ -1099,7 +1082,6 @@
Dynarr_add (dst, b1);
Dynarr_add (dst, b2);
}
-#endif /* UNICODE_INTERNAL */
else
{
handle_standard_encoding_error (str, src, dst);
@@ -1131,10 +1113,6 @@
static Ichar
decode_big5_char (int b1, int b2)
{
-#ifdef UNICODE_INTERNAL
- return charset_codepoint_to_ichar (Vcharset_chinese_big5, b1, b2,
- CONVERR_FAIL);
-#else /* not UNICODE_INTERNAL */
if (byte_big5_two_byte_1_p (b1) &&
byte_big5_two_byte_2_p (b2))
{
@@ -1146,11 +1124,8 @@
}
else
return -1;
-#endif /* UNICODE_INTERNAL */
}
-#ifndef UNICODE_INTERNAL
-
void
big5_char_to_fake_codepoint (int b1, int b2, Lisp_Object *charset, int *c1,
int *c2)
@@ -1168,8 +1143,6 @@
}
}
-#endif /* not UNICODE_INTERNAL */
-
DEFUN ("decode-big5-char", Fdecode_big5_char, 1, 1, 0, /*
Convert Big Five character codes in CODE into a character.
CODE is a cons of two integers specifying the codepoints in Big Five.
@@ -1208,12 +1181,6 @@
CHECK_CHAR_COERCE_INT (character);
ichar_to_charset_codepoint (XCHAR (character), Vbig5_precedence,
&charset, &c1, &c2, CONVERR_FAIL);
-#ifdef UNICODE_INTERNAL
- if (EQ (charset, Vcharset_chinese_big5))
- {
- return Fcons (make_fixnum (c1), make_fixnum (c2));
- }
-#else /* not UNICODE_INTERNAL */
if (EQ (charset, Vcharset_chinese_big5_1) ||
EQ (charset, Vcharset_chinese_big5_2))
{
@@ -1221,7 +1188,6 @@
ENCODE_BIG5 (charset, c1, c2, b1, b2);
return Fcons (make_fixnum (b1), make_fixnum (b2));
}
-#endif /* UNICODE_INTERNAL */
else
return Qnil;
}
@@ -4187,14 +4153,7 @@
simple_convert_predence_list_to_array
(list3 (Vcharset_japanese_jisx0208, Vcharset_japanese_jisx0208_1978,
Vcharset_katakana_jisx0201));
-
-#ifdef UNICODE_INTERNAL
- Vbig5_precedence =
- simple_convert_predence_list_to_array
- (list1 (Vcharset_chinese_big5));
-#else /* not UNICODE_INTERNAL */
Vbig5_precedence =
simple_convert_predence_list_to_array
(list2 (Vcharset_chinese_big5_1, Vcharset_chinese_big5_2));
-#endif /* UNICODE_INTERNAL */
}
diff -r 19876e056bb7 -r e3925877abce src/text.c
--- a/src/text.c Thu Oct 05 00:08:17 2017 +0100
+++ b/src/text.c Fri Oct 06 21:09:22 2017 +0100
@@ -5521,14 +5521,11 @@
All codes above are two decimal numbers except for Big Five and ANSI
Z39.64, which we don't support. We add 32 to each of the decimal numbers.
-Note that in an old-Mule world (see below), Big Five is split in a rather
-hackish fashion into two charsets, `chinese-big5-1' and `chinese-big5-2',
-due to its excessive size (94x157), with the first codepoint in the range
-0xA1 to 0xFE and the second in the range 0x40 to 0x7E or 0xA1 to 0xFE.
-`decode-big5-char' is used to generate the char from its codes, and
-`encode-big5-char' extracts the codes. This hack doesn't exist with
-Unicode-internal, and hence the expression (make-char 'chinese-big5 164 116)
-could be used.
+Big Five is split in a rather hackish fashion into two charsets,
+`chinese-big5-1' and `chinese-big5-2', due to its excessive size (94x157),
+with the first codepoint in the range 0xA1 to 0xFE and the second in the range
+0x40 to 0x7E or 0xA1 to 0xFE. `decode-big5-char' is used to generate the char
+from its codes, and `encode-big5-char' extracts the codes.
Note that there are three different internal formats for characters:
diff -r 19876e056bb7 -r e3925877abce src/unicode.c
--- a/src/unicode.c Thu Oct 05 00:08:17 2017 +0100
+++ b/src/unicode.c Fri Oct 06 21:09:22 2017 +0100
@@ -2248,10 +2248,8 @@
{
if (EQ (elt, Qignore_first_column))
*flags_out |= LOAD_UNICODE_IGNORE_FIRST_COLUMN;
-#ifndef UNICODE_INTERNAL
else if (EQ (elt, Qbig5))
*flags_out |= LOAD_UNICODE_BIG5;
-#endif /* not UNICODE_INTERNAL */
else
invalid_constant
("Unrecognized `load-unicode-mapping-table' flag", elt);
@@ -2324,9 +2322,7 @@
int flgs;
int stage;
int to_unicode_min_val[256], to_unicode_max_val[256];
-#ifndef UNICODE_INTERNAL
int big5_other_unicode_min_val[256], big5_other_unicode_max_val[256];
-#endif
int i;
/* This may be called to autoload the Unicode tables, from a function
@@ -2338,7 +2334,6 @@
verify_load_unicode_args (filename, start, end, offset, flags,
&st, &en, &of, &flgs);
-#ifndef UNICODE_INTERNAL
if (flgs & LOAD_UNICODE_BIG5)
{
/* At this point the charsets haven't been initialized
@@ -2353,7 +2348,6 @@
big5_other_unicode_max_val[i] = 0;
}
}
-#endif /* not UNICODE_INTERNAL */
for (i = 0; i < 256; i++)
{
@@ -2482,7 +2476,6 @@
cp1high = cp1 >> 8;
cp1low = cp1 & 255;
-#ifndef UNICODE_INTERNAL
if (flgs & LOAD_UNICODE_BIG5)
{
big5_char_to_fake_codepoint (cp1high, cp1low,
@@ -2509,7 +2502,6 @@
set_unicode_conversion (cp2, charset, c1, c2);
}
else
-#endif /* not UNICODE_INTERNAL */
{
int l1, l2, h1, h2;
c1 = cp1high, c2 = cp1low;
@@ -2520,9 +2512,7 @@
goto out_of_range;
}
-#ifndef UNICODE_INTERNAL
do_it:
-#endif
if (stage == 0)
{
if (c2 < to_unicode_min_val[c1])
--
‘As I sat looking up at the Guinness ad, I could never figure out /
How your man stayed up on the surfboard after forty pints of stout’
(C. Moore)