unicode-internal-commit: first pass, implement autoloading charset Unicode tables
Ben Wing
ben at xemacs.org
Tue Mar 23 08:21:28 EDT 2010
changeset: 5307:50fe73edfac4
branch: ben-unicode-internal
user: Ben Wing <ben at xemacs.org>
date: Fri Mar 19 15:30:09 2010 -0500
files: lisp/ChangeLog lisp/mule/chinese.el lisp/mule/cyrillic.el lisp/mule/ethiopic.el lisp/mule/japanese.el lisp/mule/korean.el lisp/mule/latin.el lisp/mule/misc-lang.el lisp/mule/mule-charset.el lisp/mule/thai.el lisp/mule/vietnamese.el lisp/mule/windows.el src/ChangeLog src/charset.h src/lisp.h src/mule-charset.c src/unicode.c
description:
first pass, implement autoloading charset Unicode tables
-------------------- ChangeLog entries follow: --------------------
lisp/ChangeLog addition:
2010-03-19 Ben Wing <ben at xemacs.org>
* mule/chinese.el (make-chinese-cns11643-charset):
* mule/chinese.el (chinese-sisheng):
* mule/cyrillic.el (cyrillic-koi8-r):
* mule/cyrillic.el (cyrillic-koi8-u):
* mule/cyrillic.el (cyrillic-alternativnyj):
* mule/cyrillic.el (cyrillic-koi8-ru):
* mule/cyrillic.el (cyrillic-koi8-t):
* mule/cyrillic.el (cyrillic-koi8-c):
* mule/cyrillic.el (cyrillic-koi8-o):
* mule/ethiopic.el:
* mule/japanese.el (japanese-jisx0213-1):
* mule/japanese.el (japanese-jisx0213-2):
* mule/korean.el (korean-johab):
* mule/latin.el (latin-iso8859-10):
* mule/latin.el (latin-iso8859-13):
* mule/latin.el (latin-iso8859-14):
* mule/latin.el (latin-iso8859-16):
* mule/latin.el (latin-macintosh-roman):
* mule/misc-lang.el:
* mule/mule-charset.el (make-128-byte-charset):
* mule/mule-charset.el (make-internal-charset): Removed.
* mule/mule-charset.el (make-iso8859-charset):
* mule/mule-charset.el (katakana-jisx0201):
* mule/mule-charset.el (latin-jisx0201):
* mule/mule-charset.el (japanese-jisx0208-1978):
* mule/mule-charset.el (japanese-jisx0208):
* mule/mule-charset.el (japanese-jisx0212):
* mule/mule-charset.el (featurep):
* mule/mule-charset.el (chinese-gb2312):
* mule/mule-charset.el (chinese-cns11643-1):
* mule/mule-charset.el (chinese-cns11643-2):
* mule/mule-charset.el (korean-ksc5601):
* mule/mule-charset.el (thai-tis620):
* mule/mule-charset.el (vietnamese-viscii-lower):
* mule/mule-charset.el (vietnamese-viscii-upper):
* mule/mule-charset.el (indian-is13194):
* mule/mule-charset.el (indian-1-column):
* mule/mule-charset.el (indian-2-column):
* mule/mule-charset.el (lao):
* mule/mule-charset.el (ethiopic):
* mule/mule-charset.el (tibetan-1-column):
* mule/mule-charset.el (tibetan):
* mule/thai.el (thai-iso8859-11):
* mule/vietnamese.el:
* mule/vietnamese.el (vietnamese-viscii):
* mule/windows.el (make-windows-charset):
Remove `make-internal-charset', use plain `make-charset' instead.
Rename `make-internal-128-byte-charset' to `make-128-byte-charset'.
`make-internal-charset' no longer needed because
`load-unicode-mapping-table' now automatically looks relative
to `data-directory'.
src/ChangeLog addition:
2010-03-19 Ben Wing <ben at xemacs.org>
* charset.h:
* charset.h (struct Lisp_Charset):
* charset.h (CHARSET_DO_AUTOLOAD):
* charset.h (XCHARSET_DO_AUTOLOAD):
* lisp.h:
Add a `do_autoload' flag indicating whether the Unicode tables need
to be autoloaded still.
In charset_codepoint_to_unicode_raw_1(), autoload Unicode tables
if needed.
* lisp.h:
* lisp.h (XCADDDDDDR):
Add X7TH() - X10TH(), as well as X1STCDR() - X10THCDR(), and
corresponding XCAD*R and XCDD*R macros.
* mule-charset.c (Fmake_charset):
Document the `unicode-map' property.
* unicode.c (get_unicode_conversion_1):
Autoload Unicode tables if needed.
* unicode.c (set_unicode_conversion):
Don't try to retrieve previous conversion if we haven't yet autoloaded
the Unicode map, or we will trigger an infinite loop.
* unicode.c (enum load_unicode_flags):
* unicode.c (verify_load_unicode_args):
New function, to verify arguments to `load-unicode-mapping-table'.
Add bit flags to correspond to the `flags' argument.
* lisp.h:
* unicode.c (Fload_unicode_mapping_table):
Use verify_load_unicode_args(). If filename is relative, expand it
relative to `data-directory' (or `../etc' relative to `lisp-directory'
during loadup). Document this behavior in the docstring.
* unicode.c (autoload_charset_unicode_tables):
Load charset Unicode tables by calling Fload_unicode_mapping_table().
* unicode.c (init_charset_unicode_map):
Instead of directly loading the Unicode tables from a file,
verify the arguments to `load-unicode-mapping-table', store them,
and set the `do_autoload' flag, so that the tables will be autoloaded
when needed.
diff -r 9eb363c32010 -r 50fe73edfac4 lisp/ChangeLog
--- a/lisp/ChangeLog Mon Mar 15 23:28:43 2010 -0500
+++ b/lisp/ChangeLog Fri Mar 19 15:30:09 2010 -0500
@@ -1,3 +1,57 @@
+2010-03-19 Ben Wing <ben at xemacs.org>
+
+ * mule/chinese.el (make-chinese-cns11643-charset):
+ * mule/chinese.el (chinese-sisheng):
+ * mule/cyrillic.el (cyrillic-koi8-r):
+ * mule/cyrillic.el (cyrillic-koi8-u):
+ * mule/cyrillic.el (cyrillic-alternativnyj):
+ * mule/cyrillic.el (cyrillic-koi8-ru):
+ * mule/cyrillic.el (cyrillic-koi8-t):
+ * mule/cyrillic.el (cyrillic-koi8-c):
+ * mule/cyrillic.el (cyrillic-koi8-o):
+ * mule/ethiopic.el:
+ * mule/japanese.el (japanese-jisx0213-1):
+ * mule/japanese.el (japanese-jisx0213-2):
+ * mule/korean.el (korean-johab):
+ * mule/latin.el (latin-iso8859-10):
+ * mule/latin.el (latin-iso8859-13):
+ * mule/latin.el (latin-iso8859-14):
+ * mule/latin.el (latin-iso8859-16):
+ * mule/latin.el (latin-macintosh-roman):
+ * mule/misc-lang.el:
+ * mule/mule-charset.el (make-128-byte-charset):
+ * mule/mule-charset.el (make-internal-charset): Removed.
+ * mule/mule-charset.el (make-iso8859-charset):
+ * mule/mule-charset.el (katakana-jisx0201):
+ * mule/mule-charset.el (latin-jisx0201):
+ * mule/mule-charset.el (japanese-jisx0208-1978):
+ * mule/mule-charset.el (japanese-jisx0208):
+ * mule/mule-charset.el (japanese-jisx0212):
+ * mule/mule-charset.el (featurep):
+ * mule/mule-charset.el (chinese-gb2312):
+ * mule/mule-charset.el (chinese-cns11643-1):
+ * mule/mule-charset.el (chinese-cns11643-2):
+ * mule/mule-charset.el (korean-ksc5601):
+ * mule/mule-charset.el (thai-tis620):
+ * mule/mule-charset.el (vietnamese-viscii-lower):
+ * mule/mule-charset.el (vietnamese-viscii-upper):
+ * mule/mule-charset.el (indian-is13194):
+ * mule/mule-charset.el (indian-1-column):
+ * mule/mule-charset.el (indian-2-column):
+ * mule/mule-charset.el (lao):
+ * mule/mule-charset.el (ethiopic):
+ * mule/mule-charset.el (tibetan-1-column):
+ * mule/mule-charset.el (tibetan):
+ * mule/thai.el (thai-iso8859-11):
+ * mule/vietnamese.el:
+ * mule/vietnamese.el (vietnamese-viscii):
+ * mule/windows.el (make-windows-charset):
+ Remove `make-internal-charset', use plain `make-charset' instead.
+ Rename `make-internal-128-byte-charset' to `make-128-byte-charset'.
+ `make-internal-charset' no longer needed because
+ `load-unicode-mapping-table' now automatically looks relative
+ to `data-directory'.
+
2010-03-14 Ben Wing <ben at xemacs.org>
* mule/chinese.el (cn-gb-2312):
diff -r 9eb363c32010 -r 50fe73edfac4 lisp/mule/chinese.el
--- a/lisp/mule/chinese.el Mon Mar 15 23:28:43 2010 -0500
+++ b/lisp/mule/chinese.el Fri Mar 19 15:30:09 2010 -0500
@@ -54,7 +54,7 @@
(flet
((make-chinese-cns11643-charset
(name plane final)
- (make-internal-charset
+ (make-charset
name (concat "CNS 11643 Plane " plane " (Chinese traditional)")
`(registries
,(vector (concat "cns11643.1992-" plane ))
@@ -81,7 +81,7 @@
;; ISO-IR-165 (CCITT Extended GB)
;; It is based on CCITT Recommendation T.101, includes GB 2312-80 +
;; GB 8565-88 table A4 + 293 characters.
-(make-internal-charset ;; not in FSF 21.1
+(make-charset ;; not in FSF 21.1
'chinese-isoir165
"ISO-IR-165 (CCITT Extended GB; Chinese simplified)"
`(registries ["isoir165-0"]
@@ -94,7 +94,7 @@
long-name "ISO-IR-165 (CCITT Extended GB; Chinese simplified)"))
;; PinYin-ZhuYin
-(make-internal-charset
+(make-charset
'chinese-sisheng
"SiSheng characters for PinYin/ZhuYin"
'(dimension 1
diff -r 9eb363c32010 -r 50fe73edfac4 lisp/mule/cyrillic.el
--- a/lisp/mule/cyrillic.el Mon Mar 15 23:28:43 2010 -0500
+++ b/lisp/mule/cyrillic.el Fri Mar 19 15:30:09 2010 -0500
@@ -123,7 +123,7 @@
(documentation . "Support for Cyrillic ISO-8859-5."))
'("Cyrillic"))
-(make-internal-128-byte-charset
+(make-128-byte-charset
'cyrillic-koi8-r "Cyrillic KOI8-R"
:unicode-map '("unicode/unicode-consortium/VENDORS/MISC/KOI8-R.TXT" #x80)
:tags '(koi8 cyrillic russian))
@@ -167,7 +167,7 @@
(remassq 'locale (copy-list (cdr (assoc "Russian" language-info-alist))))
'("Cyrillic"))
-(make-internal-128-byte-charset
+(make-128-byte-charset
'cyrillic-koi8-u "Cyrillic KOI8-u"
:unicode-map '("unicode/unicode-consortium/VENDORS/MISC/KOI8-U.TXT" #x80)
:tags '(koi8 cyrillic ukrainian))
@@ -218,7 +218,7 @@
Byelorussian in the early 1990s.)"))
'("Cyrillic"))
-(make-internal-128-byte-charset
+(make-128-byte-charset
'cyrillic-alternativnyj "Cyrillic Alternativnyj"
:unicode-map '("unicode/unicode-consortium/VENDORS/MICSFT/PC/CP866.TXT" #x80)
:tags '(cyrillic))
@@ -243,7 +243,7 @@
(documentation . "Support for Cyrillic ALTERNATIVNYJ."))
'("Cyrillic"))
-(make-internal-128-byte-charset
+(make-128-byte-charset
'cyrillic-koi8-ru "Cyrillic КОИ-8 (Russian, Ukrainian)"
:unicode-map '("unicode/libiconv/KOI8-RU.TXT" #x80)
:tags '(koi8 cyrillic russian ukrainian))
@@ -259,7 +259,7 @@
;; We should provide an input method and the corresponding language
;; environments for the next three coding systems.
-(make-internal-128-byte-charset
+(make-128-byte-charset
'cyrillic-koi8-t "Cyrillic КОИ-8 for Tajik."
:unicode-map '("unicode/libiconv/KOI8-T.TXT" #x80)
:tags '(koi8 cyrillic tajik))
@@ -275,7 +275,7 @@
language of Tajikistan and a close relative of Persian. "))
;; Support fot the languages of the Caucasus.
-(make-internal-128-byte-charset
+(make-128-byte-charset
'cyrillic-koi8-c "Cyrillic KOI-8, Caucasus."
:unicode-map
'((#x80 #x0493) ;; CYRILLIC SMALL LETTER GHE WITH STROKE
@@ -417,7 +417,7 @@
for that, see koi8-o. "))
;; Archaic Russian support.
-(make-internal-128-byte-charset
+(make-128-byte-charset
'cyrillic-koi8-o "Cyrillic Old-orthography Russian"
:unicode-map
'((#x80 #x0402) ;; CYRILLIC CAPITAL LETTER DJE
diff -r 9eb363c32010 -r 50fe73edfac4 lisp/mule/ethiopic.el
--- a/lisp/mule/ethiopic.el Mon Mar 15 23:28:43 2010 -0500
+++ b/lisp/mule/ethiopic.el Fri Mar 19 15:30:09 2010 -0500
@@ -30,7 +30,7 @@
;;; Code:
;;; Ethiopic characters (Amharic and Tigrinya).
-;(make-internal-charset 'ethiopic "Ethiopic characters" ...
+;(make-charset 'ethiopic "Ethiopic characters" ...
;moved to mule-charset.el.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff -r 9eb363c32010 -r 50fe73edfac4 lisp/mule/japanese.el
--- a/lisp/mule/japanese.el Mon Mar 15 23:28:43 2010 -0500
+++ b/lisp/mule/japanese.el Fri Mar 19 15:30:09 2010 -0500
@@ -33,7 +33,7 @@
;;; Code:
-(make-internal-charset
+(make-charset
'japanese-jisx0213-1 "JISX0213 Plane 1 (Japanese)"
'(dimension
2
@@ -49,7 +49,7 @@
))
;; JISX0213 Plane 2
-(make-internal-charset
+(make-charset
'japanese-jisx0213-2 "JISX0213 Plane 2 (Japanese)"
'(dimension
2
diff -r 9eb363c32010 -r 50fe73edfac4 lisp/mule/korean.el
--- a/lisp/mule/korean.el Mon Mar 15 23:28:43 2010 -0500
+++ b/lisp/mule/korean.el Fri Mar 19 15:30:09 2010 -0500
@@ -34,7 +34,7 @@
;; Hangul uses the range [84 - D3], [41 - 7E, 81 - FE]
;; Symbols and Hanja use [D8 - DE, E0 - F9], [31 - 7E, 91 - FE]
;; So for our purposes, this is [84 - F9], [31 - FE]
-(make-internal-charset
+(make-charset
'korean-johab
"Johab (Korean)"
'(dimension
diff -r 9eb363c32010 -r 50fe73edfac4 lisp/mule/latin.el
--- a/lisp/mule/latin.el Mon Mar 15 23:28:43 2010 -0500
+++ b/lisp/mule/latin.el Fri Mar 19 15:30:09 2010 -0500
@@ -298,7 +298,7 @@
;; Latin-6 (ISO 8859-10) Northern Europe.
-(make-internal-charset
+(make-charset
'latin-iso8859-10
"Supplementary Set for Latin Alphabet No. 6 (ISO/IEC 8859-10): ISO-IR-157
\"This set is intended for a version of ISO 4873 using the coding method of
@@ -325,7 +325,7 @@
;; Latin-7 (ISO 8859-13) Baltic Rim.
-(make-internal-charset
+(make-charset
'latin-iso8859-13
"Baltic Rim Supplementary Set (Latin-7) (ISO/IEC 8859-13): ISO-IR-179"
'(dimension
@@ -354,7 +354,7 @@
;; Never widely used. Current-orthography Gaelic, both Irish and Scots, is
;; easily written with Latin-1. Wikipedia says the same about Welsh.
-(make-internal-charset
+(make-charset
'latin-iso8859-14
"Celtic Supplementary Latin Set (Latin-8) (ISO/IEC 8859-14): ISO-IR-199
FIELD OF UTILIZATION: \"Communication and processing of text in the Celtic
@@ -532,7 +532,7 @@
;;
;; "South-Eastern European." Not, to my knowledge, ever widely used.
-(make-internal-charset
+(make-charset
'latin-iso8859-16
"Romanian Character Set for Information Interchange (Latin-10) (ISO/IEC 8859-16): ISO-IR-226
FIELD OF UTILIZATION: \"Communication, processing, transfer of text in the
@@ -833,7 +833,7 @@
;; Restore the normal case mappings for the characters.
(put-case-table-pair ?I ?i (standard-case-table))))
-(make-internal-128-byte-charset
+(make-128-byte-charset
'latin-macintosh-roman "MacRoman"
:unicode-map '("unicode/unicode-consortium/VENDORS/APPLE/ROMAN.TXT" #x80)
:tags '(macintosh latin)
diff -r 9eb363c32010 -r 50fe73edfac4 lisp/mule/misc-lang.el
--- a/lisp/mule/misc-lang.el Mon Mar 15 23:28:43 2010 -0500
+++ b/lisp/mule/misc-lang.el Fri Mar 19 15:30:09 2010 -0500
@@ -33,7 +33,7 @@
;; Fuck this garbage. Rather than have bogus charsets like this,
;; just use the Unicode support.
;;; IPA characters for phonetic symbols.
-;(make-internal-charset
+;(make-charset
; 'ipa "IPA (International Phonetic Association)"
; '(dimension
; 1
diff -r 9eb363c32010 -r 50fe73edfac4 lisp/mule/mule-charset.el
--- a/lisp/mule/mule-charset.el Mon Mar 15 23:28:43 2010 -0500
+++ b/lisp/mule/mule-charset.el Fri Mar 19 15:30:09 2010 -0500
@@ -993,46 +993,27 @@
; ,@(and unicode-map `(tags ,tags))
; )))
-(defun make-internal-charset (name doc-string props)
- "Make an internal charset.
-This is the same as `make-charset' except that if a `unicode-map'
-specifies a file name, the name is assumed relative to `data-directory', and
-will be made so."
- (let ((unicode-map (plist-get props 'unicode-map)))
- (when (and unicode-map (stringp (car unicode-map)))
- ;; During loadup, data-directory is nil, but source-directory is
- ;; defined
- (let ((data-dir
- (or data-directory (expand-file-name "etc" source-directory))))
- (setq props
- (plist-put props 'unicode-map
- (cons (expand-file-name (car unicode-map) data-dir)
- (cdr unicode-map)))))))
- (make-charset name doc-string props))
-
-(defun* make-internal-128-byte-charset (name short-name &key long-name
- doc-string unicode-map tags)
- "Make an internal one-dimension size-128 charset.
+(defun* make-128-byte-charset (name short-name &key long-name
+ doc-string unicode-map tags)
+ "Make a one-dimension size-128 charset.
NAME is a symbol, the charset's name.
SHORT-NAME is a string describing the charset briefly, and will be used as
the `short-name' property.
The keys :long-name, :doc-string, :unicode-map and :tags will be used to
set the associated charset properties. If unspecified, :long-name defaults
-to `short-name', and :doc-string defaults to :long-name. If :unicode-map
-specifies a file name, the name is assumed relative to `data-directory',
-and will be made so."
+to `short-name', and :doc-string defaults to :long-name."
(setq long-name (or long-name short-name))
(setq doc-string (or doc-string long-name))
- (make-internal-charset name doc-string
- `(dimension 1
- offset 128
- chars 128
- ,@(and unicode-map `(unicode-map ,unicode-map))
- short-name ,short-name
- long-name ,long-name
- ,@(and tags `(tags ,tags))
- )))
+ (make-charset name doc-string
+ `(dimension 1
+ offset 128
+ chars 128
+ ,@(and unicode-map `(unicode-map ,unicode-map))
+ short-name ,short-name
+ long-name ,long-name
+ ,@(and tags `(tags ,tags))
+ )))
;;;;;;;;;;;;;;;;;;;;; ASCII, Control-1, Composite, etc. ;;;;;;;;;;;;;;;;;;;;
@@ -1070,7 +1051,7 @@
(or doc-string
(format "Right-Hand Part of %s (ISO/IEC %s): %s"
alphabet-name str8859 iso-ir-name))))
- (make-internal-charset
+ (make-charset
symbol doc-string
`(dimension 1
offset 160
@@ -1120,7 +1101,7 @@
;;;;;;;;;;;;;;;;;;;;; Japanese ;;;;;;;;;;;;;;;;;;;;
-(make-internal-charset
+(make-charset
'katakana-jisx0201
"Katakana Part of JISX0201.1976"
'(dimension 1
@@ -1135,7 +1116,7 @@
tags (jis katakana japanese)
))
-(make-internal-charset
+(make-charset
'latin-jisx0201
"Roman Part of JISX0201.1976"
'(dimension 1
@@ -1151,7 +1132,7 @@
))
-(make-internal-charset
+(make-charset
'japanese-jisx0208-1978
"JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
'(dimension 2
@@ -1166,7 +1147,7 @@
tags (jis kanji japanese)
))
-(make-internal-charset
+(make-charset
'japanese-jisx0208
"JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
'(dimension 2
@@ -1180,7 +1161,7 @@
tags (jis kanji japanese)
))
-(make-internal-charset
+(make-charset
'japanese-jisx0212
"JISX0212 Japanese supplement: ISO-IR-159"
'(dimension 2
@@ -1196,7 +1177,7 @@
(when (featurep 'unicode-internal)
;; We can support Shift-JIS directly.
- (make-internal-charset
+ (make-charset
'japanese-shift-jis
;; You could imagine trying to declare this to be an "algorithmic" charset
;; with indices shifted in a programmatic way from JIS X 0208:1997.
@@ -1223,7 +1204,7 @@
;;;;;;;;;;;;;;;;;;;;; Chinese ;;;;;;;;;;;;;;;;;;;;
-(make-internal-charset
+(make-charset
'chinese-gb2312
"GB2312 Chinese simplified: ISO-IR-58"
'(dimension 2
@@ -1237,7 +1218,7 @@
tags (gb kanji simplified-chinese chinese/language)
))
-(make-internal-charset
+(make-charset
'chinese-cns11643-1
"CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
'(dimension 2
@@ -1258,7 +1239,7 @@
tags (cns kanji traditional-chinese chinese/language)
))
-(make-internal-charset
+(make-charset
'chinese-cns11643-2
"CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
'(dimension 2
@@ -1277,7 +1258,7 @@
(if (featurep 'unicode-internal)
;; We can support Big5 directly.
- (make-internal-charset
+ (make-charset
'chinese-big5
"Big5 (Chinese traditional)"
'(dimension 2
@@ -1306,7 +1287,7 @@
;; and "Big Five Level 2" (rows C9-F9), with the latter containing
;; less used characters. We split the same way then coerce the
;; result into a 94x94 block.
- (make-internal-charset
+ (make-charset
'chinese-big5-1
"Frequently used part (A141-C67F) of Big5 (Chinese traditional)"
'(dimension 2
@@ -1319,7 +1300,7 @@
;; no unicode map, see chinese-big5-2
tags (kanji traditional-chinese chinese/language)
))
- (make-internal-charset
+ (make-charset
'chinese-big5-2
"Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
'(dimension 2
@@ -1345,7 +1326,7 @@
;;;;;;;;;;;;;;;;;;;;; Korean ;;;;;;;;;;;;;;;;;;;;
-(make-internal-charset
+(make-charset
'korean-ksc5601
"KSC5601 Korean Hangul and Hanja: ISO-IR-149"
'(dimension 2
@@ -1364,7 +1345,7 @@
;;;;;;;;;;;;;;;;;;;;; Thai ;;;;;;;;;;;;;;;;;;;;
-(make-internal-charset
+(make-charset
'thai-tis620
"Right-Hand Part of TIS620.2533 (Thai): ISO-IR-166"
'(dimension 1
@@ -1391,7 +1372,7 @@
;; more than 96 characters. Since Emacs can't handle it as one
;; character set, it is divided into two: lower case letters and upper
;; case letters.
-(make-internal-charset
+(make-charset
'vietnamese-viscii-lower "VISCII1.1 lower-case"
'(dimension 1
registries ["VISCII1.1"]
@@ -1405,7 +1386,7 @@
tags (latin vietnamese)
))
-(make-internal-charset
+(make-charset
'vietnamese-viscii-upper "VISCII1.1 upper-case"
'(dimension 1
registries ["VISCII1.1"]
@@ -1423,7 +1404,7 @@
; ;; not assigned. They are automatically converted to each Indian
; ;; script which IS-13194 supports.
-(make-internal-charset
+(make-charset
'indian-is13194
"Generic Indian charset for data exchange with IS 13194"
'(dimension 1
@@ -1440,7 +1421,7 @@
))
;; Actual Glyph for 1-column width.
-(make-internal-charset
+(make-charset
'indian-1-column
"Indian charset for 2-column width glyphs"
'(dimension 2
@@ -1455,7 +1436,7 @@
))
;; Actual Glyph for 2-column width.
-(make-internal-charset
+(make-charset
'indian-2-column
"Indian charset for 2-column width glyphs"
'(dimension 2
@@ -1471,7 +1452,7 @@
;; Lao script.
;; ISO10646's 0x0E80..0x0EDF are mapped to 0x20..0x7F.
-(make-internal-charset
+(make-charset
'lao "Lao characters (ISO10646 0E80..0EDF)"
'(dimension 1
registries ["MuleLao-1"]
@@ -1488,7 +1469,7 @@
;; CAN'T BE DEFINED THERE BECAUSE: The charset is used inside of that file.
;; Ethiopic characters (Amharic and Tigrinya).
-(make-internal-charset
+(make-charset
'ethiopic "Ethiopic characters"
'(dimension 2
registries ["Ethiopic-Unicode"]
@@ -1501,7 +1482,7 @@
tags (ethiopic)
))
-(make-internal-charset
+(make-charset
'tibetan-1-column "Tibetan 1 column glyph"
'(dimension 2
registries ["MuleTibetan-1"]
@@ -1515,7 +1496,7 @@
))
;; Tibetan script.
-(make-internal-charset
+(make-charset
'tibetan "Tibetan characters"
'(dimension 2
registries ["MuleTibetan-2"]
diff -r 9eb363c32010 -r 50fe73edfac4 lisp/mule/thai.el
--- a/lisp/mule/thai.el Mon Mar 15 23:28:43 2010 -0500
+++ b/lisp/mule/thai.el Fri Mar 19 15:30:09 2010 -0500
@@ -31,7 +31,7 @@
;;; Code:
-(make-internal-charset
+(make-charset
'thai-iso8859-11
"Right-Hand Part of Latin/Thai Alphabet (ISO/IEC 8859-11)"
'(dimension 1
diff -r 9eb363c32010 -r 50fe73edfac4 lisp/mule/vietnamese.el
--- a/lisp/mule/vietnamese.el Mon Mar 15 23:28:43 2010 -0500
+++ b/lisp/mule/vietnamese.el Fri Mar 19 15:30:09 2010 -0500
@@ -35,14 +35,14 @@
;; character set, it is divided into two: lower case letters and upper
;; case letters.
;Moved to mule-charset.el.
-;(make-internal-charset 'vietnamese-viscii-lower "VISCII1.1 lower-case" ...
-;(make-internal-charset 'vietnamese-viscii-upper "VISCII1.1 upper-case" ...
+;(make-charset 'vietnamese-viscii-lower "VISCII1.1 lower-case" ...
+;(make-charset 'vietnamese-viscii-upper "VISCII1.1 upper-case" ...
(define-category ?v "Vietnamese character.")
(modify-category-entry 'vietnamese-viscii-lower ?v)
(modify-category-entry 'vietnamese-viscii-upper ?v)
-(make-internal-charset
+(make-charset
'vietnamese-viscii "Vietnamese VISCII1.1"
'(dimension
1
diff -r 9eb363c32010 -r 50fe73edfac4 lisp/mule/windows.el
--- a/lisp/mule/windows.el Mon Mar 15 23:28:43 2010 -0500
+++ b/lisp/mule/windows.el Fri Mar 19 15:30:09 2010 -0500
@@ -51,7 +51,7 @@
dimension chars offset
&key tags ansioem unicode-map)
(or (listp tags) (setq tags (list tags)))
- (make-internal-charset
+ (make-charset
(intern (format "%s-windows-%s" script codepage))
(format "Windows code page %s (%s)" codepage name)
`(dimension ,dimension
diff -r 9eb363c32010 -r 50fe73edfac4 src/ChangeLog
--- a/src/ChangeLog Mon Mar 15 23:28:43 2010 -0500
+++ b/src/ChangeLog Fri Mar 19 15:30:09 2010 -0500
@@ -1,3 +1,51 @@
+2010-03-19 Ben Wing <ben at xemacs.org>
+
+ * charset.h:
+ * charset.h (struct Lisp_Charset):
+ * charset.h (CHARSET_DO_AUTOLOAD):
+ * charset.h (XCHARSET_DO_AUTOLOAD):
+ * lisp.h:
+ Add a `do_autoload' flag indicating whether the Unicode tables need
+ to be autoloaded still.
+
+ In charset_codepoint_to_unicode_raw_1(), autoload Unicode tables
+ if needed.
+
+ * lisp.h:
+ * lisp.h (XCADDDDDDR):
+ Add X7TH() - X10TH(), as well as X1STCDR() - X10THCDR(), and
+ corresponding XCAD*R and XCDD*R macros.
+
+ * mule-charset.c (Fmake_charset):
+ Document the `unicode-map' property.
+
+ * unicode.c (get_unicode_conversion_1):
+ Autoload Unicode tables if needed.
+
+ * unicode.c (set_unicode_conversion):
+ Don't try to retrieve previous conversion if we haven't yet autoloaded
+ the Unicode map, or we will trigger an infinite loop.
+
+ * unicode.c (enum load_unicode_flags):
+ * unicode.c (verify_load_unicode_args):
+ New function, to verify arguments to `load-unicode-mapping-table'.
+ Add bit flags to correspond to the `flags' argument.
+
+ * lisp.h:
+ * unicode.c (Fload_unicode_mapping_table):
+ Use verify_load_unicode_args(). If filename is relative, expand it
+ relative to `data-directory' (or `../etc' relative to `lisp-directory'
+ during loadup). Document this behavior in the docstring.
+
+ * unicode.c (autoload_charset_unicode_tables):
+ Load charset Unicode tables by calling Fload_unicode_mapping_table().
+
+ * unicode.c (init_charset_unicode_map):
+ Instead of directly loading the Unicode tables from a file,
+ verify the arguments to `load-unicode-mapping-table', store them,
+ and set the `do_autoload' flag, so that the tables will be autoloaded
+ when needed.
+
2010-03-15 Ben Wing <ben at xemacs.org>
* tests.c (Ftest_data_format_conversion):
diff -r 9eb363c32010 -r 50fe73edfac4 src/charset.h
--- a/src/charset.h Mon Mar 15 23:28:43 2010 -0500
+++ b/src/charset.h Fri Mar 19 15:30:09 2010 -0500
@@ -137,6 +137,7 @@
Lisp_Object short_name;
Lisp_Object long_name;
Lisp_Object unicode_map;
+ int do_autoload; /* if true, Unicode maps still need to be loaded */
Lisp_Object tags;
Lisp_Object reverse_direction_charset;
@@ -250,6 +251,7 @@
#define CHARSET_SHORT_NAME(cs) ((cs)->short_name)
#define CHARSET_TO_UNICODE_TABLE(cs) ((cs)->to_unicode_table)
#define CHARSET_UNICODE_MAP(cs) ((cs)->unicode_map)
+#define CHARSET_DO_AUTOLOAD(cs) ((cs)->do_autoload)
#define CHARSET_TAGS(cs) ((cs)->tags)
#define CHARSET_MIN_CODE(cs, dim) CHARSET_OFFSET (cs, dim)
#define CHARSET_MAX_CODE(cs, dim) \
@@ -275,6 +277,7 @@
#define XCHARSET_SHORT_NAME(cs) CHARSET_SHORT_NAME (XCHARSET (cs))
#define XCHARSET_TO_UNICODE_TABLE(cs) CHARSET_TO_UNICODE_TABLE (XCHARSET (cs))
#define XCHARSET_UNICODE_MAP(cs) CHARSET_UNICODE_MAP (XCHARSET (cs))
+#define XCHARSET_DO_AUTOLOAD(cs) CHARSET_DO_AUTOLOAD (XCHARSET (cs))
#define XCHARSET_TAGS(cs) CHARSET_TAGS (XCHARSET (cs))
#define XCHARSET_MIN_CODE(cs, dim) CHARSET_MIN_CODE (XCHARSET (cs), dim)
#define XCHARSET_MAX_CODE(cs, dim) CHARSET_MAX_CODE (XCHARSET (cs), dim)
@@ -508,6 +511,8 @@
int retval;
INLINE_ASSERT_VALID_CHARSET_CODEPOINT (charset, c1, c2);
+ if (XCHARSET_DO_AUTOLOAD (charset))
+ autoload_charset_unicode_tables (charset);
#ifdef ALLOW_ALGORITHMIC_CONVERSION_TABLES
{
/* Conceivably a good idea not to have this in Unicode-internal, since
diff -r 9eb363c32010 -r 50fe73edfac4 src/lisp.h
--- a/src/lisp.h Mon Mar 15 23:28:43 2010 -0500
+++ b/src/lisp.h Fri Mar 19 15:30:09 2010 -0500
@@ -2724,12 +2724,34 @@
#define XCDDDDDR(a) (XCDR (XCDDDDR (a)))
#define XCADDDDDR(a) (XCAR (XCDDDDDR (a)))
#define XCDDDDDDR(a) (XCDR (XCDDDDDR (a)))
-#define X1ST(a) XCAR (a)
-#define X2ND(a) XCADR (a)
-#define X3RD(a) XCADDR (a)
-#define X4TH(a) XCADDDR (a)
-#define X5TH(a) XCADDDDR (a)
-#define X6TH(a) XCADDDDDR (a)
+#define XCADDDDDDR(a) (XCAR (XCDDDDDDR (a)))
+#define XCDDDDDDDR(a) (XCDR (XCDDDDDDR (a)))
+#define XCADDDDDDDR(a) (XCAR (XCDDDDDDDR (a)))
+#define XCDDDDDDDDR(a) (XCDR (XCDDDDDDDR (a)))
+#define XCADDDDDDDDR(a) (XCAR (XCDDDDDDDDR (a)))
+#define XCDDDDDDDDDR(a) (XCDR (XCDDDDDDDDR (a)))
+#define XCADDDDDDDDDR(a) (XCAR (XCDDDDDDDDDR (a)))
+#define XCDDDDDDDDDDR(a) (XCDR (XCDDDDDDDDDR (a)))
+#define X1ST(a) XCAR (a)
+#define X2ND(a) XCADR (a)
+#define X3RD(a) XCADDR (a)
+#define X4TH(a) XCADDDR (a)
+#define X5TH(a) XCADDDDR (a)
+#define X6TH(a) XCADDDDDR (a)
+#define X7TH(a) XCADDDDDDR (a)
+#define X8TH(a) XCADDDDDDDR (a)
+#define X9TH(a) XCADDDDDDDDR (a)
+#define X10TH(a) XCADDDDDDDDDR (a)
+#define X1STCDR(a) XCDR (a)
+#define X2NDCDR(a) XCDDR (a)
+#define X3RDCDR(a) XCDDDR (a)
+#define X4THCDR(a) XCDDDDR (a)
+#define X5THCDR(a) XCDDDDDR (a)
+#define X6THCDR(a) XCDDDDDDR (a)
+#define X7THCDR(a) XCDDDDDDDR (a)
+#define X8THCDR(a) XCDDDDDDDDR (a)
+#define X9THCDR(a) XCDDDDDDDDDR (a)
+#define X10THCDR(a) XCDDDDDDDDDDR (a)
#define XSETCAR(a, b) (XCONS (a)->car_ = (b))
#define XSETCDR(a, b) (XCONS (a)->cdr_ = (b))
@@ -5401,6 +5423,7 @@
extern Lisp_Object Vmodule_directory;
extern Lisp_Object Vsite_directory;
extern Lisp_Object Vsite_module_directory;
+extern Lisp_Object Vlisp_directory;
/* Defined in emodules.c */
#ifdef HAVE_SHLIB
@@ -6914,6 +6937,7 @@
int decode_unicode (Lisp_Object unicode, enum unicode_allow allow);
void free_precedence_array (Lisp_Object preclist);
void init_charset_unicode_map (Lisp_Object charset, Lisp_Object map);
+void autoload_charset_unicode_tables (Lisp_Object charset);
EXFUN (Fset_charset_tags, 2);
diff -r 9eb363c32010 -r 50fe73edfac4 src/mule-charset.c
--- a/src/mule-charset.c Mon Mar 15 23:28:43 2010 -0500
+++ b/src/mule-charset.c Fri Mar 19 15:30:09 2010 -0500
@@ -924,7 +924,24 @@
`direction' `l2r' (left-to-right) or `r2l' (right-to-left).
Defaults to `l2r'.
`unicode-map' Information describing how to map this charset to/from
- Unicode.
+ Unicode. This is either a list
+
+ (FILENAME START END OFFSET FLAGS)
+
+ specifying a filename to load the map from and corresponding
+ arguments to `load-unicode-mapping-table', or a list
+
+ ((UNICODE-CODEPOINT CHARSET-CODEPOINT)
+ (UNICODE-CODEPOINT CHARSET-CODEPOINT)
+ ...
+ )
+
+ directly specifying Unicode codepoints and corresponding
+ charset codepoints. In the former format, any of the
+ arguments other than FILENAME can be omitted, as with the
+ arguments to `load-unicode-mapping-table'. In the latter
+ format, either one or two charset codepoints need to be
+ given, depending on the dimension of the charset.
`ccl-program' A compiled CCL program used to convert a character in
this charset into an index into the font. The CCL program
is passed the octets of the character, which will be within
diff -r 9eb363c32010 -r 50fe73edfac4 src/unicode.c
--- a/src/unicode.c Mon Mar 15 23:28:43 2010 -0500
+++ b/src/unicode.c Fri Mar 19 15:30:09 2010 -0500
@@ -986,8 +986,14 @@
static void
set_unicode_conversion (int code, Lisp_Object charset, int c1, int c2)
{
- int old_code = charset_codepoint_to_unicode (charset, c1, c2, CONVERR_FAIL);
+ int old_code = -1;
int combined_code = (c1 << 8) + c2;
+
+ /* If charset hasn't been autoloaded yet (or we're in the middle of
+ autoloading), then we certainly have no old code, and don't try to
+ convert, or we may get in an infinite autoloading loop. */
+ if (!XCHARSET_DO_AUTOLOAD (charset))
+ old_code = charset_codepoint_to_unicode (charset, c1, c2, CONVERR_FAIL);
ASSERT_VALID_CHARSET_CODEPOINT (charset, c1, c2);
if (code != -1)
@@ -1058,6 +1064,8 @@
{
void *table;
+ if (XCHARSET_DO_AUTOLOAD (charset))
+ autoload_charset_unicode_tables (charset);
table = XCHARSET_FROM_UNICODE_TABLE (charset);
#ifdef ALLOW_ALGORITHMIC_CONVERSION_TABLES
if (!table)
@@ -2043,6 +2051,55 @@
return Qnil;
}
+enum load_unicode_flags
+{
+ LOAD_UNICODE_IGNORE_FIRST_COLUMN = 1,
+ LOAD_UNICODE_BIG5 = 2
+};
+
+static void
+verify_load_unicode_args (Lisp_Object filename, Lisp_Object start,
+ Lisp_Object end, Lisp_Object offset,
+ Lisp_Object flags, int *st, int *en, int *of,
+ int *flags_out)
+{
+ CHECK_STRING (filename);
+ if (!NILP (start))
+ {
+ CHECK_INT (start);
+ *st = XINT (start);
+ }
+ if (!NILP (end))
+ {
+ CHECK_INT (end);
+ *en = XINT (end);
+ }
+ if (!NILP (offset))
+ {
+ CHECK_INT (offset);
+ *of = XINT (offset);
+ }
+
+ if (!LISTP (flags))
+ flags = list1 (flags);
+
+ *flags_out = 0;
+ {
+ EXTERNAL_LIST_LOOP_2 (elt, flags)
+ {
+ if (EQ (elt, Qignore_first_column))
+ *flags_out |= LOAD_UNICODE_IGNORE_FIRST_COLUMN;
+#ifndef UNICODE_INTERNAL
+ else if (EQ (elt, Qbig5))
+ *flags_out |= LOAD_UNICODE_BIG5;
+#endif /* not UNICODE_INTERNAL */
+ else
+ invalid_constant
+ ("Unrecognized `load-unicode-mapping-table' flag", elt);
+ }
+ }
+}
+
/* "cerrar el fulano" = close the so-and-so */
static Lisp_Object
cerrar_el_fulano (Lisp_Object fulano)
@@ -2055,6 +2112,13 @@
DEFUN ("load-unicode-mapping-table", Fload_unicode_mapping_table,
2, 6, 0, /*
Load Unicode tables with the Unicode mapping data in FILENAME for CHARSET.
+
+If FILENAME is relative, it will be interpreted relative to `data-directory',
+which normally corresponds to the `etc/' directory in the source tree and
+contains architecture-independent XEmacs data files. (During loadup,
+`data-directory' isn't defined and so the directory is determined by looking
+for `../etc' relative to `lisp-directory'.)
+
Data is text, in the form of one translation per line. Lines are of the
form
@@ -2099,56 +2163,31 @@
struct gcpro gcpro1;
char line[1025];
int fondo = specpdl_depth (); /* "fondo" = depth */
- int ignore_first_column = 0;
+ int flgs;
+
+ charset = Fget_charset (charset);
+ verify_load_unicode_args (filename, start, end, offset, flags,
+ &st, &en, &of, &flgs);
+
#ifndef UNICODE_INTERNAL
- int big5 = 0;
+ if (flgs & LOAD_UNICODE_BIG5)
+ {
+ /* At this point the charsets haven't been initialized
+ yet, so at least set the values for big5-1 and big5-2
+ so we can use big5_char_to_fake_codepoint(). */
+ Vcharset_chinese_big5_1 = Fget_charset (Qchinese_big5_1);
+ Vcharset_chinese_big5_2 = Fget_charset (Qchinese_big5_2);
+ }
#endif /* not UNICODE_INTERNAL */
- CHECK_STRING (filename);
- charset = Fget_charset (charset);
- if (!NILP (start))
- {
- CHECK_INT (start);
- st = XINT (start);
- }
- if (!NILP (end))
- {
- CHECK_INT (end);
- en = XINT (end);
- }
- if (!NILP (offset))
- {
- CHECK_INT (offset);
- of = XINT (offset);
- }
-
- if (!LISTP (flags))
- flags = list1 (flags);
-
- {
- EXTERNAL_LIST_LOOP_2 (elt, flags)
- {
- if (EQ (elt, Qignore_first_column))
- ignore_first_column = 1;
-#ifndef UNICODE_INTERNAL
- else if (EQ (elt, Qbig5))
- {
- big5 = 1;
- /* At this point the charsets haven't been initialzied
- yet, so at least set the values for big5-1 and big5-2
- so we can use big5_char_to_fake_codepoint(). */
- Vcharset_chinese_big5_1 = Fget_charset (Qchinese_big5_1);
- Vcharset_chinese_big5_2 = Fget_charset (Qchinese_big5_2);
- }
-#endif /* not UNICODE_INTERNAL */
- else
- invalid_constant
- ("Unrecognized `load-unicode-mapping-table' flag", elt);
- }
- }
-
GCPRO1 (filename);
- filename = Fexpand_file_name (filename, Qnil);
+ if (!NILP (Vdata_directory))
+ filename = Fexpand_file_name (filename, Vdata_directory);
+ else
+ filename = Fexpand_file_name (filename,
+ Fexpand_file_name
+ (build_ascstring ("../etc"),
+ Vlisp_directory));
file = qxe_fopen (XSTRING_DATA (filename), READ_TEXT);
if (!file)
report_file_error ("Cannot open", filename);
@@ -2186,7 +2225,7 @@
/* First check for a range. */
scanf_count =
- (!ignore_first_column ?
+ (!(flgs & LOAD_UNICODE_IGNORE_FIRST_COLUMN) ?
sscanf (p, "%i-%i %i%n", &cp1from, &cp1to, &cp2, &endcount) :
sscanf (p, "%i-%i %i %i%n", &dummy, &cp1from, &cp1to, &cp2,
&endcount) - 1);
@@ -2194,7 +2233,7 @@
if (scanf_count < 3)
{
scanf_count =
- (!ignore_first_column ?
+ (!(flgs & LOAD_UNICODE_IGNORE_FIRST_COLUMN) ?
sscanf (p, "%i %i%n", &cp1from, &cp2, &endcount) :
sscanf (p, "%i %i %i%n", &dummy, &cp1from, &cp2, &endcount) - 1);
cp1to = cp1from;
@@ -2253,7 +2292,7 @@
cp1low = cp1 & 255;
#ifndef UNICODE_INTERNAL
- if (big5)
+ if (flgs & LOAD_UNICODE_BIG5)
{
Lisp_Object fake_charset;
int c1, c2;
@@ -2293,15 +2332,35 @@
}
void
+autoload_charset_unicode_tables (Lisp_Object charset)
+{ /* Load CHARSET's Unicode tables from its saved unicode map, then clear its autoload flag. */
+ Lisp_Object map = XCHARSET_UNICODE_MAP (charset);
+ /* init_charset_unicode_map() builds this list itself with list5() from
+ arguments it has already verified, so it should have exactly 5 elements */
+ Lisp_Object filename = X1ST (map);
+ Lisp_Object start = X2ND (map);
+ Lisp_Object end = X3RD (map);
+ Lisp_Object offset = X4TH (map);
+ Lisp_Object flags = X5TH (map);
+
+ assert (EQ (Flength (map), make_int (5)));
+ Fload_unicode_mapping_table (filename, charset, start, end, offset, flags);
+ XCHARSET_DO_AUTOLOAD (charset) = 0; /* cleared only after the load call returns */
+}
+
+void
init_charset_unicode_map (Lisp_Object charset, Lisp_Object map)
{
+ int autoload = 0;
Lisp_Object savemap = map;
CHECK_TRUE_LIST (map);
if (STRINGP (XCAR (map)))
{
- Lisp_Object filename = XCAR (map);
- Lisp_Object start = Qnil, end = Qnil, offset = Qnil, flags = Qnil;
+ Lisp_Object filename = XCAR (map), start = Qnil, end = Qnil,
+ offset = Qnil, flags = Qnil;
+ int st, en, of, flgs;
+
map = XCDR (map);
if (!NILP (map))
{
@@ -2326,8 +2385,14 @@
if (!NILP (map))
invalid_argument ("Unicode map can have at most 5 arguments",
savemap);
- Fload_unicode_mapping_table (filename, charset, start, end,
- offset, flags);
+
+ /* Verify the arguments as much as possible before actually calling
+ load-unicode-mapping-table, and then make a copy of the arglist so
+ the user can't mess with it. */
+ verify_load_unicode_args (filename, start, end, offset, flags,
+ &st, &en, &of, &flgs);
+ map = list5 (filename, start, end, offset, flags);
+ autoload = 1;
}
else
{
@@ -2355,7 +2420,8 @@
/* Only set map after we have gone through everything and gotten
no errors */
- XCHARSET_UNICODE_MAP (charset) = savemap;
+ XCHARSET_UNICODE_MAP (charset) = map;
+ XCHARSET_DO_AUTOLOAD (charset) = autoload;
}
#endif /* MULE */
More information about the XEmacs-Patches
mailing list