# HG changeset patch
# User Aidan Kehoe <kehoea(a)parhasard.net>
# Date 1217920001 -7200
# Node ID b75b075a9041ab8969a6351fa85560e739dcdbbb
# Parent 6b0000935adc3f79cb189350d6014d4b4aff734e
Support displaying invalid UTF-8 in language-environment-specific ways.
2008-08-05 Aidan Kehoe <kehoea(a)parhasard.net>
* specifier.el (current-display-table): Initialise this here, not
in x-init.el, since we want it even on non-X builds to use the
support for displaying Unicode error sequences according to the
current locale.
* mule/mule-cmds.el (set-language-info):
Document error-sequence-coding-system, used to describe how to
display characters that are not valid Unicode on disk.
* mule/mule-cmds.el (finish-set-language-environment):
Implement error-sequence-coding-system.
* unicode.el (unicode-error-sequence-warning-face):
New face, to make it possible to distinguish invalid Unicode
sequences from the characters given by the valid Unicode
sequences.
* mule/cyrillic.el ("Russian"):
("Ukrainian"):
("Bulgarian"):
("Belarusian"):
("Cyrillic-ALT"): Add support for error-sequence-coding-system for
all these languages.
* mule/latin.el:
Add support for error-sequence-coding-system for the
Latin-alphabet language environments.
diff -r 6b0000935adc -r b75b075a9041 lisp/ChangeLog
--- a/lisp/ChangeLog Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/ChangeLog Tue Aug 05 09:06:41 2008 +0200
@@ -1,3 +1,28 @@
+2008-08-05 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * specifier.el (current-display-table): Initialise this here, not
+ in x-init.el, since we want it even on non-X builds to use the
+ support for displaying Unicode error sequences according to the
+ current locale.
+ * mule/mule-cmds.el (set-language-info):
+ Document error-sequence-coding-system, used to describe how to
+ display characters that are not valid Unicode on disk.
+ * mule/mule-cmds.el (finish-set-language-environment):
+ Implement error-sequence-coding-system.
+ * unicode.el (unicode-error-sequence-warning-face):
+ New face, to make it possible to distinguish invalid Unicode
+ sequences from the characters given by the valid Unicode
+ sequences.
+ * mule/cyrillic.el ("Russian"):
+ ("Ukrainian"):
+ ("Bulgarian"):
+ ("Belarusian"):
+ ("Cyrillic-ALT"): Add support for error-sequence-coding-system for
+ all these languages.
+ * mule/latin.el:
+ Add support for error-sequence-coding-system for the
+ Latin-alphabet language environments.
+
2008-07-26 Aidan Kehoe <kehoea(a)parhasard.net>
* x-init.el (x-initialize-compose):
diff -r 6b0000935adc -r b75b075a9041 lisp/mule/cyrillic.el
--- a/lisp/mule/cyrillic.el Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/mule/cyrillic.el Tue Aug 05 09:06:41 2008 +0200
@@ -370,6 +370,7 @@
(coding-system koi8-r)
(native-coding-system koi8-r)
(coding-priority koi8-r)
+ (error-sequence-coding-system koi8-r)
(input-method . "cyrillic-yawerty")
(features cyril-util)
(locale "ru")
@@ -543,6 +544,7 @@
"Ukrainian" '((coding-system koi8-u)
(coding-priority koi8-u)
(locale "uk")
+ (error-sequence-coding-system koi8-u)
(input-method . "cyrillic-ukrainian")
(documentation
. "Support for Ukrainian."))
@@ -689,6 +691,7 @@
(set-language-info-alist
"Bulgarian" '((coding-system windows-1251)
(coding-priority windows-1251)
+ (error-sequence-coding-system windows-1251)
(input-method . "bulgarian-bds")
(locale "bg")
(documentation
@@ -699,6 +702,7 @@
(set-language-info-alist
"Belarusian" '((coding-system windows-1251)
(coding-priority windows-1251)
+ (error-sequence-coding-system windows-1251)
(locale "be")
(input-method . "belarusian")
(documentation
@@ -845,6 +849,7 @@
"Cyrillic-ALT" '((charset cyrillic-iso8859-5)
(coding-system alternativnyj)
(native-coding-system alternativnyj)
+ (error-sequence-coding-system alternativnyj)
(coding-priority alternativnyj)
(input-method . "cyrillic-yawerty")
(features cyril-util)
diff -r 6b0000935adc -r b75b075a9041 lisp/mule/greek.el
--- a/lisp/mule/greek.el Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/mule/greek.el Tue Aug 05 09:06:41 2008 +0200
@@ -328,6 +328,7 @@
(coding-system iso-8859-7)
(coding-priority iso-8859-7)
(native-coding-system iso-8859-7)
+ (error-sequence-coding-system iso-8859-7)
(locale "el")
(input-method . "greek")
(sample-text . "Greek (Ηλλένικα) Γειά σας")
diff -r 6b0000935adc -r b75b075a9041 lisp/mule/latin.el
--- a/lisp/mule/latin.el Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/mule/latin.el Tue Aug 05 09:06:41 2008 +0200
@@ -957,11 +957,12 @@
for ((charset codesys default-input nice-charset-1 nice-charset-2
;; supported-langs is a list if the doc string is replaced
;; entirely
- supported-langs)
+ supported-langs error-sequence-coding-system)
langenvs) in
'(((latin-iso8859-1 iso-8859-1 "latin-1-prefix" "Latin-1"
"ISO-8859-1"
" Danish, Dutch, English, Faeroese, Finnish, French, German, Icelandic,
- Irish, Italian, Norwegian, Portuguese, Spanish, and Swedish.")
+ Irish, Italian, Norwegian, Portuguese, Spanish, and Swedish."
+ windows-1252)
(("Danish" "da")
("Dutch" "nl" "TUTORIAL.nl")
("Faeroese" "fo")
@@ -1024,6 +1025,8 @@
(coding-system ,codesys)
(coding-priority ,codesys)
(native-coding-system ,codesys)
+ (error-sequence-coding-system ,(or error-sequence-coding-system
+ codesys))
(documentation . ,(if (listp supported-langs) (car supported-langs)
(format "\
Generic language environment for %s (%s)." nice-charset-1 nice-charset-2))))
diff -r 6b0000935adc -r b75b075a9041 lisp/mule/mule-cmds.el
--- a/lisp/mule/mule-cmds.el Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/mule/mule-cmds.el Tue Aug 05 09:06:41 2008 +0200
@@ -225,7 +225,15 @@
If there is no value for this property, the MS Windows
locale is assumed to have the same name as the
- language environment."
+ language environment.
+
+ error-sequence-coding-system
+ VALUE is a fixed-width 8-bit coding system used to
+ display Unicode error sequences (using a face to make
+ it clear that the data is invalid). In Western Europe
+ this is normally windows-1252; in Russia and the
+ former Soviet Union koi8-ru or windows-1251 makes more
+ sense."
(if (symbolp lang-env)
(setq lang-env (symbol-name lang-env)))
(let (lang-slot prop-slot)
@@ -759,6 +767,24 @@
(let ((func (get-language-info language-name 'setup-function)))
(if (functionp func)
(funcall func)))
+
+ (let ((error-sequence-coding-system
+ (get-language-info language-name 'error-sequence-coding-system))
+ (disp-table (specifier-instance current-display-table))
+ glyph)
+ (when (consp error-sequence-coding-system)
+ (setq error-sequence-coding-system (car error-sequence-coding-system)))
+ (map-char-table
+ #'(lambda (key entry)
+ (setq glyph (make-glyph
+ (vector
+ 'string :data
+ (decode-coding-string (string entry)
+ error-sequence-coding-system))))
+ (set-glyph-face glyph 'unicode-error-sequence-warning-face)
+ (put-char-table key glyph disp-table)
+ nil)
+ unicode-error-default-translation-table))
;; Fit the charsets preferences in unicode conversions for the
;; language environment.
diff -r 6b0000935adc -r b75b075a9041 lisp/specifier.el
--- a/lisp/specifier.el Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/specifier.el Tue Aug 05 09:06:41 2008 +0200
@@ -988,4 +988,18 @@
(specifier-instance specifier domain))))
(list (cons nil inst))))))))))
+;; Character 160 (octal 0240) displays incorrectly under some X
+;; installations apparently due to a universally crocked font width
+;; specification. Display it as a space since that's what's expected.
+;;
+;; A literal generic char table instead of (make-display-table) because
+;; make-display-table isn't dumped, and this file is.
+;;
+;; We also want the global display table to be actually globally
+;; initialised; that's why this is here, and not in x-init.el, these days.
+
+(set-specifier current-display-table
+ #s(char-table type generic data (?\xA0 ?\x20))
+ 'global)
+
;;; specifier.el ends here
diff -r 6b0000935adc -r b75b075a9041 lisp/unicode.el
--- a/lisp/unicode.el Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/unicode.el Tue Aug 05 09:06:41 2008 +0200
@@ -611,6 +611,9 @@
(translate-region start finish table))
begin end buffer))
+;; Sure would be nice to be able to use defface here.
+(copy-face 'highlight 'unicode-error-sequence-warning-face)
+
(unless (featurep 'mule)
;; We do this in such a roundabout way--instead of having the above defun
;; and defvar calls inside a (when (featurep 'mule) ...) form--to have
diff -r 6b0000935adc -r b75b075a9041 lisp/x-init.el
--- a/lisp/x-init.el Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/x-init.el Tue Aug 05 09:06:41 2008 +0200
@@ -312,15 +312,4 @@
(if (equal display "") (setq display nil))
(make-frame-on-device 'x display props))
-;; Character 160 (octal 0240) displays incorrectly under X apparently
-;; due to a universally crocked font width specification. Display it
-;; as a space since that's what seems to be expected.
-;;
-;; (make-char-table 'generic) instead of (make-display-table) because
-;; make-display-table isn't dumped, and this file is.
-
-(let ((tab (make-char-table 'generic)))
- (put-char-table 160 " " tab)
- (set-specifier current-display-table tab 'global 'x))
-
;;; x-init.el ends here
--
¿Dónde estará ahora mi sobrino Yoghurtu Nghé, que tuvo que huir
precipitadamente de la aldea por culpa de la escasez de rinocerontes?
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://calypso.tux.org/cgi-bin/mailman/listinfo/xemacs-patches