APPROVE COMMIT
NOTE: This patch has been committed.
This gives a huge speed improvement when checking whether Unihan.txt can be
encoded by a Unicode coding system, it’s now down to one second on my
machine versus three minutes and three seconds previously. Current GNU has
the code in C and does it in a fraction of a second.
# HG changeset patch
# User Aidan Kehoe <kehoea(a)parhasard.net>
# Date 1233749678 0
# Node ID 4fc32a3a086efffb57b45f49ac1867f4ecf69bbe
# Parent a1a8728fec10bb035ad37b3a21b248dee9d11ef6
Fix a couple of bugs, #'query-coding-region, #'query-coding-string.
2009-02-04 Aidan Kehoe <kehoea(a)parhasard.net>
* coding.el (query-coding-region):
Revert this to being a defun, add a compiler macro without
needless binding.
(query-coding-string):
Correct a bug here, string indices are zero- not one-based.
* mule/general-late.el (unicode-query-coding-skip-chars-arg):
Correct the algorithm used to initialise this variable.
diff -r a1a8728fec10 -r 4fc32a3a086e lisp/ChangeLog
--- a/lisp/ChangeLog Wed Feb 04 11:38:25 2009 +0000
+++ b/lisp/ChangeLog Wed Feb 04 12:14:38 2009 +0000
@@ -1,3 +1,13 @@
+2009-02-04 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * coding.el (query-coding-region):
+ Revert this to being a defun, add a compiler macro without
+ needless binding.
+ (query-coding-string):
+ Correct a bug here, string indices are zero- not one-based.
+ * mule/general-late.el (unicode-query-coding-skip-chars-arg):
+ Correct the algorithm used to initialise this variable.
+
2009-02-04 Aidan Kehoe <kehoea(a)parhasard.net>
* help.el (describe-function-1):
diff -r a1a8728fec10 -r 4fc32a3a086e lisp/coding.el
--- a/lisp/coding.el Wed Feb 04 11:38:25 2009 +0000
+++ b/lisp/coding.el Wed Feb 04 12:14:38 2009 +0000
@@ -398,8 +398,8 @@
(values nil ranges)
(values t nil))))))
-(defsubst query-coding-region (start end coding-system &optional buffer
- errorp highlight)
+(defun query-coding-region (start end coding-system &optional buffer
+ errorp highlight)
"Work out whether CODING-SYSTEM can losslessly encode a region.
START and END are the beginning and end of the region to check.
@@ -423,7 +423,15 @@
#'default-query-coding-region)
start end coding-system buffer errorp highlight))
-(defsubst query-coding-string (string coding-system &optional errorp highlight)
+(define-compiler-macro query-coding-region (start end coding-system
+ &optional buffer errorp highlight)
+ `(funcall (or (coding-system-get ,coding-system 'query-coding-function)
+ #'default-query-coding-region)
+ ,start ,end ,coding-system ,@(append (if buffer (list buffer))
+ (if errorp (list errorp))
+ (if highlight (list highlight)))))
+
+(defun query-coding-string (string coding-system &optional errorp highlight)
"Work out whether CODING-SYSTEM can losslessly encode STRING.
CODING-SYSTEM is the coding system to check.
@@ -442,9 +450,21 @@
`make-range-table'."
(with-temp-buffer
(insert string)
- (query-coding-region (point-min) (point-max) coding-system (current-buffer)
- ;; ### Will highlight work here?
- errorp highlight)))
+ (multiple-value-bind (result ranges)
+ (query-coding-region (point-min) (point-max) coding-system
+ (current-buffer) errorp
+ ;; #### Highlight won't work here,
+ ;; query-coding-region may need to be modified.
+ highlight)
+ (unless result
+ ;; Sigh, string indices are zero-based, buffer offsets are
+ ;; one-based.
+ (map-range-table
+ #'(lambda (begin end value)
+ (remove-range-table begin end ranges)
+ (put-range-table (1- begin) (1- end) value ranges))
+ ranges))
+ (values result ranges))))
;; Function docstring and API are taken from GNU coding.c version 1.353, GPLv2.
(defun unencodable-char-position (start end coding-system
diff -r a1a8728fec10 -r 4fc32a3a086e lisp/mule/general-late.el
--- a/lisp/mule/general-late.el Wed Feb 04 11:38:25 2009 +0000
+++ b/lisp/mule/general-late.el Wed Feb 04 12:14:38 2009 +0000
@@ -71,7 +71,7 @@
unicode-query-coding-skip-chars-arg
(eval-when-compile
- (when-fboundp #'map-charset-chars
+ (when-fboundp 'map-charset-chars
(loop
for charset in (charset-list)
with skip-chars-string = ""
@@ -80,17 +80,16 @@
(map-charset-chars
#'(lambda (begin end)
(loop
- while (/= end begin)
+ while (and begin (>= end begin))
do
(when (= -1 (char-to-unicode begin))
- (setq this-charset-works nil)
(return-from no-ucs-mapping))
(setq begin (int-to-char (1+ begin)))))
charset)
(setq skip-chars-string
(concat skip-chars-string
(charset-skip-chars-string charset))))
- finally return (skip-chars-quote skip-chars-string)))))
+ finally return skip-chars-string))))
;; At this point in the dump, all the charsets have been loaded. Now, load
;; their Unicode mappings.
--
¿Dónde estará ahora mi sobrino Yoghurtu Nghe, que tuvo que huir
precipitadamente de la aldea por culpa de la escasez de rinocerontes?
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://calypso.tux.org/cgi-bin/mailman/listinfo/xemacs-patches