changeset: 4549:68d1ca56cffadada6a0026664ef87ae7a5b1a0b8
parent: 4402:e70cc8a90e9043075a0e40fe9587d2c931fb618e
user: Aidan Kehoe <kehoea(a)parhasard.net>
date: Mon Jan 21 22:51:21 2008 +0100
files: lisp/ChangeLog lisp/coding.el lisp/mule/general-late.el
lisp/mule/mule-charset.el lisp/mule/mule-coding.el lisp/unicode.el
description:
First part of interactive checks that coding systems encode regions.
2008-01-21 Aidan Kehoe <kehoea(a)parhasard.net>
* coding.el (decode-coding-string):
(encode-coding-string): Accept GNU's NOCOPY argument for
these. Todo; write compiler macros to use it.
(query-coding-warning-face): New face, to show unencodable
characters.
(default-query-coding-region-safe-charset-skip-chars-map):
New variable, a cache used by #'default-query-coding-region.
(default-query-coding-region): Default implementation of
#'query-coding-region, using the safe-charsets and safe-chars
coding systemproperties.
(query-coding-region): New function; can a given coding system
encode a given region?
(query-coding-string): New function; can a given coding system
encode a given string?
(unencodable-char-position): Function API taken from GNU; return
the first unencodable position given a string and coding system.
(encode-coding-char): Function API taken from GNU; return CHAR
encoded using CODING-SYSTEM, or nil if CODING-SYSTEM would trash
CHAR.
((unless (featurep 'mule)): Override the default
query-coding-region implementation on non-Mule.
* mule/mule-coding.el (make-8-bit-generate-helper): Eliminate a
duplicate comment.
(make-8-bit-choose-category): Simplify implementation.
(8-bit-fixed-query-coding-region): Implementation of
#'query-coding-region for coding systems created with
#'make-8-bit-coding-system.
(make-8-bit-coding-system): Initialise the #'query-coding-region
implementation for these character sets.
(make-8-bit-coding-system): Ditto for the compiler macro version
of this function.
* unicode.el (unicode-query-coding-skip-chars-arg): New variable,
used by unicode-query-coding-region, initialised in
mule/general-late.el.
(unicode-query-coding-region): New function, the
#'query-coding-region implementation for Unicode coding systems.
Initialise the query-coding-function property for the Unicode
coding systems to #'unicode-query-coding-region.
* mule/mule-charset.el (charset-skip-chars-string): New
function. Return a #'skip-chars-forward argument that skips all
characters in CHARSET.
(map-charset-chars): Function synced from GNU, modified to work
with XEmacs. Map FUNC across the int value charset ranges of
CHARSET.
diff -r e70cc8a90e9043075a0e40fe9587d2c931fb618e -r
68d1ca56cffadada6a0026664ef87ae7a5b1a0b8 lisp/ChangeLog
--- a/lisp/ChangeLog Thu Jan 17 11:55:11 2008 +0100
+++ b/lisp/ChangeLog Mon Jan 21 22:51:21 2008 +0100
@@ -1,3 +1,50 @@ 2008-01-17 Mike Sperber <mike(a)xemacs.o
+2008-01-21 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * coding.el (decode-coding-string):
+ (encode-coding-string): Accept GNU's NOCOPY argument for
+ these. Todo; write compiler macros to use it.
+ (query-coding-warning-face): New face, to show unencodable
+ characters.
+ (default-query-coding-region-safe-charset-skip-chars-map):
+ New variable, a cache used by #'default-query-coding-region.
+ (default-query-coding-region): Default implementation of
+ #'query-coding-region, using the safe-charsets and safe-chars
+ coding systemproperties.
+ (query-coding-region): New function; can a given coding system
+ encode a given region?
+ (query-coding-string): New function; can a given coding system
+ encode a given string?
+ (unencodable-char-position): Function API taken from GNU; return
+ the first unencodable position given a string and coding system.
+ (encode-coding-char): Function API taken from GNU; return CHAR
+ encoded using CODING-SYSTEM, or nil if CODING-SYSTEM would trash
+ CHAR.
+ ((unless (featurep 'mule)): Override the default
+ query-coding-region implementation on non-Mule.
+ * mule/mule-coding.el (make-8-bit-generate-helper): Eliminate a
+ duplicate comment.
+ (make-8-bit-choose-category): Simplify implementation.
+ (8-bit-fixed-query-coding-region): Implementation of
+ #'query-coding-region for coding systems created with
+ #'make-8-bit-coding-system.
+ (make-8-bit-coding-system): Initialise the #'query-coding-region
+ implementation for these character sets.
+ (make-8-bit-coding-system): Ditto for the compiler macro version
+ of this function.
+ * unicode.el (unicode-query-coding-skip-chars-arg): New variable,
+ used by unicode-query-coding-region, initialised in
+ mule/general-late.el.
+ (unicode-query-coding-region): New function, the
+ #'query-coding-region implementation for Unicode coding systems.
+ Initialise the query-coding-function property for the Unicode
+ coding systems to #'unicode-query-coding-region.
+ * mule/mule-charset.el (charset-skip-chars-string): New
+ function. Return a #'skip-chars-forward argument that skips all
+ characters in CHARSET.
+ (map-charset-chars): Function synced from GNU, modified to work
+ with XEmacs. Map FUNC across the int value charset ranges of
+ CHARSET.
+
2008-01-17 Mike Sperber <mike(a)xemacs.org>
* files.el (insert-directory): Bind `coding-system-for-read' to
diff -r e70cc8a90e9043075a0e40fe9587d2c931fb618e -r
68d1ca56cffadada6a0026664ef87ae7a5b1a0b8 lisp/coding.el
--- a/lisp/coding.el Thu Jan 17 11:55:11 2008 +0100
+++ b/lisp/coding.el Mon Jan 21 22:51:21 2008 +0100
@@ -125,15 +125,20 @@ from a Lisp program, use `detect-coding-
(interactive "r\nP")
(princ (detect-coding-region start end)))
-(defun decode-coding-string (str coding-system)
+(defun decode-coding-string (str coding-system &optional nocopy)
"Decode the string STR which is encoded in CODING-SYSTEM.
-Does not modify STR. Returns the decoded string on successful conversion."
+Normally does not modify STR. Returns the decoded string on
+successful conversion.
+Optional argument NOCOPY says that modifying STR and returning it is
+allowed."
(with-string-as-buffer-contents
str (decode-coding-region (point-min) (point-max) coding-system)))
-(defun encode-coding-string (str coding-system)
+(defun encode-coding-string (str coding-system &optional nocopy)
"Encode the string STR using CODING-SYSTEM.
-Does not modify STR. Returns the encoded string on successful conversion."
+Does not modify STR. Returns the encoded string on successful conversion.
+Optional argument NOCOPY says that the original string may be returned
+if does not differ from the encoded string. "
(with-string-as-buffer-contents
str (encode-coding-region (point-min) (point-max) coding-system)))
@@ -274,4 +279,204 @@ Does not modify STR. Returns the encode
(make-compatible-variable 'enable-multibyte-characters "Unimplemented")
+;; Sure would be nice to be able to use defface here.
+(copy-face 'highlight 'query-coding-warning-face)
+
+(defvar default-query-coding-region-safe-charset-skip-chars-map
+ #s(hash-table test equal data ())
+ "A map from list of charsets to `skip-chars-forward' arguments for
them.")
+
+(defun default-query-coding-region (begin end coding-system
+ &optional buffer errorp highlightp)
+ "The default `query-coding-region' implementation.
+
+Uses the `safe-charsets' and `safe-chars' coding system properties.
+The former is a list of XEmacs character sets that can be safely
+encoded by CODING-SYSTEM; the latter a char table describing, in
+addition, characters that can be safely encoded by CODING-SYSTEM."
+ (check-argument-type #'coding-system-p
+ (setq coding-system (find-coding-system coding-system)))
+ (check-argument-type #'integer-or-marker-p begin)
+ (check-argument-type #'integer-or-marker-p end)
+ (let* ((safe-charsets
+ (coding-system-get coding-system 'safe-charsets))
+ (safe-chars (coding-system-get coding-system 'safe-chars))
+ (skip-chars-arg
+ (gethash safe-charsets
+ default-query-coding-region-safe-charset-skip-chars-map))
+ (ranges (make-range-table))
+ fail-range-start fail-range-end previous-fail char-after
+ looking-at-arg failed extent)
+ (unless skip-chars-arg
+ (setq skip-chars-arg
+ (puthash safe-charsets
+ (mapconcat #'charset-skip-chars-string
+ safe-charsets "")
+ default-query-coding-region-safe-charset-skip-chars-map)))
+ (if (and (zerop (length skip-chars-arg)) (null safe-chars))
+ (progn
+ ;; Uh-oh, nothing known about this coding system. Fail.
+ (when errorp
+ (error 'text-conversion-error
+ "Coding system doesn't say what it can encode"
+ (coding-system-name coding-system)))
+ (put-range-table begin end t ranges)
+ (when highlightp
+ (setq extent (make-extent begin end buffer))
+ (set-extent-priority extent (+ mouse-highlight-priority 2))
+ (set-extent-face extent 'query-coding-warning-face))
+ (values nil ranges))
+ (setq looking-at-arg (if (equal "" skip-chars-arg)
+ ;; Regexp that will never match.
+ #r".\{0,0\}"
+ (concat "[" skip-chars-arg "]")))
+ (save-excursion
+ (goto-char begin buffer)
+ (skip-chars-forward skip-chars-arg end buffer)
+ (while (< (point buffer) end)
+ (message
+ "fail-range-start is %S, previous-fail %S, point is %S, end is %S"
+ fail-range-start previous-fail (point buffer) end)
+ (setq char-after (char-after (point buffer) buffer)
+ fail-range-start (point buffer))
+ (while (and
+ (< (point buffer) end)
+ (not (looking-at looking-at-arg))
+ (or (not safe-chars)
+ (not (get-char-table char-after safe-chars))))
+ (forward-char 1 buffer)
+ (setq char-after (char-after (point buffer) buffer)
+ failed t))
+ (if (= fail-range-start (point buffer))
+ ;; The character can actually be encoded by the coding
+ ;; system; check the characters past it.
+ (forward-char 1 buffer)
+ ;; Can't be encoded; note this.
+ (when errorp
+ (error 'text-conversion-error
+ (format "Cannot encode %s using coding system"
+ (buffer-substring fail-range-start (point buffer)
+ buffer))
+ (coding-system-name coding-system)))
+ (put-range-table fail-range-start
+ ;; If char-after is non-nil, we're not at
+ ;; the end of the buffer.
+ (setq fail-range-end (if char-after
+ (point buffer)
+ (point-max buffer)))
+ t ranges)
+ (when highlightp
+ (setq extent (make-extent fail-range-start fail-range-end buffer))
+ (set-extent-priority extent (+ mouse-highlight-priority 2))
+ (set-extent-face extent 'query-coding-warning-face)))
+ (skip-chars-forward skip-chars-arg end buffer))
+ (if failed
+ (values nil ranges)
+ (values t nil))))))
+
+(defsubst query-coding-region (start end coding-system &optional buffer
+ errorp highlight)
+ "Work out whether CODING-SYSTEM can losslessly encode a region.
+
+START and END are the beginning and end of the region to check.
+CODING-SYSTEM is the coding system to try.
+
+Optional argument BUFFER is the buffer to check, and defaults to the current
+buffer. Optional argument ERRORP says to signal a `text-conversion-error'
+if some character in the region cannot be encoded, and defaults to nil.
+
+Optional argument HIGHLIGHT says to display unencodable characters in the
+region using `query-coding-warning-face'. It defaults to nil.
+
+This function returns a list; the intention is that callers use use
+`multiple-value-bind' or the related CL multiple value functions to deal
+with it. The first element is `t' if the string can be encoded using
+CODING-SYSTEM, or `nil' if not. The second element is `nil' if the string
+can be encoded using CODING-SYSTEM; otherwise, it is a range table
+describing the positions of the unencodable characters. See
+`make-range-table'."
+ (funcall (or (coding-system-get coding-system 'query-coding-function)
+ #'default-query-coding-region)
+ start end coding-system buffer errorp highlight))
+
+(defun query-coding-string (string coding-system &optional errorp highlight)
+ "Work out whether CODING-SYSTEM can losslessly encode STRING.
+CODING-SYSTEM is the coding system to check.
+
+Optional argument ERRORP says to signal a `text-conversion-error' if some
+character in the region cannot be encoded, and defaults to nil.
+
+Optional argument HIGHLIGHT says to display unencodable characters in the
+region using `query-coding-warning-face'. It defaults to nil.
+
+This function returns a list; the intention is that callers use use
+`multiple-value-bind' or the related CL multiple value functions to deal
+with it. The first element is `t' if the string can be encoded using
+CODING-SYSTEM, or `nil' if not. The second element is `nil' if the string
+can be encoded using CODING-SYSTEM; otherwise, it is a range table
+describing the positions of the unencodable characters. See
+`make-range-table'."
+ (with-temp-buffer
+ (insert string)
+ (query-coding-region (point-min) (point-max) coding-system (current-buffer)
+ ;; ### Will highlight work here?
+ errorp highlight)))
+
+(defun unencodable-char-position (start end coding-system
+ &optional count string)
+ "Return position of first un-encodable character in a region.
+START and END specify the region and CODING-SYSTEM specifies the
+encoding to check. Return nil if CODING-SYSTEM does encode the region.
+
+If optional 4th argument COUNT is non-nil, it specifies at most how
+many un-encodable characters to search. In this case, the value is a
+list of positions.
+
+If optional 5th argument STRING is non-nil, it is a string to search
+for un-encodable characters. In that case, START and END are indexes
+in the string."
+ (flet ((thunk ()
+ (multiple-value-bind (result ranges)
+ (query-coding-region start end coding-system)
+ (if result
+ ;; If query-coding-region thinks the entire region is
+ ;; encodable, result will be t, and the thunk should
+ ;; return nil, because there are no unencodable
+ ;; positions in the region.
+ nil
+ (if count
+ (block counted
+ (map-range-table
+ #'(lambda (begin end value)
+ (while (and (<= begin end) (<= begin count))
+ (push begin result)
+ (incf begin))
+ (if (> begin count) (return-from counted)))
+ ranges))
+ (map-range-table
+ #'(lambda (begin end value)
+ (while (<= begin end)
+ (push begin result)
+ (incf begin))) ranges))
+ result))))
+ (if string
+ (with-temp-buffer (insert string) (thunk))
+ (thunk))))
+
+(defun encode-coding-char (char coding-system)
+ "Encode CHAR by CODING-SYSTEM and return the resulting string.
+If CODING-SYSTEM can't safely encode CHAR, return nil."
+ (check-argument-type #'characterp char)
+ (multiple-value-bind (succeededp)
+ (query-coding-string char coding-system)
+ (when succeededp
+ (encode-coding-string char coding-system))))
+
+(unless (featurep 'mule)
+ ;; If we're under non-Mule, every XEmacs character can be encoded
+ ;; with every XEmacs coding system.
+ (fset #'default-query-coding-region
+ #'(lambda (&rest ignored) (values t nil)))
+ (unintern 'default-query-coding-region-safe-charset-skip-chars-map))
+
;;; coding.el ends here
diff -r e70cc8a90e9043075a0e40fe9587d2c931fb618e -r
68d1ca56cffadada6a0026664ef87ae7a5b1a0b8 lisp/mule/general-late.el
--- a/lisp/mule/general-late.el Thu Jan 17 11:55:11 2008 +0100
+++ b/lisp/mule/general-late.el Mon Jan 21 22:51:21 2008 +0100
@@ -63,7 +63,34 @@
(decode-coding-string
Installation-string
Installation-file-coding-system)
- Installation-string))
+ Installation-string)
+
+ ;; Convince the byte compiler that, really, this file can't be encoded
+ ;; as binary. Ugh.
+ system-type (symbol-value (intern "\u0073ystem-type"))
+
+ unicode-query-coding-skip-chars-arg
+ (eval-when-compile
+ (when-fboundp #'map-charset-chars
+ (loop
+ for charset in (charset-list)
+ with skip-chars-string = ""
+ do
+ (block no-ucs-mapping
+ (map-charset-chars
+ #'(lambda (begin end)
+ (loop
+ while (/= end begin)
+ do
+ (when (= -1 (char-to-unicode begin))
+ (setq this-charset-works nil)
+ (return-from no-ucs-mapping))
+ (setq begin (int-to-char (1+ begin)))))
+ charset)
+ (setq skip-chars-string
+ (concat skip-chars-string
+ (charset-skip-chars-string charset))))
+ finally return skip-chars-string))))
;; At this point in the dump, all the charsets have been loaded. Now, load
;; their Unicode mappings.
diff -r e70cc8a90e9043075a0e40fe9587d2c931fb618e -r
68d1ca56cffadada6a0026664ef87ae7a5b1a0b8 lisp/mule/mule-charset.el
--- a/lisp/mule/mule-charset.el Thu Jan 17 11:55:11 2008 +0100
+++ b/lisp/mule/mule-charset.el Mon Jan 21 22:51:21 2008 +0100
@@ -116,6 +116,65 @@ See `make-charset'."
(defun charset-bytes (charset)
"Useless in XEmacs, returns 1."
1)
+
+(defun charset-skip-chars-string (charset)
+ "Given CHARSET, return a string suitable for for `skip-chars-forward'.
+Passing the string to `skip-chars-forward' will cause it to skip all
+characters in CHARSET."
+ (setq charset (get-charset charset))
+ (cond
+ ;; Aargh, the general algorithm doesn't work for these charsets, because
+ ;; make-char strips the high bit. Hard code them.
+ ((eq (find-charset 'ascii) charset) "\x00-\x7f")
+ ((eq (find-charset 'control-1) charset) "\x80-\x9f")
+ (t
+ (let (charset-lower charset-upper row-upper row-lower)
+ (if (= 1 (charset-dimension charset))
+ (condition-case args-out-of-range
+ (make-char charset #x100)
+ (args-out-of-range
+ (setq charset-lower (third args-out-of-range)
+ charset-upper (fourth args-out-of-range))
+ (format "%c-%c"
+ (make-char charset charset-lower)
+ (make-char charset charset-upper))))
+ (condition-case args-out-of-range
+ (make-char charset #x100 #x22)
+ (args-out-of-range
+ (setq row-lower (third args-out-of-range)
+ row-upper (fourth args-out-of-range))))
+ (condition-case args-out-of-range
+ (make-char charset #x22 #x100)
+ (args-out-of-range
+ (setq charset-lower (third args-out-of-range)
+ charset-upper (fourth args-out-of-range))))
+ (format "%c-%c"
+ (make-char charset row-lower charset-lower)
+ (make-char charset row-upper charset-upper)))))))
+;; From GNU.
+(defun map-charset-chars (func charset)
+ "Use FUNC to map over all characters in CHARSET for side effects.
+FUNC is a function of two args, the start and end (inclusive) of a
+character code range. Thus FUNC should iterate over [START, END]."
+ (check-argument-type #'functionp func)
+ (check-argument-type #'charsetp (setq charset (find-charset charset)))
+ (let* ((dim (charset-dimension charset))
+ (chars (charset-chars charset))
+ (start (if (= chars 94)
+ 33
+ 32)))
+ (if (= dim 1)
+ (cond
+ ((eq (find-charset 'ascii) charset) (funcall func ?\x00 ?\x7f))
+ ((eq (find-charset 'control-1) charset) (funcall func ?\x80 ?\x9f))
+ (t
+ (funcall func
+ (make-char charset start)
+ (make-char charset (+ start chars -1)))))
+ (dotimes (i chars)
+ (funcall func
+ (make-char charset (+ i start) start)
+ (make-char charset (+ i start) (+ start chars -1)))))))
;;;; Define setf methods for all settable Charset properties
diff -r e70cc8a90e9043075a0e40fe9587d2c931fb618e -r
68d1ca56cffadada6a0026664ef87ae7a5b1a0b8 lisp/mule/mule-coding.el
--- a/lisp/mule/mule-coding.el Thu Jan 17 11:55:11 2008 +0100
+++ b/lisp/mule/mule-coding.el Mon Jan 21 22:51:21 2008 +0100
@@ -238,8 +238,6 @@ returns a list corresponding to such a c
(if (r0 == ,(charset-id 'ascii))
(write r1)
((if (r0 == #xABAB)
- ;; #xBFFE is a sentinel in the compiled
- ;; program.
;; #xBFFE is a sentinel in the compiled
;; program.
((r0 = r1 & #x7F)
@@ -531,12 +529,85 @@ disk to XEmacs characters for some fixed
disk to XEmacs characters for some fixed-width 8-bit coding system. "
(check-argument-type #'vectorp decode-table)
(check-argument-range (length decode-table) #x100 #x100)
- (block category
- (loop
- for i from #x80 to #xBF
- do (unless (= i (aref decode-table i))
- (return-from category 'no-conversion)))
- 'iso-8-1))
+ (loop
+ named category
+ for i from #x80 to #xBF
+ do (unless (= i (aref decode-table i))
+ (return-from category 'no-conversion))
+ finally return 'iso-8-1))
+
+(defun 8-bit-fixed-query-coding-region (begin end coding-system
+ &optional buffer errorp highlightp)
+ "The `query-coding-region' implementation for 8-bit-fixed coding systems.
+
+Uses the `8-bit-fixed-query-from-unicode' and `8-bit-fixed-query-skip-chars'
+coding system properties. The former is a hash table mapping from valid
+Unicode code points to on-disk octets in the coding system; the latter a set
+of characters as used by `skip-chars-forward'. Both of these properties are
+generated automatically by `make-8-bit-coding-system'.
+
+See that the documentation of `query-coding-region'; see also
+`make-8-bit-coding-system'. "
+ (check-argument-type #'coding-system-p
+ (setq coding-system (find-coding-system coding-system)))
+ (check-argument-type #'integer-or-marker-p begin)
+ (check-argument-type #'integer-or-marker-p end)
+ (let ((from-unicode
+ (coding-system-get coding-system '8-bit-fixed-query-from-unicode))
+ (skip-chars-arg
+ (coding-system-get coding-system '8-bit-fixed-query-skip-chars))
+ (ranges (make-range-table))
+ char-after fail-range-start fail-range-end previous-fail extent
+ failed)
+ (check-type from-unicode hash-table)
+ (check-type skip-chars-arg string)
+ (save-excursion
+ (goto-char begin buffer)
+ (skip-chars-forward skip-chars-arg end buffer)
+ (while (< (point buffer) end)
+ (message
+ "fail-range-start is %S, previous-fail %S, point is %S, end is %S"
+ fail-range-start previous-fail (point buffer) end)
+ (setq char-after (char-after (point buffer) buffer)
+ fail-range-start (point buffer))
+ (message "arguments are %S %S"
+ (< (point buffer) end)
+ (not (gethash (encode-char char-after 'ucs) from-unicode)))
+ (while (and
+ (< (point buffer) end)
+ (not (gethash (encode-char char-after 'ucs) from-unicode)))
+ (forward-char 1 buffer)
+ (setq char-after (char-after (point buffer) buffer)
+ failed t))
+ (if (= fail-range-start (point buffer))
+ ;; The character can actually be encoded by the coding
+ ;; system; check the characters past it.
+ (forward-char 1 buffer)
+ ;; The character actually failed.
+ (message "past the move through, point now %S" (point buffer))
+ (when errorp
+ (error 'text-conversion-error
+ (format "Cannot encode %s using coding system"
+ (buffer-substring fail-range-start (point buffeR)
+ buffer))
+ (coding-system-name coding-system)))
+ (put-range-table fail-range-start
+ ;; If char-after is non-nil, we're not at
+ ;; the end of the buffer.
+ (setq fail-range-end (if char-after
+ (point buffer)
+ (point-max buffer)))
+ t ranges)
+ (when highlightp
+ (message "highlighting")
+ (setq extent (make-extent fail-range-start fail-range-end buffer))
+ (set-extent-priority extent (+ mouse-highlight-priority 2))
+ (set-extent-face extent 'query-coding-warning-face))
+ (skip-chars-forward skip-chars-arg end buffer))
+ (message "about to give the result, ranges %S" ranges))
+ (if failed
+ (values nil ranges)
+ (values t nil)))))
;;;###autoload
(defun make-8-bit-coding-system (name unicode-map &optional description props)
@@ -618,13 +689,27 @@ the code for tilde `~'. "
(coding-system-put name '8-bit-fixed t)
(coding-system-put name 'category
(make-8-bit-choose-category decode-table))
+ (coding-system-put name '8-bit-fixed-query-skip-chars
+ (apply #'string (append decode-table nil)))
+ (coding-system-put name '8-bit-fixed-query-from-unicode encode-table)
+
+ (coding-system-put name 'query-coding-function
+ #'8-bit-fixed-query-coding-region)
+ (coding-system-put (intern (format "%s-unix" name))
+ 'query-coding-function
+ #'8-bit-fixed-query-coding-region)
+ (coding-system-put (intern (format "%s-dos" name))
+ 'query-coding-function
+ #'8-bit-fixed-query-coding-region)
+ (coding-system-put (intern (format "%s-mac" name))
+ 'query-coding-function
+ #'8-bit-fixed-query-coding-region)
(loop for alias in aliases
do (define-coding-system-alias alias name))
result))
(define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map
&optional description props)
-
;; We provide the compiler macro (= macro that is expanded only on
;; compilation, and that can punt to a runtime version of the
;; associate function if necessary) not for reasons of speed, though
@@ -674,8 +759,9 @@ the code for tilde `~'. "
;; (invalid-read-syntax "Multiply defined symbol label" 1)
;;
;; when the file is byte compiled.
- (case-fold-search t))
- (define-translation-hash-table encode-table-sym ,encode-table)
+ (case-fold-search t)
+ (encode-table ,encode-table))
+ (define-translation-hash-table encode-table-sym encode-table)
(make-coding-system
',name 'ccl ,description
(plist-put (plist-put ',props 'decode
@@ -688,8 +774,22 @@ the code for tilde `~'. "
(symbol-value 'encode-table-sym)))
',encode-program))))
(coding-system-put ',name '8-bit-fixed t)
- (coding-system-put ',name 'category ',
- (make-8-bit-choose-category decode-table))
+ (coding-system-put ',name 'category
+ ',(make-8-bit-choose-category decode-table))
+ (coding-system-put ',name '8-bit-fixed-query-skip-chars
+ ',(apply #'string (append decode-table nil)))
+ (coding-system-put ',name '8-bit-fixed-query-from-unicode encode-table)
+ (coding-system-put ',name 'query-coding-function
+ #'8-bit-fixed-query-coding-region)
+ (coding-system-put ',(intern (format "%s-unix" name))
+ 'query-coding-function
+ #'8-bit-fixed-query-coding-region)
+ (coding-system-put ',(intern (format "%s-dos" name))
+ 'query-coding-function
+ #'8-bit-fixed-query-coding-region)
+ (coding-system-put ',(intern (format "%s-mac" name))
+ 'query-coding-function
+ #'8-bit-fixed-query-coding-region)
,(macroexpand `(loop for alias in ',aliases
do (define-coding-system-alias alias
',name)))
@@ -703,4 +803,3 @@ the code for tilde `~'. "
'(mnemonic "Latin 1"
documentation "The most used encoding of Western Europe and the Americas."
aliases (iso-latin-1 latin-1)))
-
diff -r e70cc8a90e9043075a0e40fe9587d2c931fb618e -r
68d1ca56cffadada6a0026664ef87ae7a5b1a0b8 lisp/unicode.el
--- a/lisp/unicode.el Thu Jan 17 11:55:11 2008 +0100
+++ b/lisp/unicode.el Mon Jan 21 22:51:21 2008 +0100
@@ -611,6 +611,71 @@ mapping from the error sequences to the
(translate-region start finish table))
begin end buffer))
+(defvar unicode-query-coding-skip-chars-arg nil ;; Set in general-late.el
+ "Used by `unicode-query-coding-region' to skip chars with known
mappings.")
+
+(defun unicode-query-coding-region (begin end coding-system
+ &optional buffer errorp highlightp)
+ "The `query-coding-region' implementation for Unicode coding systems."
+ (check-argument-type #'coding-system-p
+ (setq coding-system (find-coding-system coding-system)))
+ (check-argument-type #'integer-or-marker-p begin)
+ (check-argument-type #'integer-or-marker-p end)
+ (let* ((skip-chars-arg unicode-query-coding-skip-chars-arg)
+ (ranges (make-range-table))
+ (looking-at-arg (concat "[" skip-chars-arg "]"))
+ fail-range-start fail-range-end previous-fail char-after
+ failed extent)
+ (save-excursion
+ (goto-char begin buffer)
+ (skip-chars-forward skip-chars-arg end buffer)
+ (while (< (point buffer) end)
+ (message
+ "fail-range-start is %S, previous-fail %S, point is %S, end is %S"
+ fail-range-start previous-fail (point buffer) end)
+ (setq char-after (char-after (point buffer) buffer)
+ fail-range-start (point buffer))
+ (while (and
+ (< (point buffer) end)
+ (not (looking-at looking-at-arg))
+ (= -1 (char-to-unicode char-after)))
+ (forward-char 1 buffer)
+ (message "what?!?")
+ (setq char-after (char-after (point buffer) buffer)
+ failed t))
+ (if (= fail-range-start (point buffer))
+ ;; The character can actually be encoded by the coding
+ ;; system; check the characters past it.
+ (forward-char 1 buffer)
+ ;; Can't be encoded; note this.
+ (when errorp
+ (error 'text-conversion-error
+ (format "Cannot encode %s using coding system"
+ (buffer-substring fail-range-start (point buffer)
+ buffer))
+ (coding-system-name coding-system)))
+ (put-range-table fail-range-start
+ ;; If char-after is non-nil, we're not at
+ ;; the end of the buffer.
+ (setq fail-range-end (if char-after
+ (point buffer)
+ (point-max buffer)))
+ t ranges)
+ (when highlightp
+ (setq extent (make-extent fail-range-start fail-range-end buffer))
+ (set-extent-priority extent (+ mouse-highlight-priority 2))
+ (set-extent-face extent 'query-coding-warning-face)))
+ (skip-chars-forward skip-chars-arg end buffer))
+ (if failed
+ (values nil ranges)
+ (values t nil)))))
+
+(loop
+ for coding-system in (coding-system-list)
+ do (when (eq 'unicode (coding-system-type coding-system))
+ (coding-system-put coding-system 'query-coding-function
+ #'unicode-query-coding-region)))
+
(unless (featurep 'mule)
;; We do this in such a roundabout way--instead of having the above defun
;; and defvar calls inside a (when (featurep 'mule) ...) form--to have
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://calypso.tux.org/cgi-bin/mailman/listinfo/xemacs-patches