User: aidan
Date: 05/04/04 12:25:37
Modified: packages/xemacs-packages/vm ChangeLog vm-mime.el
Log:
(Revised2) Have VM figure out what MIME charset to use by coverage.
Revision Changes Path
1.55 +30 -0 XEmacs/packages/xemacs-packages/vm/ChangeLog
Index: ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/packages/xemacs-packages/vm/ChangeLog,v
retrieving revision 1.54
retrieving revision 1.55
diff -u -p -r1.54 -r1.55
--- ChangeLog 2004/12/08 01:31:57 1.54
+++ ChangeLog 2005/04/04 10:25:36 1.55
@@ -1,3 +1,33 @@
+2005-04-04 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * vm-mime.el (vm-coding-system-priorities): New.
+ * vm-mime.el (vm-get-coding-system-priorities): New.
+ Add a list of preferred Mule coding systems for outgoing mail;
+ write a function to access it, to allow us to have reasonable
+ defaults--based on various packages' loaded state--in the event of
+ it being nil.
+
+ * vm-mime.el (vm-mime-ucs-list): New.
+ * vm-mime.el (vm-get-mime-ucs-list): New.
+ Add a list of Mule coding systems that can encode every character;
+ use a function to access them, so we can check for utf-8 being
+ available on lookup.
+
+ * vm-mime.el (vm-update-mime-charset-maps): New.
+ Function to update the mule coding system -> Mime character set
+ maps, called after load for un-define and latin-unity.
+
+ * vm-mime.el (vm-mime-charset-decode-region): If the TTY coding
+ system we're using can display a MIME charset, go ahead and decode
+ the message.
+ * vm-mime.el (vm-determine-proper-charset): Use latin-unity if
+ available, and other logic if not, to work out the appropriate
+ MIME character set to use for a region.
+ * vm-mime.el (vm-mime-tty-can-display-mime-charset): New.
+ * vm-mime.el (vm-mime-charset-internally-displayable-p): Add a
+ docstring, use vm-mime-tty-can-display-mime-charset now it's
+ available.
+
2004-12-07 Ben Wing <ben(a)xemacs.org>
* Makefile (EARLY_GENERATED_LISP):
1.30 +348 -30 XEmacs/packages/xemacs-packages/vm/vm-mime.el
Index: vm-mime.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/packages/xemacs-packages/vm/vm-mime.el,v
retrieving revision 1.29
retrieving revision 1.30
diff -u -p -r1.29 -r1.30
--- vm-mime.el 2003/09/03 00:43:45 1.29
+++ vm-mime.el 2005/04/04 10:25:37 1.30
@@ -33,6 +33,86 @@
(put 'vm-mime-error 'error-conditions '(vm-mime-error error))
(put 'vm-mime-error 'error-message "MIME error"))
+;; A lot of the more complicated MIME character set processing is only
+;; practical under MULE.
+
+(eval-when-compile
+ (defvar vm-coding-system-priorities)
+ (defvar vm-mime-ucs-list)
+ (defvar latin-unity-ucs-list))
+
+(defcustom vm-coding-system-priorities nil
+ "*List of coding systems for VM to use, for outgoing mail, in order of
+preference.
+
+If you find that your outgoing mail is being encoded in `iso-2022-jp' and
+you'd prefer something more widely used outside of Japan be used instead,
+you could load the `latin-unity' and `un-define' libraries under XEmacs
+21.4, and intialize this list to something like `(iso-8859-1 iso-8859-15
+utf-8)'. ")
+
+(defun vm-get-coding-system-priorities ()
+ "Return the value of `vm-coding-system-priorities', or a reasonable
+default for it if it's nil. "
+ (if vm-coding-system-priorities
+ vm-coding-system-priorities
+ (let ((res '(iso-8859-1 iso-8859-2 iso-8859-15 iso-8859-16 utf-8)))
+ (dolist (list-item res)
+ ;; Assumes iso-8859-1 is always available, which is reasonable.
+ (unless (coding-system-p (find-coding-system list-item))
+ (delq list-item res)))
+ res)))
+
+(defcustom vm-mime-ucs-list nil
+ "*List of coding systems that can encode all chars emacs knows.")
+
+(defun vm-get-mime-ucs-list ()
+ "Return the value of `vm-mime-ucs-list', or a reasonable default for it if
+it's nil. This is used instead of `vm-mime-ucs-list' directly in order to
+allow runtime checks for optional features like `mule-ucs' or
+`latin-unity'. "
+ (if vm-mime-ucs-list
+ vm-mime-ucs-list
+ (if (featurep 'latin-unity)
+ latin-unity-ucs-list
+ (if (coding-system-p (find-coding-system 'utf-8))
+ '(utf-8 iso-2022-jp ctext escape-quoted)
+ '(iso-2022-jp ctext escape-quoted)))))
+
+(defun vm-update-mime-charset-maps ()
+ "Check for the presence of certain Mule coding systems, and add
+information about the corresponding MIME character sets to VM's
+configuration. "
+ ;; Add some extra charsets that may not have been defined onto the end
+ ;; of vm-mime-mule-charset-to-coding-alist.
+ (mapcar (lambda (x)
+ (and (coding-system-p (find-coding-system x))
+ ;; Not using vm-string-assoc because of some quoting
+ ;; weirdness it's doing.
+ (if (not (assoc
+ (format "%s" x)
+ vm-mime-mule-charset-to-coding-alist))
+ (add-to-list 'vm-mime-mule-charset-to-coding-alist
+ (list (format "%s" x) x)))))
+ '(utf-8 iso-8859-15 iso-8859-14 iso-8859-16))
+
+ ;; And make sure that the map back from coding-systems is good for
+ ;; those charsets.
+ (mapcar (lambda (x)
+ (or (assoc (car (cdr x)) vm-mime-mule-coding-to-charset-alist)
+ (add-to-list 'vm-mime-mule-coding-to-charset-alist
+ (list (car (cdr x)) (car x)))))
+ vm-mime-mule-charset-to-coding-alist)
+ ;; Whoops, doesn't get picked up for some reason.
+ (add-to-list 'vm-mime-mule-coding-to-charset-alist
+ '(iso-8859-1 "iso-8859-1")))
+
+(when vm-xemacs-mule-p
+ (unless (coding-system-p (find-coding-system 'utf-8))
+ (eval-after-load "un-define" `(vm-update-mime-charset-maps)))
+ (unless (featurep 'latin-unity)
+ (eval-after-load "latin-unity" `(vm-update-mime-charset-maps))))
+
(defun vm-make-layout (&rest plist)
(vector
(plist-get plist 'type)
@@ -236,6 +316,7 @@
(cond ((or vm-xemacs-mule-p vm-fsfemacs-mule-p)
(if (or (and vm-xemacs-p (memq (device-type) '(x gtk mswindows)))
vm-fsfemacs-p
+ (vm-mime-tty-can-display-mime-charset charset)
nil)
(let ((buffer-read-only nil)
(cell (cdr (vm-string-assoc
@@ -1294,34 +1375,164 @@
(defvar buffer-file-coding-system)
+;; Possible further work; integrate with the FSF's unify-8859-on-encoding-mode
+;; stuff.
+
(defun vm-determine-proper-charset (beg end)
+ "Work out what MIME character set to use for sending a message.
+
+Uses `us-ascii' if the message is entirely ASCII compatible. If MULE is not
+available, and the message contains contains non-ASCII characters, consults
+the variable `vm-mime-8bit-composition-charset' or uses `iso-8859-1.' if
+that is nil.
+
+Under MULE, `vm-coding-system-priorities' is searched, in order, for a
+coding system that will encode all the characters in the message. If none is
+found, `iso-2022-jp' is used, which will preserve information for all the
+character sets of which Emacs is aware--at the expense of being incompatible
+with the recipient's software, if that recipient is outside of East Asia."
(save-excursion
(save-restriction
(narrow-to-region beg end)
- (catch 'done
- (goto-char (point-min))
(if (or vm-xemacs-mule-p
(and vm-fsfemacs-mule-p enable-multibyte-characters))
- (let ((charsets (delq 'ascii (vm-charsets-in-region
- (point-min) (point-max)))))
- (cond ((null charsets)
- "us-ascii")
- ((cdr charsets)
- (or (car (cdr
- (assq (vm-coding-system-name
- buffer-file-coding-system)
- vm-mime-mule-coding-to-charset-alist)))
- "iso-2022-jp"))
- (t
- (or (car (cdr
- (assoc
- (car charsets)
- vm-mime-mule-charset-to-charset-alist)))
- "unknown"))))
- (and (re-search-forward "[^\000-\177]" nil t)
- (throw 'done (or vm-mime-8bit-composition-charset
- "iso-8859-1")))
- (throw 'done vm-mime-7bit-composition-charset))))))
+ ;; Okay, we're on a MULE build.
+ (let ((charsets (delq 'ascii
+ (vm-charsets-in-region (point-min)
+ (point-max)))))
+ (cond
+ ;; No non-ASCII chars? Right, that makes it easy for us.
+ ((null charsets) "us-ascii")
+
+ ;; Check whether the buffer can be encoded using one of the
+ ;; vm-coding-system-priorities coding systems.
+ ((catch 'done
+
+ ;; We can't really do this intelligently unless latin-unity
+ ;; is available.
+ (if (featurep 'latin-unity)
+ (let ((csetzero charsets)
+ ;; Check what latin character sets are in the
+ ;; buffer.
+ (csets (latin-unity-representations-feasible-region
+ beg end))
+ (psets (latin-unity-representations-present-region
+ beg end))
+ (systems (vm-get-coding-system-priorities)))
+
+ ;; If one of the character sets is outside of latin
+ ;; unity's remit, check for a universal character
+ ;; set in vm-coding-system-priorities, and pass back
+ ;; the first one.
+ ;;
+ ;; Otherwise, there's no remapping that latin unity
+ ;; can do for us, and we should default to something
+ ;; iso-2022 based. (Since we're not defaulting to
+ ;; Unicode, at the moment.)
+
+ (while csetzero
+ (if (not (memq
+ (car csetzero) latin-unity-character-sets))
+ (let ((ucs-list (vm-get-mime-ucs-list))
+ (preapproved
+ (vm-get-coding-system-priorities)))
+ (while preapproved
+ (if (memq (car preapproved) ucs-list)
+ (throw 'done
+ (car (cdr (assq
+ (vm-coding-system-name
+ (car preapproved))
+ vm-mime-mule-coding-to-charset-alist)))))
+ (setq preapproved (cdr preapproved)))
+ ;; Nothing universal in the preapproved list.
+ (throw 'done nil)))
+ (setq csetzero (cdr csetzero)))
+
+ ;; Okay, we're able to remap using latin-unity. Do so.
+ (while systems
+ (let ((sys (latin-unity-massage-name (car systems)
+ 'buffer-default)))
+ (when (latin-unity-maybe-remap (point-min)
+ (point-max) sys
+ csets psets t)
+ (throw 'done (second (assq
+ (vm-coding-system-name sys)
+ vm-mime-mule-coding-to-charset-alist)))))
+ (setq systems (cdr systems)))
+ (throw 'done nil))
+
+ ;; Right, latin-unity isn't available. If there's only
+ ;; one non-ASCII character set in the region, and the
+ ;; corresponding coding system is on the preapproved
+ ;; list before the first universal character set, pass
+ ;; it back. Otherwise, if a universal character set is
+ ;; on the preapproved list, pass the first one of them
+ ;; back. Otherwise, pass back nil and use the
+ ;; "iso-2022-jp" entry below.
+
+ (let ((csetzero charsets)
+ (preapproved (vm-get-coding-system-priorities))
+ (ucs-list (vm-get-mime-ucs-list)))
+ (if (null (cdr csetzero))
+ (while preapproved
+ ;; If we encounter a universal character set on
+ ;; the preapproved list, pass it back.
+ (if (memq (car preapproved) ucs-list)
+ (throw 'done (second (assq
+ (vm-coding-system-name
+ (car preapproved))
+ vm-mime-mule-coding-to-charset-alist))))
+
+ ;; The preapproved entry isn't universal. Check if
+ ;; it's related to the single non-ASCII MULE
+ ;; charset in the buffer (that is, if the
+ ;; conceptually unordered MULE list of characters
+ ;; is based on a corresponding ISO character set,
+ ;; and thus the ordered ISO character set can
+ ;; encode all the characters in the MIME charset.)
+ ;;
+ ;; The string equivalence test is used because we
+ ;; don't have another mapping that is useful
+ ;; here. Nnngh.
+
+ (if (string=
+ (car (cdr (assoc (car csetzero)
+ vm-mime-mule-charset-to-charset-alist)))
+ (car (cdr (assoc (car preapproved)
+ vm-mime-mule-coding-to-charset-alist))))
+ (throw 'done
+ (car (cdr (assoc (car csetzero)
+ vm-mime-mule-charset-to-charset-alist)))))
+ (setq preapproved (cdr preapproved)))
+
+ ;; Okay, there's more than one MULE character set in
+ ;; the buffer. Check for a universal entry in the
+ ;; preapproved list; if it exists pass it back,
+ ;; otherwise fall through to the iso-2022-jp below,
+ ;; because nothing on the preapproved list is
+ ;; appropriate.
+
+ (while preapproved
+ ;; If we encounter a universal character set on
+ ;; the preapproved list, pass it back.
+ (when (memq (car preapproved) ucs-list)
+ (throw 'done (second (assq
+ (vm-coding-system-name
+ (car preapproved))
+ vm-mime-mule-coding-to-charset-alist))))
+ (setq preapproved (cdr preapproved)))))
+ (throw 'done nil))))
+ ;; Couldn't do any magic with vm-coding-system-priorities. Pass
+ ;; back a Japanese iso-2022 MIME character set.
+ (t "iso-2022-jp")))
+ ;; If we're non-MULE and there are eight bit characters, use a
+ ;; sensible default.
+ (goto-char (point-min))
+ (if (re-search-forward "[^\000-\177]" nil t)
+ (or vm-mime-8bit-composition-charset "iso-8859-1")
+ ;; We're non-MULE and there are purely 7bit characters in the
+ ;; region. Return vm-mime-7bit-c-c.
+ vm-mime-7bit-composition-charset)))))
(defun vm-determine-proper-content-transfer-encoding (beg end)
(save-excursion
@@ -4059,7 +4270,39 @@ LAYOUT is the MIME layout struct for the
(or (vm-mime-types-match "text" (car (vm-mm-layout-type layout)))
(vm-mime-types-match "message" (car (vm-mm-layout-type layout)))))
+
+(defun vm-mime-tty-can-display-mime-charset (name)
+ "Can the current TTY correctly display the given MIME character set?"
+ (and (fboundp 'console-tty-output-coding-system)
+ ;; Is this check too paranoid?
+ (coding-system-p (console-tty-output-coding-system))
+ (let
+ ;; Nnngh, latin-unity-base-name isn't doing the right thing for
+ ;; me with MULE-UCS and UTF-8 as the terminal coding system. Of
+ ;; course, it's not evident that it _can_ do the right thing.
+ ;;
+ ;; The intention is that ourtermcs is the version of the
+ ;; coding-system without line-ending information attached to its
+ ;; end.
+ ((ourtermcs (or (car
+ (coding-system-get
+ (console-tty-output-coding-system)
+ 'alias-coding-systems))
+ (console-tty-output-coding-system))))
+ (or (eq ourtermcs (car
+ (cdr
+ (vm-string-assoc
+ name vm-mime-mule-charset-to-coding-alist))))
+ ;; The vm-mime-mule-charset-to-coding-alist check is to make
+ ;; sure it does the right thing with a nonsense MIME character
+ ;; set name.
+ (and (memq ourtermcs vm-mime-ucs-list)
+ (vm-string-assoc name vm-mime-mule-charset-to-coding-alist)
+ t)
+ (vm-mime-default-face-charset-p name)))))
+
(defun vm-mime-charset-internally-displayable-p (name)
+ "Can the given MIME charset be displayed within emacs by by VM?"
(cond ((and vm-xemacs-mule-p (memq (device-type) '(x gtk mswindows)))
(or (vm-string-assoc name vm-mime-mule-charset-to-coding-alist)
(vm-mime-default-face-charset-p name)))
@@ -4069,6 +4312,13 @@ LAYOUT is the MIME layout struct for the
((vm-multiple-fonts-possible-p)
(or (vm-mime-default-face-charset-p name)
(vm-string-assoc name vm-mime-charset-font-alist)))
+
+ ;; If the terminal-coding-system variable is set to something that
+ ;; can encode all the characters of the given MIME character set,
+ ;; then we can display any message in the given MIME character set
+ ;; internally.
+
+ ((vm-mime-tty-can-display-mime-charset name))
(t
(vm-mime-default-face-charset-p name))))
@@ -4975,10 +5225,32 @@ and the approriate content-type and boun
(setq buffer-undo-list (primitive-undo 1 buffer-undo-list))))))
(defvar enriched-mode)
+
+;; Non-XEmacs specific changes to this function should be made to
+;; vm-mime-fsfemacs-encode-composition as well.
-;; Non-XEmacs specific changes to this function should be
-;; made to vm-mime-fsfemacs-encode-composition as well.
(defun vm-mime-xemacs-encode-composition ()
+ "Encode the current message using MIME.
+
+The Multipurpose Internet Message Extensions extend the original format of
+Internet mail to allow non-US-ASCII textual messages, non-textual messages,
+multipart message bodies, and non-US-ASCII information in message headers.
+
+This function chooses the MIME character set(s) to use, and transforms the
+message content from the XEmacs-internal encoding to the corresponding
+octets in that MIME character set.
+
+It then applies some transfer encoding to the message. For details of the
+transfer encodings available, see the documentation for
+`vm-mime-8bit-text-transfer-encoding.'
+
+Finally, it creates the headers that are necessary to identify the message
+as one that uses MIME.
+
+Under MULE, it explicitly sets `buffer-file-coding-system' to a binary
+(no-transformation) coding system, to avoid further transformation of the
+message content when it's passed to the MTA (that is, the mail transfer
+agent; under Unix, normally sendmail.)"
(save-restriction
(widen)
(if (not (eq major-mode 'mail-mode))
@@ -4992,6 +5264,8 @@ and the approriate content-type and boun
forward-local-refs already-mimed layout e e-list boundary
type encoding charset params description disposition object
opoint-min)
+ ;; Make sure we don't double encode UTF-8 (for example) text.
+ (setq buffer-file-coding-system (vm-binary-coding-system))
(mail-text)
(setq e-list (extent-list nil (point) (point-max))
e-list (vm-delete (function
@@ -5020,11 +5294,26 @@ and the approriate content-type and boun
(if enriched
(let ((enriched-initial-annotation ""))
(enriched-encode (point-min) (point-max))))
+
(setq charset (vm-determine-proper-charset (point-min)
(point-max)))
(if vm-xemacs-mule-p
- (encode-coding-region (point-min) (point-max)
- buffer-file-coding-system))
+ (encode-coding-region
+ (point-min) (point-max)
+
+ ;; What about the case where vm-m-m-c-t-c-a doesn't have an
+ ;; entry for the given charset? That shouldn't happen, if
+ ;; vm-mime-mule-coding-to-charset-alist and
+ ;; vm-mime-mule-charset-to-coding-alist have complete and
+ ;; matching entries. Admittedly this last is not a
+ ;; given. Should we make it so on startup? (By setting the
+ ;; key for any missing entries in
+ ;; vm-mime-mule-coding-to-charset-alist to being (format
+ ;; "%s" coding-system), if necessary.)
+
+ (car (cdr (vm-string-assoc
+ charset vm-mime-mule-charset-to-coding-alist)))))
+
(setq encoding (vm-determine-proper-content-transfer-encoding
(point-min)
(point-max))
@@ -5058,8 +5347,23 @@ and the approriate content-type and boun
(setq charset (vm-determine-proper-charset (point-min)
(point-max)))
(if vm-xemacs-mule-p
- (encode-coding-region (point-min) (point-max)
- buffer-file-coding-system))
+ (encode-coding-region
+ (point-min) (point-max)
+
+ ;; What about the case where vm-m-m-c-t-c-a doesn't have an
+ ;; entry for the given charset? That shouldn't happen, if
+ ;; vm-mime-mule-coding-to-charset-alist and
+ ;; vm-mime-mule-charset-to-coding-alist have complete and
+
+ ;; matching entries. Admittedly this last is not a
+ ;; given. Should we make it so on startup? (By setting the
+ ;; key for any missing entries in
+ ;; vm-mime-mule-coding-to-charset-alist to being (format
+ ;; "%s" coding-system), if necessary.)
+
+ (car (cdr (vm-string-assoc
+ charset vm-mime-mule-charset-to-coding-alist)))))
+
(setq encoding (vm-determine-proper-content-transfer-encoding
(point-min)
(point-max))
@@ -5220,8 +5524,22 @@ and the approriate content-type and boun
(setq charset (vm-determine-proper-charset (point)
(point-max)))
(if vm-xemacs-mule-p
- (encode-coding-region (point) (point-max)
- buffer-file-coding-system))
+ (encode-coding-region
+ (point) (point-max)
+
+ ;; What about the case where vm-m-m-c-t-c-a doesn't have an
+ ;; entry for the given charset? That shouldn't happen, if
+ ;; vm-mime-mule-coding-to-charset-alist and
+ ;; vm-mime-mule-charset-to-coding-alist have complete and
+ ;; matching entries. Admittedly this last is not a
+ ;; given. Should we make it so on startup? (By setting the
+ ;; key for any missing entries in
+ ;; vm-mime-mule-coding-to-charset-alist to being (format "%s"
+ ;; coding-system), if necessary.)
+
+ (car (cdr (vm-string-assoc
+ charset vm-mime-mule-charset-to-coding-alist)))))
+
(setq encoding (vm-determine-proper-content-transfer-encoding
(point)
(point-max))