This incorporates an upstream change I made to Gnus (cf.
http://quimby.gnus.org/cgi-bin/cvsweb.cgi/gnus/lisp/mm-util.el.diff?r1=6....
and http://tinyurl.com/46rzs ), together with a non-nil default for
mm-coding-system-priorities, which is an ordered list of preferred MIME
character sets for a message.
xemacs-packages/gnus/ChangeLog addition:
2005-02-27 Aidan Kehoe <kehoea(a)parhasard.net>
* lisp/mm-util.el (mm-coding-system-priorities):
Give it a non-nil default value even when we're not in Japan,
containing the UTF-8 character set if that's available.
* lisp/mm-util.el (mm-xemacs-find-mime-charset-1): New.
* lisp/mm-util.el (mm-xemacs-find-mime-charset): New.
* lisp/mm-util.el (mm-find-mime-charset-region):
Incorporate upstream XEmacs-specific changes, use latin-unity to
work out which MIME character set is appropriate.
These two changes eliminate the "needs to be split into N charset
parts. Really post?" bug on the default configuration.
XEmacs Packages source patch:
Diff command: cvs -q diff -u
Files affected: xemacs-packages/gnus/lisp/mm-util.el
Index: xemacs-packages/gnus/lisp/mm-util.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/packages/xemacs-packages/gnus/lisp/mm-util.el,v
retrieving revision 1.5
diff -u -u -r1.5 mm-util.el
--- xemacs-packages/gnus/lisp/mm-util.el 2004/09/28 02:21:14 1.5
+++ xemacs-packages/gnus/lisp/mm-util.el 2005/02/27 16:18:47
@@ -304,7 +304,11 @@
(cond ((string= lang "Japanese")
;; Japanese users may prefer iso-2022-jp to shift-jis.
'(iso-2022-jp iso-2022-jp-2 japanese-shift-jis
- iso-latin-1 utf-8)))))
+ iso-latin-1 utf-8))
+ ((find-coding-system 'utf-8)
+ '(iso-8859-1 iso-8859-2 iso-8859-15 utf-8))
+ (t
+ '(iso-8859-1 iso-8859-2 iso-8859-15 iso-2022-jp)))))
"Preferred coding systems for encoding outgoing messages.
More than one suitable coding system may be found for some text.
@@ -539,6 +543,83 @@
(> (length (memq a priorities))
(length (memq b priorities)))))
+(eval-when-compile ; Compile-time-only declarations of the latin-unity names used by `mm-xemacs-find-mime-charset-1'.
+ (autoload 'latin-unity-massage-name "latin-unity")
+ (autoload 'latin-unity-maybe-remap "latin-unity")
+ (autoload 'latin-unity-representations-feasible-region "latin-unity")
+ (autoload 'latin-unity-representations-present-region "latin-unity")
+ (defvar latin-unity-coding-systems) ; Value-less defvars: declare the variables without assigning anything;
+ (defvar latin-unity-ucs-list)) ; presumably here to keep the byte-compiler quiet -- TODO confirm.
+
+(defun mm-xemacs-find-mime-charset-1 (begin end)
+  "Determine which MIME charset to use to send region as message.
+This uses the XEmacs-specific latin-unity package to better handle the
+case where identical characters from diverse ISO-8859-? character sets
+can be encoded using a single one of the corresponding coding systems.
+
+It treats `mm-coding-system-priorities' as the list of preferred
+coding systems; a useful example setting for this list in Western
+Europe would be '(iso-8859-1 iso-8859-15 utf-8), which would default
+to the very standard Latin 1 coding system, and only move to coding
+systems that are less supported as is necessary to encode the
+characters that exist in the buffer.
+
+Latin Unity doesn't know about those non-ASCII Roman characters that
+are available in various East Asian character sets. As such, its
+behavior if you have a JIS 0212 LATIN SMALL LETTER A WITH ACUTE in a
+buffer and it can otherwise be encoded as Latin 1, won't be ideal.
+But this is very much a corner case, so don't worry about it.
+
+BEGIN and END delimit the region to examine. Return a one-element
+list holding the chosen coding system, or nil when latin-unity is
+unavailable or cannot decide, so that the caller falls back."
+  (let ((systems mm-coding-system-priorities) csets psets)
+    ;; Load the Latin Unity library, if available.
+    (when (and (not (featurep 'latin-unity))
+               (locate-library "latin-unity"))
+      (require 'latin-unity))
+
+    ;; Now, can we use it? If latin unity isn't available, return nil
+    ;; and let `mm-find-mime-charset-region' take its default action,
+    ;; which equally applies to GNU Emacs.
+    (when (featurep 'latin-unity)
+      (setq csets (latin-unity-representations-feasible-region begin end)
+            psets (latin-unity-representations-present-region begin end))
+
+      (catch 'done
+        ;; Pass back the first coding system in the preferred list
+        ;; that can encode the whole region.
+        (dolist (curset systems)
+          (setq curset (latin-unity-massage-name 'buffer-default curset))
+
+          ;; If the coding system is a universal coding system, then
+          ;; it can certainly encode all the characters in the region.
+          (when (memq curset latin-unity-ucs-list)
+            (throw 'done (list curset)))
+
+          ;; If a coding system isn't universal, and isn't in
+          ;; the list that latin unity knows about, we can't
+          ;; decide whether to use it here. Leave that until later
+          ;; in `mm-find-mime-charset-region' function, whence we
+          ;; have been called.
+          (unless (memq curset latin-unity-coding-systems)
+            (throw 'done nil))
+
+          ;; Right, we know about this coding system, and it may
+          ;; conceivably be able to encode all the characters in
+          ;; the region.
+          (when (latin-unity-maybe-remap begin end curset csets psets t)
+            (throw 'done (list curset))))
+
+        ;; Can't encode using anything from the
+        ;; `mm-coding-system-priorities' list.
+        ;; Leave `mm-find-mime-charset-region' to do most of the work.
+        nil))))
+
+(defmacro mm-xemacs-find-mime-charset (begin end)
+  "Expand to a `mm-xemacs-find-mime-charset-1' call on XEmacs, else to nil."
+  (if (featurep 'xemacs) (list 'mm-xemacs-find-mime-charset-1 begin end)))
+
(defun mm-find-mime-charset-region (b e &optional hack-charsets)
"Return the MIME charsets needed to encode the region between B and E.
nil means ASCII, a single-element list represents an appropriate MIME
@@ -580,8 +661,12 @@
(setq systems nil
charsets (list cs))))))
charsets))
- ;; Otherwise we're not multibyte, we're XEmacs, or a single
- ;; coding system won't cover it.
+ ;; If we're XEmacs, and some coding system is appropriate,
+ ;; mm-xemacs-find-mime-charset will return an appropriate list.
+ ;; Otherwise, we'll get nil, and the next setq will get invoked.
+ (setq charsets (mm-xemacs-find-mime-charset b e))
+
+ ;; We're not multibyte, or a single coding system won't cover it.
(setq charsets
(mm-delete-duplicates
(mapcar 'mm-mime-charset
--
“I, for instance, am gung-ho about open source because my family is being
held hostage in Rob Malda’s basement. But who fact-checks me, or Enderle,
when we say something in public? No-one!” -- Danny O’Brien