1 new commit in XEmacs:
https://bitbucket.org/xemacs/xemacs/changeset/bed39edf91ba/
changeset: bed39edf91ba
user: kehoea
date: 2012-05-10 14:53:06
summary: Be better about word boundaries with JIT unicode characters,
mule-category.el
lisp/ChangeLog addition:
2012-05-10 Aidan Kehoe <kehoea(a)parhasard.net>
* mule/mule-category.el (word-combining-categories):
Be better about default word boundaries when text contains
just-in-time-allocated Unicode code points. Document what we
should do instead once we have Unicode internally.
* mule/misc-lang.el: IPA characters are Latin.
affected #: 3 files
diff -r 6e5a7278f9bfc88ff4111ccaf59a08a018462cb5 -r bed39edf91ba137860fa91f24628ff7bdecb43a1 lisp/ChangeLog
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,11 @@
+2012-05-10 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * mule/mule-category.el (word-combining-categories):
+ Be better about default word boundaries when text contains
+ just-in-time-allocated Unicode code points. Document what we
+ should do instead once we have Unicode internally.
+ * mule/misc-lang.el: IPA characters are Latin.
+
2012-05-08 Aidan Kehoe <kehoea(a)parhasard.net>
* cl-macs.el (rassoc): Remove a stray parenthesis here, thank you
diff -r 6e5a7278f9bfc88ff4111ccaf59a08a018462cb5 -r bed39edf91ba137860fa91f24628ff7bdecb43a1 lisp/mule/misc-lang.el
--- a/lisp/mule/misc-lang.el
+++ b/lisp/mule/misc-lang.el
@@ -41,4 +41,26 @@
short-name "IPA"
long-name "IPA"))
+;; XEmacs; these are Latin, it's not useful to put word boundaries between
+;; them and ASCII.
+(modify-category-entry 'ipa ?l nil t)
+
+;; XEmacs; why are these Latin? See the following:
+;;
+;; (let ((scripts
+;; (mapcar #'(lambda (character)
+;; (car
+;; (split-string
+;; (cadr (assoc "Name" (describe-char-unicode-data
+;; character))))))
+;; (loop
+;; for i from 33 to 127
+;; if (not (eql -1 (char-to-unicode (make-char 'ipa i))))
+;; nconc (list (make-char 'ipa i))))))
+;; (mapcar #'(lambda (script)
+;; (cons script (count script scripts :test #'equal)))
+;; (remove-duplicates scripts :test #'equal)))
+;; => (("GREEK" . 1) ("LATIN" . 55) ("MODIFIER" . 3))
+
+
;;; misc-lang.el ends here
diff -r 6e5a7278f9bfc88ff4111ccaf59a08a018462cb5 -r bed39edf91ba137860fa91f24628ff7bdecb43a1 lisp/mule/mule-category.el
--- a/lisp/mule/mule-category.el
+++ b/lisp/mule/mule-category.el
@@ -252,6 +252,7 @@
(chinese-big5-1 ?t)
(chinese-big5-2 ?t)
(korean-ksc5601 ?h "Hangul (Korean) 2-byte character set")
+ (jit-ucs-charset-0 ?J "Just-in-time-allocated Unicode character")
)
"List of predefined categories.
Each element is a list of a charset, a designator, and maybe a doc string.")
@@ -275,7 +276,18 @@
;;; Setting word boundary.
(setq word-combining-categories
- '((?l . ?l)))
+ ;; XEmacs; we should change to defining scripts, as does GNU, once
+ ;; unicode-internal is the default, and placing word boundaries
+ ;; between different scripts, not different charsets, by default.
+ ;; Then we can remove the jit-ucs-charset-0 entry above and all the
+ ;; entries containing ?J in this list.
+ ;;
+ ;; These entries are a bit heuristic, working on the assumption that
+ ;; characters that will be just-in-time-allocated will not be East
+ ;; Asian in XEmacs, and there's also no mechanism to apply the ?J
+ ;; category to further newly-created JIT categories.
+ '((?l . ?l) (?J . ?l) (?l . ?J) (?J . ?y) (?y . ?J) (?J . ?b) (?b . ?J)
+ (?J . ?g) (?J . ?w) (?w . ?J)))
(setq word-separating-categories ; (2-byte character sets)
'((?A . ?K) ; Alpha numeric - Katakana
Repository URL:
https://bitbucket.org/xemacs/xemacs/
--
This is a commit notification from
bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches