1 new commit in XEmacs:
https://bitbucket.org/xemacs/xemacs/commits/1c9b5a16e782/
Changeset: 1c9b5a16e782
User: kehoea
Date: 2017-11-09 21:47:32+00:00
Summary: Parse UnicodeData.txt instead of CaseFolding.txt for mule/uni-case-conv.el
lisp/ChangeLog addition:
2017-11-09 Aidan Kehoe <kehoea(a)parhasard.net>
Ben's approach in lib-src/make-case-conv.py didn't work: there are
107 entries in CaseFolding.txt as of today where the first column
is a lowercase letter, not an uppercase one. Parse UnicodeData.txt
instead.
* mule/make-case-conv.el: New.
Replacement for lib-src/make-case-conv.py, parsing UnicodeData.txt
instead.
* mule/uni-case-conv.el:
Update this file to reflect output from make-case-conv.el with
recent UnicodeData.txt instead.
lib-src/ChangeLog addition:
2017-11-09 Aidan Kehoe <kehoea(a)parhasard.net>
* make-case-conv.py:
Document why this file's approach didn't work. Slot it for deletion.
Affected #: 5 files
diff -r 52af36c1d4781c2bce577f240c75f9d67cd5c2f5 -r 1c9b5a16e78274b6b671f85a57b9f7c88a952115 lib-src/ChangeLog
--- a/lib-src/ChangeLog
+++ b/lib-src/ChangeLog
@@ -1,3 +1,8 @@
+2017-11-09 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * make-case-conv.py:
+ Document why this file's approach didn't work. Slot it for deletion.
+
2017-09-24 Aidan Kehoe <kehoea(a)parhasard.net>
* b2m.c (main):
diff -r 52af36c1d4781c2bce577f240c75f9d67cd5c2f5 -r 1c9b5a16e78274b6b671f85a57b9f7c88a952115 lib-src/make-case-conv.py
--- a/lib-src/make-case-conv.py
+++ b/lib-src/make-case-conv.py
@@ -35,6 +35,12 @@
# (or whatever else you have named it according to the variable
# `output_filename').
+# #### Aidan Kehoe, Thu 9 Nov 2017 21:32:14 GMT; this approach doesn't work,
+# since CaseFolding folds both upper and lower case characters to lower case,
+# without marking which is which. We need to parse UnicodeData.txt instead; I
+# do this in lisp/mule/make-case-conv.el. I will remove make-case-conv.py down
+# the line.
+
### Code:
import urllib2, re, sys
diff -r 52af36c1d4781c2bce577f240c75f9d67cd5c2f5 -r 1c9b5a16e78274b6b671f85a57b9f7c88a952115 lisp/ChangeLog
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,17 @@
+2017-11-09 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ Ben's approach in lib-src/make-case-conv.py didn't work: there are
+ 107 entries in CaseFolding.txt as of today where the first column
+ is a lowercase letter, not an uppercase one. Parse UnicodeData.txt
+ instead.
+
+ * mule/make-case-conv.el: New.
+ Replacement for lib-src/make-case-conv.py, parsing UnicodeData.txt
+ instead.
+ * mule/uni-case-conv.el:
+ Update this file to reflect output from make-case-conv.el with
+ recent UnicodeData.txt instead.
+
2017-10-29 Aidan Kehoe <kehoea(a)parhasard.net>
* cl-extra.el (cl-macroexpand-all):
diff -r 52af36c1d4781c2bce577f240c75f9d67cd5c2f5 -r 1c9b5a16e78274b6b671f85a57b9f7c88a952115 lisp/mule/make-case-conv.el
--- /dev/null
+++ b/lisp/mule/make-case-conv.el
@@ -0,0 +1,151 @@
+;;; make-case-conv.el --- Generate uni-case-conv.el from UnicodeData.txt
+
+;; Copyright (C) 2017 Free Software Foundation, (C) 2010 Ben Wing
+
+;; Keywords: multilingual, case, uppercase, lowercase, Unicode
+
+;; This file is part of XEmacs.
+
+;; XEmacs is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; XEmacs is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with XEmacs; see the file COPYING. If not, write to the Free
+;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+;; 02111-1307, USA.
+
+;;; Commentary:
+
+;; Generate uni-case-conv.el. To do this, parse the UnicodeData.txt file. Do
+;; not parse CaseFolding.txt, since it does not mark the case of the character
+;; to be folded (a lower case character can be folded to a lower case
+;; character, which is an issue at least for ?\u00b5 MICRO SIGN). This will
+;; need to be replaced once we have better case support (once we attempt to
+;; support SpecialCasing.txt). This only needs to be done when UnicodeData.txt
+;; is updated. First commit reflects UnicodeData.txt of 20160517, md5 sum
+;; dde25b1cf9bbb4ba1140ac12e4128b0b.
+
+;; Based on Ben's make-case-conv.py, which parsed CaseFolding.txt rather than
+;; UnicodeData.txt
+
+(require 'descr-text)
+
+(let* ((output-filename "uni-case-conv.el")
+ (output-buffer (get-buffer-create output-filename))
+ (mapping (make-hash-table :test #'eql))
+ case-fold-search lower upper)
+ (format-into output-buffer
+ #r";;; %s --- Case-conversion support for Unicode
+
+;; Copyright (C) 2010 Ben Wing.
+
+;; Keywords: multilingual, case, uppercase, lowercase, Unicode
+
+;; This file is part of XEmacs.
+
+;; XEmacs is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; XEmacs is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with XEmacs; see the file COPYING. If not, write to the Free
+;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+;; 02111-1307, USA.
+
+;;; Commentary:
+
+;; DO NOT MODIFY THIS FILE!!!!!!!!!!!!!!!!
+;; This file is autogenerated by %s. Modify that
+;; file instead.
+
+;;; Code:
+
+;; We process UnicodeData.txt in reverse order so that the more
+;; desirable mappings, which come early, override less desirable later ones.
+;; In particular, we definitely do not want the following bindings to work
+;; both ways:
+
+;; (?\u017F ?\u0073) ;; LATIN SMALL LETTER LONG S
+;; (?\u212A ?\u006B) ;; KELVIN SIGN
+;; (?\u212B ?\u00E5) ;; ANGSTROM SIGN
+
+;; The first two are especially bad as they will cause upcasing operations
+;; on lowercase s and k to give strange results. It's actually worse than
+;; that -- for unknown reasons, with the bad mappings in place, the byte-
+;; compiler produces broken code for some files, which results in a stack-
+;; underflow crash upon loadup in preparation for dumping.
+" output-filename (subseq (file-name-nondirectory load-file-name) 0
+ (if (eql (aref load-file-name
+ (1- (length load-file-name)))
+ ?c)
+ -1)))
+ ;; This is a separate call just for the sake of indentation, so we don't
+ ;; have a ?( in the first column.
+ (write-sequence "(loop
+ for (upper lower)
+ in '(" output-buffer)
+ (labels ((tounichar (value)
+ (format (if (<= value #xFFFF) #r"?\u%04X"
#r"?\U%08X") value))
+ (choose-comment (upper lower)
+ (let* ((upper-comment
+ (cadr (assoc "Name" (describe-char-unicode-data upper))))
+ (lower-comment
+ (cadr (assoc "Name" (describe-char-unicode-data lower))))
+ (folded-upper-comment
+ (replace-in-string upper-comment " CAPITAL LETTER "
"" t)))
+ (if (equal folded-upper-comment
+ (replace-in-string
+ lower-comment " SMALL LETTER " "" t))
+ (replace-in-string lower-comment
+ " SMALL LETTER " " LETTER " t)
+ (concat upper-comment ", " lower-comment)))))
+ (with-temp-buffer
+ (insert-file-contents describe-char-unicodedata-file nil)
+ (goto-char (point-max))
+ (while (re-search-backward #r"
+\([0-9A-F]+\);[^;]+;\(L[lu]\);\([^;]*;\)\{9,9\}\([^;]*\);\([^;]*\);" nil t)
+ (if (equal (match-string 2) "Ll")
+ (when (> (- (match-end 4) (match-beginning 4)) 0)
+ (setq lower (parse-integer (match-string 1) :radix 16)
+ upper (parse-integer (match-string 4) :radix 16))
+ (when (equal (cadr (assoc "Category" (describe-char-unicode-data
upper)))
+ "uppercase letter")
+ (format-into output-buffer "(%s %s) ;; %s\n "
+ (tounichar upper) (tounichar lower)
+ (choose-comment upper lower))
+ (puthash upper lower mapping)))
+ (when (> (- (match-end 5) (match-beginning 5)) 0)
+ (setq upper (parse-integer (match-string 1) :radix 16)
+ lower (parse-integer (match-string 5) :radix 16))
+ ;; We will generally encounter the lower-case characters first.
+ (unless (eql (gethash upper mapping) lower)
+ (when (equal (cadr (assoc "Category" (describe-char-unicode-data
lower)))
+ "lowercase letter")
+ (format-into output-buffer "(%s %s) ;; %s\n "
+ (tounichar upper) (tounichar lower)
+ (choose-comment upper lower))
+ (puthash upper lower mapping))))))
+ (format-into output-buffer ")
+ with case-table = (standard-case-table)
+ do
+ (put-case-table-pair upper lower case-table))
+
+\(provide '%.*s)
+
+;;; %s ends here
+" (position ?. output-filename :from-end t) output-filename output-filename))))
+
+;; make-case-conv.el ends here
This diff is so big that we needed to truncate the remainder.
Repository URL:
https://bitbucket.org/xemacs/xemacs/
--
This is a commit notification from
bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.