commit/XEmacs: kehoea: Actually obey POSIX rules in #'posix-string-match, don't ignore them.
12 years, 8 months
Bitbucket
1 new commit in XEmacs:
https://bitbucket.org/xemacs/xemacs/changeset/d026b665014f/
changeset: d026b665014f
user: kehoea
date: 2012-04-25 21:25:33
summary: Actually obey POSIX rules in #'posix-string-match, don't ignore them.
src/ChangeLog addition:
2012-04-25 Aidan Kehoe <kehoea(a)parhasard.net>
* search.c (string_match_1): Actually use the POSIX argument here,
pass it to compile_pattern(). Thank you for the bug report, Ilya
Shlyakhter!
tests/ChangeLog addition:
2012-04-25 Aidan Kehoe <kehoea(a)parhasard.net>
* automated/regexp-tests.el: Check that #'posix-string-match
actually returns the longest match; thank you Ilya Shlyakhter in
jn1j8t$ujq$1(a)dough.gmane.org !
affected #: 4 files
diff -r 3f4a234f4672ab40f61811656bc674bcd80664db -r d026b665014fda7a8d6148e8cc8fb9d046bff7f7 src/ChangeLog
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,9 @@
+2012-04-25 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * search.c (string_match_1): Actually use the POSIX argument here,
+ pass it to compile_pattern(). Thank you for the bug report, Ilya
+ Shlyakhter!
+
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
Support non-ASCII correctly in character classes ([:alnum:] and
diff -r 3f4a234f4672ab40f61811656bc674bcd80664db -r d026b665014fda7a8d6148e8cc8fb9d046bff7f7 src/search.c
--- a/src/search.c
+++ b/src/search.c
@@ -419,7 +419,7 @@
static Lisp_Object
string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
- struct buffer *buf, int UNUSED (posix))
+ struct buffer *buf, int posix)
{
Bytecount val;
Charcount s;
@@ -450,7 +450,7 @@
bufp = compile_pattern (regexp, &search_regs,
(!NILP (buf->case_fold_search)
? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil),
- string, buf, 0, ERROR_ME);
+ string, buf, posix, ERROR_ME);
QUIT;
{
Bytecount bis = string_index_char_to_byte (string, s);
diff -r 3f4a234f4672ab40f61811656bc674bcd80664db -r d026b665014fda7a8d6148e8cc8fb9d046bff7f7 tests/ChangeLog
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2012-04-25 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * automated/regexp-tests.el: Check that #'posix-string-match
+ actually returns the longest match; thank you Ilya Shlyakhter in
+ jn1j8t$ujq$1(a)dough.gmane.org !
+
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
* automated/regexp-tests.el:
diff -r 3f4a234f4672ab40f61811656bc674bcd80664db -r d026b665014fda7a8d6148e8cc8fb9d046bff7f7 tests/automated/regexp-tests.el
--- a/tests/automated/regexp-tests.el
+++ b/tests/automated/regexp-tests.el
@@ -69,6 +69,15 @@
(Assert (string-match "Ä" "Ä"))
(Assert (not (string-match "Ä" "ä"))))
+;; Is posix-string-match passing the POSIX flag correctly?
+
+(Assert
+ (equal
+ (save-match-data
+ (progn (posix-string-match "i\\|ii" "ii") (match-data)))
+ '(0 2))
+ "checking #'posix-string-match actually returns the longest match"))
+
;; looking-at
(with-temp-buffer
(insert "äÄ")
Repository URL: https://bitbucket.org/xemacs/xemacs/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches
commit/xemacs-packages: 3 new changesets
12 years, 8 months
Bitbucket
3 new commits in xemacs-packages:
https://bitbucket.org/xemacs/xemacs-packages/changeset/85f0083dfeac/
changeset: 85f0083dfeac
user: Norbert Koch
date: 2012-04-23 11:01:13
summary: update cc-mode
affected #: 1 file
diff -r dacaf14c9a2a06b9c7c1f544d5c56fed8780712a -r 85f0083dfeacce6898862fafb78b7f649acb8c11 .hgsubstate
--- a/.hgsubstate
+++ b/.hgsubstate
@@ -17,7 +17,7 @@
da4e7d4a51c502e5ac05a224cb756f382f0ba4d7 xemacs-packages/c-support
11074b3808d1e349f3fddb3c4d50f8be7c0f859e xemacs-packages/calc
7524e4fb9de45d77812090a724fac4ebd7549d6e xemacs-packages/calendar
-41e69539419348a61d29654486a909ca57559777 xemacs-packages/cc-mode
+5da39434bc8692b1cdbc482898e40b13a33c33ff xemacs-packages/cc-mode
a7ae1cfb2376bcd32617c1c88afe08872b11d298 xemacs-packages/cedet-common
87dd21fac17ea98219267b1378b4696698d6c4ff xemacs-packages/clearcase
e18acdbfcd36295d052cd56fa2e6d78c68b4b7d4 xemacs-packages/cogre
https://bitbucket.org/xemacs/xemacs-packages/changeset/86195cdea26a/
changeset: 86195cdea26a
user: Norbert Koch
date: 2012-04-23 11:01:47
summary: XEmacs Package Release
affected #: 1 file
diff -r 85f0083dfeacce6898862fafb78b7f649acb8c11 -r 86195cdea26a10386e9e25c05ba17ade73c4a4c7 ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2012-04-23 Norbert Koch <viteno(a)xemacs.org>
+
+ * Packages released: cc-mode.
+
2012-04-16 Norbert Koch <viteno(a)xemacs.org>
* Packages released: cc-mode.
https://bitbucket.org/xemacs/xemacs-packages/changeset/397f8db0c64e/
changeset: 397f8db0c64e
user: Norbert Koch
date: 2012-04-23 11:06:43
summary: Prerelease cc-mode
affected #: 1 file
diff -r 86195cdea26a10386e9e25c05ba17ade73c4a4c7 -r 397f8db0c64ecdca35508d4c74c767816fb23ef9 .hgsubstate
--- a/.hgsubstate
+++ b/.hgsubstate
@@ -17,7 +17,7 @@
da4e7d4a51c502e5ac05a224cb756f382f0ba4d7 xemacs-packages/c-support
11074b3808d1e349f3fddb3c4d50f8be7c0f859e xemacs-packages/calc
7524e4fb9de45d77812090a724fac4ebd7549d6e xemacs-packages/calendar
-5da39434bc8692b1cdbc482898e40b13a33c33ff xemacs-packages/cc-mode
+391069c9f2803b7f8e5c844ea4ef72dfbd7ca510 xemacs-packages/cc-mode
a7ae1cfb2376bcd32617c1c88afe08872b11d298 xemacs-packages/cedet-common
87dd21fac17ea98219267b1378b4696698d6c4ff xemacs-packages/clearcase
e18acdbfcd36295d052cd56fa2e6d78c68b4b7d4 xemacs-packages/cogre
Repository URL: https://bitbucket.org/xemacs/xemacs-packages/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches
commit/cc-mode: 2 new changesets
12 years, 8 months
Bitbucket
2 new commits in cc-mode:
https://bitbucket.org/xemacs/cc-mode/changeset/c6a1f4bc808e/
changeset: c6a1f4bc808e
user: Norbert Koch
date: 2012-04-23 11:01:47
summary: XEmacs Package Release 1.55
affected #: 2 files
diff -r 5da39434bc8692b1cdbc482898e40b13a33c33ff -r c6a1f4bc808ef2c7d08ba5147537e99b40c78ce4 ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2012-04-23 Norbert Koch <viteno(a)xemacs.org>
+
+ * Makefile (VERSION): XEmacs package 1.55 released.
+
2012-04-16 Norbert Koch <viteno(a)xemacs.org>
* Makefile (VERSION): XEmacs package 1.54 released.
diff -r 5da39434bc8692b1cdbc482898e40b13a33c33ff -r c6a1f4bc808ef2c7d08ba5147537e99b40c78ce4 Makefile
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,7 @@
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
-VERSION = 1.54
+VERSION = 1.55
AUTHOR_VERSION = 5.32.2
MAINTAINER = Alan Mackenzie <bug-cc-mode(a)gnu.org>
PACKAGE = cc-mode
https://bitbucket.org/xemacs/cc-mode/changeset/391069c9f280/
changeset: 391069c9f280
user: Norbert Koch
date: 2012-04-23 11:01:47
summary: Added tag cc-mode-1_55 for changeset c6a1f4bc808e
affected #: 1 file
diff -r c6a1f4bc808ef2c7d08ba5147537e99b40c78ce4 -r 391069c9f2803b7f8e5c844ea4ef72dfbd7ca510 .hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -87,3 +87,4 @@
ca5959d9f677f0de84c0607fd20cf8f19121b9ec cc-mode-1_52
39ad7f53a7def58a7d0e58eb58d1a6a0f22f710d cc-mode-1_53
f1a6c0e647399eba8c3511db89520f0efc63d535 cc-mode-1_54
+c6a1f4bc808ef2c7d08ba5147537e99b40c78ce4 cc-mode-1_55
Repository URL: https://bitbucket.org/xemacs/cc-mode/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches
commit/cc-mode: 2 new changesets
12 years, 9 months
Bitbucket
2 new commits in cc-mode:
https://bitbucket.org/xemacs/cc-mode/changeset/8dfad08a3bac/
changeset: 8dfad08a3bac
user: acm
date: 2012-04-21 10:48:29
summary: Adding a ) can hide the resulting (..) from searches. Fix it.
cc-engine (c-append-lower-brace-pair-to-state-cache): Bound the backward
search to the position of the existing (.
affected #: 1 file
diff -r 41e69539419348a61d29654486a909ca57559777 -r 8dfad08a3bacbad1943ac687dd9e3d08bcbb0ec3 cc-engine.el
--- a/cc-engine.el
+++ b/cc-engine.el
@@ -2613,13 +2613,24 @@
(setq c-state-point-min (point-min)))
(defun c-append-lower-brace-pair-to-state-cache (from &optional upper-lim)
- ;; If there is a brace pair preceding FROM in the buffer (not necessarily
- ;; immediately preceding), push a cons onto `c-state-cache' to represent it.
- ;; FROM must not be inside a literal. If UPPER-LIM is non-nil, we append
- ;; the highest brace pair whose "}" is below UPPER-LIM.
+ ;; If there is a brace pair preceding FROM in the buffer, at the same level
+ ;; of nesting (not necessarily immediately preceding), push a cons onto
+ ;; `c-state-cache' to represent it. FROM must not be inside a literal. If
+ ;; UPPER-LIM is non-nil, we append the highest brace pair whose "}" is below
+ ;; UPPER-LIM.
;;
;; Return non-nil when this has been done.
;;
+ ;; The situation it copes with is this transformation:
+ ;;
+ ;; OLD: { (.) {...........}
+ ;; ^ ^
+ ;; FROM HERE
+ ;;
+ ;; NEW: { {....} (.) {.........
+ ;; ^ ^ ^
+ ;; LOWER BRACE PAIR HERE or HERE
+ ;;
;; This routine should be fast. Since it can get called a LOT, we maintain
;; `c-state-brace-pair-desert', a small cache of "failures", such that we
;; reduce the time wasted in repeated fruitless searches in brace deserts.
@@ -2638,10 +2649,25 @@
(unless (and c-state-brace-pair-desert
(eq cache-pos (car c-state-brace-pair-desert))
(<= from (cdr c-state-brace-pair-desert)))
- ;; Only search what we absolutely need to:
- (if (and c-state-brace-pair-desert
- (eq cache-pos (car c-state-brace-pair-desert)))
- (narrow-to-region (cdr c-state-brace-pair-desert) (point-max)))
+ ;; DESERT-LIM. Only search what we absolutely need to.
+ (let ((desert-lim
+ (and c-state-brace-pair-desert
+ (eq cache-pos (car c-state-brace-pair-desert))
+ (cdr c-state-brace-pair-desert)))
+ ;; CACHE-LIM. This limit will be necessary when an opening
+ ;; paren at `cache-pos' has just had its matching close paren
+ ;; inserted. `cache-pos' continues to be a search bound, even
+ ;; though the algorithm below would skip over the new paren
+ ;; pair.
+ (cache-lim (and cache-pos (< cache-pos from) cache-pos)))
+ (narrow-to-region
+ (cond
+ ((and desert-lim cache-lim)
+ (max desert-lim cache-lim))
+ (desert-lim)
+ (cache-lim)
+ ((point-min)))
+ (point-max)))
;; In the next pair of nested loops, the inner one moves back past a
;; pair of (mis-)matching parens or brackets; the outer one moves
@@ -2675,7 +2701,7 @@
(cons new-cons (cdr c-state-cache))))
(t (setq c-state-cache (cons new-cons c-state-cache)))))
- ;; We haven't found a brace pair. Record this.
+ ;; We haven't found a brace pair. Record this in the cache.
(setq c-state-brace-pair-desert (cons cache-pos from))))))))
(defsubst c-state-push-any-brace-pair (bra+1 macro-start-or-here)
@@ -3270,6 +3296,8 @@
;; of all parens in preprocessor constructs, except for any such construct
;; containing point. We can then call `c-invalidate-state-cache-1' without
;; worrying further about macros and template delimiters.
+ (c-record-parse-state-state) (c-replay-parse-state-state)
+ (message "Invalidate: \n%s\n" here)
(if (memq 'category-properties c-emacs-features)
;; Emacs
(c-with-<->-as-parens-suppressed
@@ -3282,7 +3310,8 @@
(c-with-cpps-commented-out
(c-invalidate-state-cache-1 here))))
;; XEmacs
- (c-invalidate-state-cache-1 here)))
+ (c-invalidate-state-cache-1 here))
+ (c-record-parse-state-state) (c-replay-parse-state-state))
(defun c-parse-state ()
;; This is a wrapper over `c-parse-state-1'. See that function for a
@@ -3292,6 +3321,8 @@
;; of all parens in preprocessor constructs, except for any such construct
;; containing point. We can then call `c-parse-state-1' without worrying
;; further about macros and template delimiters.
+; (c-record-parse-state-state)
+; (c-replay-parse-state-state)
(let (here-cpp-beg here-cpp-end)
(save-excursion
(if (c-beginning-of-macro)
@@ -3330,20 +3361,21 @@
(make-variable-buffer-local 'c-parse-state-state)
(defun c-record-parse-state-state ()
(setq c-parse-state-state
- (mapcar
- (lambda (arg)
- (cons arg (symbol-value arg)))
- '(c-state-cache
- c-state-cache-good-pos
- c-state-nonlit-pos-cache
- c-state-nonlit-pos-cache-limit
- c-state-brace-pair-desert
- c-state-point-min
- c-state-point-min-lit-type
- c-state-point-min-lit-start
- c-state-min-scan-pos
- c-state-old-cpp-beg
- c-state-old-cpp-end))))
+ (cons `(point . ,(point))
+ (mapcar
+ (lambda (arg)
+ (cons arg (symbol-value arg)))
+ '(c-state-cache
+ c-state-cache-good-pos
+ c-state-nonlit-pos-cache
+ c-state-nonlit-pos-cache-limit
+ c-state-brace-pair-desert
+ c-state-point-min
+ c-state-point-min-lit-type
+ c-state-point-min-lit-start
+ c-state-min-scan-pos
+ c-state-old-cpp-beg
+ c-state-old-cpp-end)))))
(defun c-replay-parse-state-state ()
(message
(concat "(setq "
https://bitbucket.org/xemacs/cc-mode/changeset/5da39434bc86/
changeset: 5da39434bc86
user: acm
date: 2012-04-22 11:37:32
summary: Fix erroneous commit.
affected #: 1 file
diff -r 8dfad08a3bacbad1943ac687dd9e3d08bcbb0ec3 -r 5da39434bc8692b1cdbc482898e40b13a33c33ff cc-engine.el
--- a/cc-engine.el
+++ b/cc-engine.el
@@ -3296,8 +3296,6 @@
;; of all parens in preprocessor constructs, except for any such construct
;; containing point. We can then call `c-invalidate-state-cache-1' without
;; worrying further about macros and template delimiters.
- (c-record-parse-state-state) (c-replay-parse-state-state)
- (message "Invalidate: \n%s\n" here)
(if (memq 'category-properties c-emacs-features)
;; Emacs
(c-with-<->-as-parens-suppressed
@@ -3310,8 +3308,7 @@
(c-with-cpps-commented-out
(c-invalidate-state-cache-1 here))))
;; XEmacs
- (c-invalidate-state-cache-1 here))
- (c-record-parse-state-state) (c-replay-parse-state-state))
+ (c-invalidate-state-cache-1 here)))
(defun c-parse-state ()
;; This is a wrapper over `c-parse-state-1'. See that function for a
@@ -3321,8 +3318,6 @@
;; of all parens in preprocessor constructs, except for any such construct
;; containing point. We can then call `c-parse-state-1' without worrying
;; further about macros and template delimiters.
-; (c-record-parse-state-state)
-; (c-replay-parse-state-state)
(let (here-cpp-beg here-cpp-end)
(save-excursion
(if (c-beginning-of-macro)
@@ -3361,21 +3356,20 @@
(make-variable-buffer-local 'c-parse-state-state)
(defun c-record-parse-state-state ()
(setq c-parse-state-state
- (cons `(point . ,(point))
- (mapcar
- (lambda (arg)
- (cons arg (symbol-value arg)))
- '(c-state-cache
- c-state-cache-good-pos
- c-state-nonlit-pos-cache
- c-state-nonlit-pos-cache-limit
- c-state-brace-pair-desert
- c-state-point-min
- c-state-point-min-lit-type
- c-state-point-min-lit-start
- c-state-min-scan-pos
- c-state-old-cpp-beg
- c-state-old-cpp-end)))))
+ (mapcar
+ (lambda (arg)
+ (cons arg (symbol-value arg)))
+ '(c-state-cache
+ c-state-cache-good-pos
+ c-state-nonlit-pos-cache
+ c-state-nonlit-pos-cache-limit
+ c-state-brace-pair-desert
+ c-state-point-min
+ c-state-point-min-lit-type
+ c-state-point-min-lit-start
+ c-state-min-scan-pos
+ c-state-old-cpp-beg
+ c-state-old-cpp-end))))
(defun c-replay-parse-state-state ()
(message
(concat "(setq "
Repository URL: https://bitbucket.org/xemacs/cc-mode/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches
[COMMIT] Support non-ASCII correctly in character classes, test this.
12 years, 9 months
Aidan Kehoe
APPROVE COMMIT
NOTE: This patch has been committed.
# HG changeset patch
# User Aidan Kehoe <kehoea(a)parhasard.net>
# Date 1335031108 -3600
# Node ID 3f4a234f4672ab40f61811656bc674bcd80664db
# Parent 1d9f603e9125575ac67f9cff0f2159a046d99d3e
Support non-ASCII correctly in character classes, test this.
src/ChangeLog addition:
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
Support non-ASCII correctly in character classes ([:alnum:] and
friends).
* regex.c:
* regex.c (ISBLANK, ISUNIBYTE): New. Make these and friends
independent of the locale, since we want them to be consistent in
XEmacs.
* regex.c (print_partial_compiled_pattern): Print the flags for
charset_mule; don't print non-ASCII as the character values in
ranges, this breaks with locales.
* regex.c (enum):
Define various flags the charset_mule and charset_mule_not opcodes
can now take.
* regex.c (CHAR_CLASS_MAX_LENGTH): Update this.
* regex.c (re_iswctype, re_wctype): New, from GNU.
* regex.c (re_wctype_can_match_non_ascii): New; used when deciding
on whether to use charset_mule or the ASCII-only regex character
set opcode.
* regex.c (regex_compile):
Error correctly on long, non-existent character class names.
Break out the handling of charsets that can match non-ASCII into a
separate clause. Use compile_char_class when compiling character
classes.
* regex.c (compile_char_class): New. Used in regex_compile when
compiling character sets that may match non-ASCII.
* regex.c (re_compile_fastmap):
If there are flags set for charset_mule or charset_mule_not, we
can't use the fastmap (since we need to check syntax table values
that aren't available there).
* regex.c (re_match_2_internal):
Check the new flags passed to the charset_mule{,_not} opcode,
observe them if appropriate.
* regex.h:
* regex.h (enum):
Expose re_wctype_t here, imported from GNU.
tests/ChangeLog addition:
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
* automated/regexp-tests.el:
* automated/regexp-tests.el (Assert-char-class):
Check that #'string-match errors correctly with an over-long
character class name.
Add tests for character class functionality that supports
non-ASCII characters. These tests expose bugs in GNU Emacs
24.0.94.2, but pass under current XEmacs.
diff -r 1d9f603e9125 -r 3f4a234f4672 src/ChangeLog
--- a/src/ChangeLog Sat Apr 21 09:41:27 2012 +0100
+++ b/src/ChangeLog Sat Apr 21 18:58:28 2012 +0100
@@ -1,3 +1,41 @@
+2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ Support non-ASCII correctly in character classes ([:alnum:] and
+ friends).
+
+ * regex.c:
+ * regex.c (ISBLANK, ISUNIBYTE): New. Make these and friends
+ independent of the locale, since we want them to be consistent in
+ XEmacs.
+ * regex.c (print_partial_compiled_pattern): Print the flags for
+ charset_mule; don't print non-ASCII as the character values in
+ ranges, this breaks with locales.
+ * regex.c (enum):
+ Define various flags the charset_mule and charset_mule_not opcodes
+ can now take.
+ * regex.c (CHAR_CLASS_MAX_LENGTH): Update this.
+ * regex.c (re_iswctype, re_wctype): New, from GNU.
+ * regex.c (re_wctype_can_match_non_ascii): New; used when deciding
+ on whether to use charset_mule or the ASCII-only regex character
+ set opcode.
+ * regex.c (regex_compile):
+ Error correctly on long, non-existent character class names.
+ Break out the handling of charsets that can match non-ASCII into a
+ separate clause. Use compile_char_class when compiling character
+ classes.
+ * regex.c (compile_char_class): New. Used in regex_compile when
+ compiling character sets that may match non-ASCII.
+ * regex.c (re_compile_fastmap):
+ If there are flags set for charset_mule or charset_mule_not, we
+ can't use the fastmap (since we need to check syntax table values
+ that aren't available there).
+ * regex.c (re_match_2_internal):
+ Check the new flags passed to the charset_mule{,_not} opcode,
+ observe them if appropriate.
+ * regex.h:
+ * regex.h (enum):
+ Expose re_wctype_t here, imported from GNU.
+
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
* regex.h (RE_SYNTAX_EMACS):
diff -r 1d9f603e9125 -r 3f4a234f4672 src/regex.c
--- a/src/regex.c Sat Apr 21 09:41:27 2012 +0100
+++ b/src/regex.c Sat Apr 21 18:58:28 2012 +0100
@@ -178,53 +178,91 @@
/* isalpha etc. are used for the character classes. */
#include <ctype.h>
-/* Jim Meyering writes:
-
- "... Some ctype macros are valid only for character codes that
- isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
- using /bin/cc or gcc but without giving an ansi option). So, all
- ctype uses should be through macros like ISPRINT... If
- STDC_HEADERS is defined, then autoconf has verified that the ctype
- macros don't need to be guarded with references to isascii. ...
- Defining isascii to 1 should let any compiler worth its salt
- eliminate the && through constant folding." */
-
-#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
-#define ISASCII_1(c) 1
+#ifdef emacs
+
+/* 1 if C is an ASCII character. */
+#define ISASCII(c) ((c) < 0x80)
+
+/* 1 if C is a unibyte character. */
+#define ISUNIBYTE(c) 0
+
+/* The Emacs definitions should not be directly affected by locales. */
+
+/* In Emacs, these are only used for single-byte characters. */
+#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
+#define ISCNTRL(c) ((c) < ' ')
+#define ISXDIGIT(c) (ISDIGIT (c) || ((c) >= 'a' && (c) <= 'f') \
+ || ((c) >= 'A' && (c) <= 'F'))
+
+/* This is only used for single-byte characters. */
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+
+/* The rest must handle multibyte characters. */
+
+#define ISGRAPH(c) ((c) > ' ' && (c) != 0x7f)
+#define ISPRINT(c) ((c) == ' ' || ISGRAPH (c))
+#define ISALPHA(c) (ISASCII (c) ? (((c) >= 'a' && (c) <= 'z') \
+ || ((c) >= 'A' && (c) <= 'Z')) \
+ : ISWORD (c))
+#define ISALNUM(c) (ISALPHA (c) || ISDIGIT (c))
+
+#define ISLOWER(c) LOWERCASEP (lispbuf, c)
+
+#define ISPUNCT(c) (ISASCII (c) \
+ ? ((c) > ' ' && (c) < 0x7F \
+ && !(((c) >= 'a' && (c) <= 'z') \
+ || ((c) >= 'A' && (c) <= 'Z') \
+ || ((c) >= '0' && (c) <= '9'))) \
+ : !ISWORD (c))
+
+#define ISSPACE(c) \
+ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Swhitespace)
+
+#define ISUPPER(c) UPPERCASEP (lispbuf, c)
+
+#define ISWORD(c) (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Sword)
+
+#else /* not emacs */
+
+/* 1 if C is an ASCII character. */
+#define ISASCII(c) ((c) < 0200)
+
+/* 1 if C is a unibyte character. */
+#define ISUNIBYTE(c) 0
+
+#ifdef isblank
+# define ISBLANK(c) isblank (c)
#else
-#define ISASCII_1(c) isascii(c)
-#endif
-
-#ifdef MULE
-/* The IS*() macros can be passed any character, including an extended
- one. We need to make sure there are no crashes, which would occur
- otherwise due to out-of-bounds array references. */
-#define ISASCII(c) (((EMACS_UINT) (c)) < 0x100 && ISASCII_1 (c))
-#else
-#define ISASCII(c) ISASCII_1 (c)
-#endif /* MULE */
-
-#ifdef isblank
-#define ISBLANK(c) (ISASCII (c) && isblank (c))
-#else
-#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
-#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+# define ISGRAPH(c) isgraph (c)
#else
-#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+# define ISGRAPH(c) (isprint (c) && !isspace (c))
#endif
-#define ISPRINT(c) (ISASCII (c) && isprint (c))
-#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
-#define ISALNUM(c) (ISASCII (c) && isalnum (c))
-#define ISALPHA(c) (ISASCII (c) && isalpha (c))
-#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
-#define ISLOWER(c) (ISASCII (c) && islower (c))
-#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
-#define ISSPACE(c) (ISASCII (c) && isspace (c))
-#define ISUPPER(c) (ISASCII (c) && isupper (c))
-#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+/* Solaris defines ISPRINT so we must undefine it first. */
+#undef ISPRINT
+#define ISPRINT(c) isprint (c)
+#define ISDIGIT(c) isdigit (c)
+#define ISALNUM(c) isalnum (c)
+#define ISALPHA(c) isalpha (c)
+#define ISCNTRL(c) iscntrl (c)
+#define ISLOWER(c) islower (c)
+#define ISPUNCT(c) ispunct (c)
+#define ISSPACE(c) isspace (c)
+#define ISUPPER(c) isupper (c)
+#define ISXDIGIT(c) isxdigit (c)
+
+#define ISWORD(c) ISALPHA (c)
+
+#ifdef _tolower
+# define TOLOWER(c) _tolower (c)
+#else
+# define TOLOWER(c) tolower (c)
+#endif
+
+#endif /* emacs */
#ifndef NULL
#define NULL (void *)0
@@ -913,6 +951,7 @@
printf ("/charset_mule [%s",
(re_opcode_t) *(p - 1) == charset_mule_not ? "^" : "");
+ printf (" flags: 0x%02x ", *p++);
nentries = unified_range_table_nentries (p);
for (i = 0; i < nentries; i++)
{
@@ -921,14 +960,14 @@
unified_range_table_get_range (p, i, &first, &last,
&dummy_val);
- if (first < 0x100)
+ if (first < 0x80)
putchar (first);
else
printf ("(0x%lx)", (long)first);
if (first != last)
{
putchar ('-');
- if (last < 0x100)
+ if (last < 0x80)
putchar (last);
else
printf ("(0x%lx)", (long)last);
@@ -1974,6 +2013,22 @@
/* The next available element. */
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+/* Bits used to implement the multibyte-part of the various character
+ classes such as [:alnum:] in a charset's range table. XEmacs; use an
+ enum, so they're visible in the debugger. */
+enum
+{
+ BIT_WORD = (1 << 0),
+ BIT_LOWER = (1 << 1),
+ BIT_PUNCT = (1 << 2),
+ BIT_SPACE = (1 << 3),
+ BIT_UPPER = (1 << 4),
+ /* XEmacs; we need this, because we unify treatment of ASCII and non-ASCII
+ (possible matches) in charset_mule. [:alpha:] matches all characters
+ with word syntax, with the exception of [0-9]. We don't need
+ BIT_MULTIBYTE. */
+ BIT_ALPHA = (1 << 5)
+};
/* Set the bit for character C in a bit vector. */
#define SET_LIST_BIT(c) \
@@ -1985,22 +2040,8 @@
/* Set the "bit" for character C in a range table. */
#define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt)
-/* Set the "bit" for character c in the appropriate table. */
-#define SET_EITHER_BIT(c) \
- do { \
- if (has_extended_chars) \
- SET_RANGETAB_BIT (c); \
- else \
- SET_LIST_BIT (c); \
- } while (0)
-
-#else /* not MULE */
-
-#define SET_EITHER_BIT(c) SET_LIST_BIT (c)
-
#endif
-
/* Get the next unsigned number in the uncompiled pattern. */
#define GET_UNSIGNED_NUMBER(num) \
{ if (p != pend) \
@@ -2018,15 +2059,110 @@
} \
}
-#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
-
-#define IS_CHAR_CLASS(string) \
- (STREQ (string, "alpha") || STREQ (string, "upper") \
- || STREQ (string, "lower") || STREQ (string, "digit") \
- || STREQ (string, "alnum") || STREQ (string, "xdigit") \
- || STREQ (string, "space") || STREQ (string, "print") \
- || STREQ (string, "punct") || STREQ (string, "graph") \
- || STREQ (string, "cntrl") || STREQ (string, "blank"))
+#define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */
+
+/* Map a string to the char class it names (if any). */
+static re_wctype_t
+re_wctype (const char *string)
+{
+ if (STREQ (string, "alnum")) return RECC_ALNUM;
+ else if (STREQ (string, "alpha")) return RECC_ALPHA;
+ else if (STREQ (string, "word")) return RECC_WORD;
+ else if (STREQ (string, "ascii")) return RECC_ASCII;
+ else if (STREQ (string, "nonascii")) return RECC_NONASCII;
+ else if (STREQ (string, "graph")) return RECC_GRAPH;
+ else if (STREQ (string, "lower")) return RECC_LOWER;
+ else if (STREQ (string, "print")) return RECC_PRINT;
+ else if (STREQ (string, "punct")) return RECC_PUNCT;
+ else if (STREQ (string, "space")) return RECC_SPACE;
+ else if (STREQ (string, "upper")) return RECC_UPPER;
+ else if (STREQ (string, "unibyte")) return RECC_UNIBYTE;
+ else if (STREQ (string, "multibyte")) return RECC_MULTIBYTE;
+ else if (STREQ (string, "digit")) return RECC_DIGIT;
+ else if (STREQ (string, "xdigit")) return RECC_XDIGIT;
+ else if (STREQ (string, "cntrl")) return RECC_CNTRL;
+ else if (STREQ (string, "blank")) return RECC_BLANK;
+ else return RECC_ERROR;
+}
+
+/* True if CH is in the char class CC. */
+static re_bool
+re_iswctype (int ch, re_wctype_t cc)
+{
+#ifdef emacs
+ /* This is cheesy, lispbuf isn't available to us when compiling the
+ pattern. It's effectively only called (on Mule builds) when the current
+ buffer doesn't matter (e.g. for RECC_ASCII, RECC_CNTRL), so it's not a
+ big deal. */
+ struct buffer *lispbuf = current_buffer;
+#endif
+
+ switch (cc)
+ {
+ case RECC_ALNUM: return ISALNUM (ch) != 0;
+ case RECC_ALPHA: return ISALPHA (ch) != 0;
+ case RECC_BLANK: return ISBLANK (ch) != 0;
+ case RECC_CNTRL: return ISCNTRL (ch) != 0;
+ case RECC_DIGIT: return ISDIGIT (ch) != 0;
+ case RECC_GRAPH: return ISGRAPH (ch) != 0;
+ case RECC_LOWER: return ISLOWER (ch) != 0;
+ case RECC_PRINT: return ISPRINT (ch) != 0;
+ case RECC_PUNCT: return ISPUNCT (ch) != 0;
+ case RECC_SPACE: return ISSPACE (ch) != 0;
+ case RECC_UPPER: return ISUPPER (ch) != 0;
+ case RECC_XDIGIT: return ISXDIGIT (ch) != 0;
+ case RECC_ASCII: return ISASCII (ch) != 0;
+ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch);
+ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0;
+ case RECC_WORD: return ISWORD (ch) != 0;
+ case RECC_ERROR: return false;
+ default:
+ abort ();
+ }
+}
+
+#ifdef MULE
+
+static re_bool
+re_wctype_can_match_non_ascii (re_wctype_t cc)
+{
+ switch (cc)
+ {
+ case RECC_ASCII:
+ case RECC_UNIBYTE:
+ case RECC_CNTRL:
+ case RECC_DIGIT:
+ case RECC_XDIGIT:
+ case RECC_BLANK:
+ return false;
+ default:
+ return true;
+ }
+}
+
+/* Return a bit-pattern to use in the range-table bits to match multibyte
+ chars of class CC. */
+static unsigned char
+re_wctype_to_bit (re_wctype_t cc)
+{
+ switch (cc)
+ {
+ case RECC_PRINT: case RECC_GRAPH:
+ case RECC_ALPHA: return BIT_ALPHA;
+ case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
+ case RECC_LOWER: return BIT_LOWER;
+ case RECC_UPPER: return BIT_UPPER;
+ case RECC_PUNCT: return BIT_PUNCT;
+ case RECC_SPACE: return BIT_SPACE;
+ case RECC_MULTIBYTE: case RECC_NONASCII:
+ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
+ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
+ default:
+ abort ();
+ }
+}
+
+#endif /* MULE */
static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
@@ -2049,6 +2185,8 @@
RE_TRANSLATE_TYPE translate,
reg_syntax_t syntax,
Lisp_Object rtab);
+static reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab,
+ Bitbyte *flags_out);
#endif /* MULE */
static re_bool group_match_null_string_p (unsigned char **p,
unsigned char *end,
@@ -2512,15 +2650,20 @@
BUF_PUSH (anychar);
break;
+#ifdef MULE
+#define MAYBE_START_OVER_WITH_EXTENDED(ch) \
+ if (ch >= 0x80) \
+ { \
+ goto start_over_with_extended; \
+ } while (0)
+#else
+#define MAYBE_START_OVER_WITH_EXTENDED(ch)
+#endif
case '[':
{
/* XEmacs change: this whole section */
re_bool had_char_class = false;
-#ifdef MULE
- re_bool has_extended_chars = false;
- REGISTER Lisp_Object rtab = Qnil;
-#endif
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
@@ -2550,29 +2693,6 @@
&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
SET_LIST_BIT ('\n');
-#ifdef MULE
- start_over_with_extended:
- if (has_extended_chars)
- {
- /* There are extended chars here, which means we need to start
- over and shift to unified range-table format. */
- if (buf_end[-2] == charset)
- buf_end[-2] = charset_mule;
- else
- buf_end[-2] = charset_mule_not;
- buf_end--;
- p = p1; /* go back to the beginning of the charset, after
- a possible ^. */
- rtab = Vthe_lisp_rangetab;
- Fclear_range_table (rtab);
-
- /* charset_not matches newline according to a syntax bit. */
- if ((re_opcode_t) buf_end[-1] == charset_mule_not
- && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
- SET_EITHER_BIT ('\n');
- }
-#endif /* MULE */
-
/* Read in characters and ranges, setting map bits. */
for (;;)
{
@@ -2580,32 +2700,22 @@
PATFETCH (c);
-#ifdef MULE
- if (c >= 0x80 && !has_extended_chars)
- {
- has_extended_chars = 1;
- /* Frumble-bumble, we've found some extended chars.
- Need to start over, process everything using
- the general extended-char mechanism, and need
- to use charset_mule and charset_mule_not instead
- of charset and charset_not. */
- goto start_over_with_extended;
- }
-#endif /* MULE */
+ /* Frumble-bumble, we may have found some extended chars.
+ Need to start over, process everything using the general
+ extended-char mechanism, and need to use charset_mule and
+ charset_mule_not instead of charset and charset_not. */
+ MAYBE_START_OVER_WITH_EXTENDED (c);
+
/* \ might escape characters inside [...] and [^...]. */
if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
{
if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
PATFETCH (c1);
-#ifdef MULE
- if (c1 >= 0x80 && !has_extended_chars)
- {
- has_extended_chars = 1;
- goto start_over_with_extended;
- }
-#endif /* MULE */
- SET_EITHER_BIT (c1);
+
+ MAYBE_START_OVER_WITH_EXTENDED (c1);
+
+ SET_LIST_BIT (c1);
continue;
}
@@ -2631,18 +2741,11 @@
{
reg_errcode_t ret;
-#ifdef MULE
- if (* (unsigned char *) p >= 0x80 && !has_extended_chars)
- {
- has_extended_chars = 1;
- goto start_over_with_extended;
- }
- if (has_extended_chars)
- ret = compile_extended_range (&p, pend, translate,
- syntax, rtab);
- else
-#endif /* MULE */
- ret = compile_range (&p, pend, translate, syntax, buf_end);
+ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p);
+
+ ret = compile_range (&p, pend, translate, syntax,
+ buf_end);
+
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
}
@@ -2653,18 +2756,10 @@
/* Move past the `-'. */
PATFETCH (c1);
-#ifdef MULE
- if (* (unsigned char *) p >= 0x80 && !has_extended_chars)
- {
- has_extended_chars = 1;
- goto start_over_with_extended;
- }
- if (has_extended_chars)
- ret = compile_extended_range (&p, pend, translate,
- syntax, rtab);
- else
-#endif /* MULE */
- ret = compile_range (&p, pend, translate, syntax, buf_end);
+ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p);
+
+ ret = compile_range (&p, pend, translate, syntax, buf_end);
+
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
}
@@ -2674,6 +2769,7 @@
else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
{ /* Leave room for the null. */
char str[CHAR_CLASS_MAX_LENGTH + 1];
+ int ch = 0;
PATFETCH (c);
c1 = 0;
@@ -2683,14 +2779,14 @@
for (;;)
{
- /* #### This code is unused.
- Correctness is not checked after TRT
- table change. */
- PATFETCH (c);
- if (c == ':' || c == ']' || p == pend
- || c1 == CHAR_CLASS_MAX_LENGTH)
- break;
- str[c1++] = (char) c;
+ PATFETCH (c);
+ if ((c == ':' && *p == ']') || p == pend)
+ break;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
}
str[c1] = '\0';
@@ -2699,21 +2795,9 @@
the leading `:' and `[' (but set bits for them). */
if (c == ':' && *p == ']')
{
- int ch;
- re_bool is_alnum = STREQ (str, "alnum");
- re_bool is_alpha = STREQ (str, "alpha");
- re_bool is_blank = STREQ (str, "blank");
- re_bool is_cntrl = STREQ (str, "cntrl");
- re_bool is_digit = STREQ (str, "digit");
- re_bool is_graph = STREQ (str, "graph");
- re_bool is_lower = STREQ (str, "lower");
- re_bool is_print = STREQ (str, "print");
- re_bool is_punct = STREQ (str, "punct");
- re_bool is_space = STREQ (str, "space");
- re_bool is_upper = STREQ (str, "upper");
- re_bool is_xdigit = STREQ (str, "xdigit");
-
- if (!IS_CHAR_CLASS (str))
+ re_wctype_t cc = re_wctype (str);
+
+ if (cc == RECC_ERROR)
FREE_STACK_RETURN (REG_ECTYPE);
/* Throw away the ] at the end of the character
@@ -2722,26 +2806,20 @@
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
- for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
- {
- /* This was split into 3 if's to
- avoid an arbitrary limit in some compiler. */
- if ( (is_alnum && ISALNUM (ch))
- || (is_alpha && ISALPHA (ch))
- || (is_blank && ISBLANK (ch))
- || (is_cntrl && ISCNTRL (ch)))
- SET_EITHER_BIT (ch);
- if ( (is_digit && ISDIGIT (ch))
- || (is_graph && ISGRAPH (ch))
- || (is_lower && ISLOWER (ch))
- || (is_print && ISPRINT (ch)))
- SET_EITHER_BIT (ch);
- if ( (is_punct && ISPUNCT (ch))
- || (is_space && ISSPACE (ch))
- || (is_upper && ISUPPER (ch))
- || (is_xdigit && ISXDIGIT (ch)))
- SET_EITHER_BIT (ch);
- }
+#ifdef MULE
+ if (re_wctype_can_match_non_ascii (cc))
+ {
+ goto start_over_with_extended;
+ }
+#endif /* MULE */
+ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
+ {
+ if (re_iswctype (ch, cc))
+ {
+ SET_LIST_BIT (ch);
+ }
+ }
+
had_char_class = true;
}
else
@@ -2749,30 +2827,18 @@
c1++;
while (c1--)
PATUNFETCH;
- SET_EITHER_BIT ('[');
- SET_EITHER_BIT (':');
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
had_char_class = false;
}
}
else
{
had_char_class = false;
- SET_EITHER_BIT (c);
+ SET_LIST_BIT (c);
}
}
-#ifdef MULE
- if (has_extended_chars)
- {
- /* We have a range table, not a bit vector. */
- int bytes_needed =
- unified_range_table_bytes_needed (rtab);
- GET_BUFFER_SPACE (bytes_needed);
- unified_range_table_copy_data (rtab, buf_end);
- buf_end += unified_range_table_bytes_used (buf_end);
- break;
- }
-#endif /* MULE */
/* Discard any (non)matching list bytes that are all 0 at the
end of the map. Decrease the map-length byte too. */
while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0)
@@ -2781,6 +2847,163 @@
}
break;
+#ifdef MULE
+ start_over_with_extended:
+ {
+ REGISTER Lisp_Object rtab = Qnil;
+ Bitbyte flags = 0;
+ int bytes_needed = sizeof (flags);
+ re_bool had_char_class = false;
+
+ /* There are extended chars here, which means we need to use the
+ unified range-table format. */
+ if (buf_end[-2] == charset)
+ buf_end[-2] = charset_mule;
+ else
+ buf_end[-2] = charset_mule_not;
+ buf_end--;
+ p = p1; /* go back to the beginning of the charset, after
+ a possible ^. */
+ rtab = Vthe_lisp_rangetab;
+ Fclear_range_table (rtab);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) buf_end[-1] == charset_mule_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ SET_RANGETAB_BIT ('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+
+ SET_RANGETAB_BIT (c1);
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ FREE_STACK_RETURN (REG_ERANGE);
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret;
+
+ ret = compile_extended_range (&p, pend, translate, syntax,
+ rtab);
+
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ }
+
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_extended_range (&p, pend, translate,
+ syntax, rtab);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if ((c == ':' && *p == ']') || p == pend)
+ break;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ /* If isn't a word bracketed by `[:' and `:]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but set bits for them). */
+ if (c == ':' && *p == ']')
+ {
+ re_wctype_t cc = re_wctype (str);
+ reg_errcode_t ret = REG_NOERROR;
+
+ if (cc == RECC_ERROR)
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ ret = compile_char_class (cc, rtab, &flags);
+
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_RANGETAB_BIT ('[');
+ SET_RANGETAB_BIT (':');
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ SET_RANGETAB_BIT (c);
+ }
+ }
+
+ bytes_needed += unified_range_table_bytes_needed (rtab);
+ GET_BUFFER_SPACE (bytes_needed);
+ *buf_end++ = flags;
+ unified_range_table_copy_data (rtab, buf_end);
+ buf_end += unified_range_table_bytes_used (buf_end);
+ break;
+ }
+#endif /* MULE */
case '(':
if (syntax & RE_NO_BK_PARENS)
@@ -3715,6 +3938,69 @@
return REG_NOERROR;
}
+/* Record the character class CC for a charset_mule or charset_mule_not
+   bracket expression.
+
+   The bit re_wctype_to_bit (CC) is ORed into *FLAGS_OUT.  Classes that
+   are described below by fixed code point ranges are additionally entered
+   into the range table RTAB, each range mapped to Qt.  Classes that have
+   a bit of their own add nothing to RTAB.
+
+   Always returns REG_NOERROR.  */
+static reg_errcode_t
+compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out)
+{
+  *flags_out |= re_wctype_to_bit (cc);
+
+  switch (cc)
+    {
+    case RECC_ASCII:
+      put_range_table (rtab, 0, 0x7f, Qt);
+      break;
+
+    case RECC_XDIGIT:
+      put_range_table (rtab, 'a', 'f', Qt);
+      /* The upper-case hex digits end at 'F'.  A range running on to 'f'
+         would also admit 'G'-'Z' and the punctuation characters that lie
+         between 'Z' and 'a'.  */
+      put_range_table (rtab, 'A', 'F', Qt);
+      /* fallthrough */
+    case RECC_DIGIT:
+      put_range_table (rtab, '0', '9', Qt);
+      break;
+
+    case RECC_BLANK:
+      put_range_table (rtab, ' ', ' ', Qt);
+      put_range_table (rtab, '\t', '\t', Qt);
+      break;
+
+    case RECC_PRINT:
+      /* Space through '~', plus everything from 0x80 upwards.  */
+      put_range_table (rtab, ' ', 0x7e, Qt);
+      put_range_table (rtab, 0x80, MOST_POSITIVE_FIXNUM, Qt);
+      break;
+
+    case RECC_GRAPH:
+      /* As RECC_PRINT, but without the space character.  */
+      put_range_table (rtab, '!', 0x7e, Qt);
+      put_range_table (rtab, 0x80, MOST_POSITIVE_FIXNUM, Qt);
+      break;
+
+    case RECC_NONASCII:
+    case RECC_MULTIBYTE:
+      put_range_table (rtab, 0x80, MOST_POSITIVE_FIXNUM, Qt);
+      break;
+
+    case RECC_CNTRL:
+      put_range_table (rtab, 0x00, 0x1f, Qt);
+      break;
+
+    case RECC_UNIBYTE:
+      /* Never true in XEmacs. */
+      break;
+
+      /* The following all have their own bits in the class_bits argument
+         to charset_mule and charset_mule_not; they don't use the range
+         table information. */
+    case RECC_ALPHA:
+    case RECC_WORD:
+    case RECC_ALNUM: /* Equivalent to RECC_WORD */
+    case RECC_LOWER:
+    case RECC_PUNCT:
+    case RECC_SPACE:
+    case RECC_UPPER:
+      break;
+    }
+
+  return REG_NOERROR;
+}
+
#endif /* MULE */
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
@@ -3855,6 +4141,15 @@
{
int nentries;
int i;
+ Bitbyte flags = *p++;
+
+ if (flags)
+ {
+ /* We need to consult the syntax table, fastmap won't
+ work. */
+ bufp->can_be_null = 1;
+ goto done;
+ }
nentries = unified_range_table_nentries (p);
for (i = 0; i < nentries; i++)
@@ -3878,6 +4173,16 @@
set_itext_ichar (strr, last);
fastmap[*strr] = 1;
}
+ else if (MOST_POSITIVE_FIXNUM == last)
+ {
+ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all
+ non-ASCII characters. */
+ jj = 0x80;
+ while (jj < 0xA0)
+ {
+ fastmap[jj++] = 1;
+ }
+ }
}
}
break;
@@ -3887,6 +4192,15 @@
int nentries;
int i;
int smallest_prev = 0;
+ Bitbyte flags = *p++;
+
+ if (flags)
+ {
+ /* We need to consult the syntax table, fastmap won't
+ work. */
+ bufp->can_be_null = 1;
+ goto done;
+ }
nentries = unified_range_table_nentries (p);
for (i = 0; i < nentries; i++)
@@ -5416,15 +5730,27 @@
{
REGISTER Ichar c;
re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not;
+ Bitbyte class_bits = *p++;
DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : "");
-
REGEX_PREFETCH ();
c = itext_ichar_fmt (d, fmt, lispobj);
c = RE_TRANSLATE (c); /* The character to match. */
- if (EQ (Qt, unified_range_table_lookup (p, c, Qnil)))
- not_p = !not_p;
+ if ((class_bits &&
+ ((class_bits & BIT_ALPHA && ISALPHA (c))
+ || (class_bits & BIT_SPACE && ISSPACE (c))
+ || (class_bits & BIT_PUNCT && ISPUNCT (c))
+ || (class_bits & BIT_WORD && ISWORD (c))
+ || (TRANSLATE_P (translate) ?
+ (class_bits & (BIT_UPPER | BIT_LOWER)
+ && !NOCASEP (lispbuf, c))
+: ((class_bits & BIT_UPPER && ISUPPER (c))
+ || (class_bits & BIT_LOWER && ISLOWER (c))))))
+ || EQ (Qt, unified_range_table_lookup (p, c, Qnil)))
+ {
+ not_p = !not_p;
+ }
p += unified_range_table_bytes_used (p);
diff -r 1d9f603e9125 -r 3f4a234f4672 src/regex.h
--- a/src/regex.h Sat Apr 21 09:41:27 2012 +0100
+++ b/src/regex.h Sat Apr 21 18:58:28 2012 +0100
@@ -546,6 +546,19 @@
extern int debug_regexps;
+/* Identifiers for the character classes usable inside bracket
+   expressions, as returned by re_wctype () in regex.c.  RECC_ERROR is
+   zero; the pattern compiler rejects it with REG_ECTYPE.  In regex.c,
+   RECC_ALNUM is treated as equivalent to RECC_WORD, and RECC_UNIBYTE is
+   noted as never true in XEmacs. */
+typedef enum
+ {
+ RECC_ERROR = 0,
+ RECC_ALNUM, RECC_ALPHA, RECC_WORD,
+ RECC_GRAPH, RECC_PRINT,
+ RECC_LOWER, RECC_UPPER,
+ RECC_PUNCT, RECC_CNTRL,
+ RECC_DIGIT, RECC_XDIGIT,
+ RECC_BLANK, RECC_SPACE,
+ RECC_MULTIBYTE, RECC_NONASCII,
+ RECC_ASCII, RECC_UNIBYTE
+} re_wctype_t;
+
END_C_DECLS
#endif /* INCLUDED_regex_h_ */
diff -r 1d9f603e9125 -r 3f4a234f4672 tests/ChangeLog
--- a/tests/ChangeLog Sat Apr 21 09:41:27 2012 +0100
+++ b/tests/ChangeLog Sat Apr 21 18:58:28 2012 +0100
@@ -1,3 +1,13 @@
+2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * automated/regexp-tests.el:
+ * automated/regexp-tests.el (Assert-char-class):
+ Check that #'string-match errors correctly with an over-long
+ character class name.
+ Add tests for character class functionality that supports
+ non-ASCII characters. These tests expose bugs in GNU Emacs
+ 24.0.94.2, but pass under current XEmacs.
+
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
* automated/regexp-tests.el:
diff -r 1d9f603e9125 -r 3f4a234f4672 tests/automated/regexp-tests.el
--- a/tests/automated/regexp-tests.el Sat Apr 21 09:41:27 2012 +0100
+++ b/tests/automated/regexp-tests.el Sat Apr 21 18:58:28 2012 +0100
@@ -598,6 +598,14 @@
(Assert (eql (string-match "[\x7f\x81-\x9f]" "\x81") 0))
;; Test character classes
+
+;; This used not to error:
+(Check-Error-Message invalid-regexp "Invalid character class name"
+ (string-match "[[:alnum12345:]]" "a"))
+;; This always errored, as long as character classes were turned on
+(Check-Error-Message invalid-regexp "Invalid character class name"
+ (string-match "[[:alnum1234:]]" "a"))
+
(macrolet
((Assert-char-class (class matching-char non-matching-char)
(if (and (not (featurep 'mule))
@@ -648,7 +656,21 @@
(Assert (null (string-match ,(concat "[^" class
(string non-matching-char) "]")
,(concat (string matching-char)
- (string non-matching-char))))))))
+ (string non-matching-char)))))))
+ (Assert-never-matching (class &rest characters)
+ (cons
+ 'progn
+ (mapcan #'(lambda (character)
+ (if (or (not (eq 'decode-char (car-safe character)))
+ (featurep 'mule))
+ `((Assert (null (string-match
+ ,(concat "[" class "]")
+ ,(string (eval character)))))
+ (Assert (eql (string-match
+ ,(concat "[^" class "]")
+ ,(string (eval character)))
+ 0)))))
+ characters))))
(Assert-char-class "[:alpha:]" ?a ?0)
(Assert-char-class "[:alpha:]" ?z ?9)
(Assert-char-class "[:alpha:]" ?A ?0)
@@ -657,6 +679,18 @@
(Assert-char-class "[:alpha:]" ?c ?\x09)
(Assert-char-class "[:alpha:]" ?d ?\ )
(Assert-char-class "[:alpha:]" ?e ?\x7f)
+ (Assert-char-class
+ "[:alpha:]"
+ (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+ (Assert-char-class
+ "[:alpha:]"
+ (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ ?\x02)
+ (Assert-char-class
+ "[:alpha:]"
+ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS
(Assert-char-class "[:alnum:]" ?a ?.)
(Assert-char-class "[:alnum:]" ?z ?')
@@ -664,11 +698,46 @@
(Assert-char-class "[:alnum:]" ?Z ?!)
(Assert-char-class "[:alnum:]" ?0 ?,)
(Assert-char-class "[:alnum:]" ?9 ?$)
-
(Assert-char-class "[:alnum:]" ?b ?\x00)
(Assert-char-class "[:alnum:]" ?c ?\x09)
(Assert-char-class "[:alnum:]" ?d ?\ )
(Assert-char-class "[:alnum:]" ?e ?\x7f)
+ (Assert-char-class
+ "[:alnum:]"
+ (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+ (Assert-char-class
+ "[:alnum:]"
+ (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ ?\x02)
+ (Assert-char-class
+ "[:alnum:]"
+ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS
+
+ ;; Word is equivalent to alnum in this implementation.
+ (Assert-char-class "[:word:]" ?a ?.)
+ (Assert-char-class "[:word:]" ?z ?')
+ (Assert-char-class "[:word:]" ?A ?/)
+ (Assert-char-class "[:word:]" ?Z ?!)
+ (Assert-char-class "[:word:]" ?0 ?,)
+ (Assert-char-class "[:word:]" ?9 ?$)
+ (Assert-char-class "[:word:]" ?b ?\x00)
+ (Assert-char-class "[:word:]" ?c ?\x09)
+ (Assert-char-class "[:word:]" ?d ?\ )
+ (Assert-char-class "[:word:]" ?e ?\x7f)
+ (Assert-char-class
+ "[:word:]"
+ (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+ (Assert-char-class
+ "[:word:]"
+ (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ ?\x02)
+ (Assert-char-class
+ "[:word:]"
+ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS
(let ((case-fold-search nil))
(Assert-char-class "[:upper:]" ?A ?a)
@@ -679,6 +748,14 @@
(Assert-char-class "[:upper:]" ?E ?\x09)
(Assert-char-class "[:upper:]" ?F ?\ )
(Assert-char-class "[:upper:]" ?G ?\x7f)
+ (Assert-char-class
+ "[:upper:]"
+ (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:upper:]"
+ (decode-char 'ucs #x0392) ;; GREEK CAPITAL LETTER BETA
+ (decode-char 'ucs #x5357)) ;; kDefinition south; southern part; southward
(Assert-char-class "[:lower:]" ?a ?A)
(Assert-char-class "[:lower:]" ?z ?Z)
@@ -687,11 +764,17 @@
(Assert-char-class "[:lower:]" ?d ?\x00)
(Assert-char-class "[:lower:]" ?e ?\x09)
(Assert-char-class "[:lower:]" ?f ? )
- (Assert-char-class "[:lower:]" ?g ?\x7f))
+ (Assert-char-class "[:lower:]" ?g ?\x7f)
+ (Assert-char-class
+ "[:lower:]"
+ (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:lower:]"
+ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ (decode-char 'ucs #x5357)));; kDefinition south; southern part; southward
(let ((case-fold-search t))
- ;; These currently fail, because we don't take into account the buffer's
- ;; case table.
(Assert-char-class "[:upper:]" ?a ?\x00)
(Assert-char-class "[:upper:]" ?z ?\x01)
(Assert-char-class "[:upper:]" ?b ?{)
@@ -700,7 +783,14 @@
(Assert-char-class "[:upper:]" ?e ?>)
(Assert-char-class "[:upper:]" ?f ?\ )
(Assert-char-class "[:upper:]" ?g ?\x7f)
-
+ (Assert-char-class
+ "[:upper:]"
+ (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:upper:]"
+ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ (decode-char 'ucs #x5357)) ;; kDefinition south; southern part; southward
(Assert-char-class "[:lower:]" ?A ?\x00)
(Assert-char-class "[:lower:]" ?Z ?\x01)
(Assert-char-class "[:lower:]" ?B ?{)
@@ -708,7 +798,15 @@
(Assert-char-class "[:lower:]" ?D ?<)
(Assert-char-class "[:lower:]" ?E ?>)
(Assert-char-class "[:lower:]" ?F ?\ )
- (Assert-char-class "[:lower:]" ?G ?\x7F))
+ (Assert-char-class "[:lower:]" ?G ?\x7F)
+ (Assert-char-class
+ "[:lower:]"
+ (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:lower:]"
+ (decode-char 'ucs #x0392) ;; GREEK CAPITAL LETTER BETA
+ (decode-char 'ucs #x5357)));; kDefinition south; southern part; southward
(Assert-char-class "[:digit:]" ?0 ?a)
(Assert-char-class "[:digit:]" ?9 ?z)
@@ -718,6 +816,30 @@
(Assert-char-class "[:digit:]" ?4 ?\x09)
(Assert-char-class "[:digit:]" ?5 ? )
(Assert-char-class "[:digit:]" ?6 ?\x7f)
+ (Assert-char-class
+ "[:digit:]" ?7
+ (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS
+ (Assert-char-class
+ "[:digit:]" ?8
+ (decode-char 'ucs #x0392)) ;; GREEK CAPITAL LETTER BETA
+ (Assert-char-class
+ "[:digit:]" ?9
+ (decode-char 'ucs #x03B2)) ;; GREEK SMALL LETTER BETA
+ (Assert-char-class
+ "[:digit:]" ?0
+ (decode-char 'ucs #x0410)) ;; CYRILLIC CAPITAL LETTER A
+ (Assert-char-class
+ "[:digit:]" ?1
+ (decode-char 'ucs #x0430)) ;; CYRILLIC SMALL LETTER A
+ (Assert-char-class
+ "[:digit:]" ?2
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:digit:]" ?3
+ (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+ (Assert-char-class
+ "[:digit:]" ?4
+ (decode-char 'ucs #x5357)) ;; kDefinition south; southern part; southward
(Assert-char-class "[:xdigit:]" ?0 ?g)
(Assert-char-class "[:xdigit:]" ?9 ?G)
@@ -729,6 +851,30 @@
(Assert-char-class "[:xdigit:]" ?4 ?\x09)
(Assert-char-class "[:xdigit:]" ?5 ?\x7f)
(Assert-char-class "[:xdigit:]" ?6 ?z)
+ (Assert-char-class
+ "[:xdigit:]" ?7
+ (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS
+ (Assert-char-class
+ "[:xdigit:]" ?8
+ (decode-char 'ucs #x0392)) ;; GREEK CAPITAL LETTER BETA
+ (Assert-char-class
+ "[:xdigit:]" ?9
+ (decode-char 'ucs #x03B2)) ;; GREEK SMALL LETTER BETA
+ (Assert-char-class
+ "[:xdigit:]" ?a
+ (decode-char 'ucs #x0410)) ;; CYRILLIC CAPITAL LETTER A
+ (Assert-char-class
+ "[:xdigit:]" ?B
+ (decode-char 'ucs #x0430)) ;; CYRILLIC SMALL LETTER A
+ (Assert-char-class
+ "[:xdigit:]" ?c
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:xdigit:]" ?D
+ (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+ (Assert-char-class
+ "[:xdigit:]" ?e
+ (decode-char 'ucs #x5357)) ;; kDefinition south; southern part; southward
(Assert-char-class "[:space:]" ?\ ?0)
(Assert-char-class "[:space:]" ?\t ?9)
@@ -738,6 +884,30 @@
(Assert-char-class "[:space:]" ?\ ?\x7f)
(Assert-char-class "[:space:]" ?\t ?a)
(Assert-char-class "[:space:]" ?\ ?z)
+ (Assert-char-class
+ "[:space:]" ?\
+ (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS
+ (Assert-char-class
+ "[:space:]" ?\t
+ (decode-char 'ucs #x0392)) ;; GREEK CAPITAL LETTER BETA
+ (Assert-char-class
+ "[:space:]" ?\
+ (decode-char 'ucs #x03B2)) ;; GREEK SMALL LETTER BETA
+ (Assert-char-class
+ "[:space:]" ?\t
+ (decode-char 'ucs #x0410)) ;; CYRILLIC CAPITAL LETTER A
+ (Assert-char-class
+ "[:space:]" ?\
+ (decode-char 'ucs #x0430)) ;; CYRILLIC SMALL LETTER A
+ (Assert-char-class
+ "[:space:]" ?\t
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:space:]" ?\
+ (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+ (Assert-char-class
+ "[:space:]" ?\t
+ (decode-char 'ucs #x5357)) ;; kDefinition south; southern part; southward
(Assert-char-class "[:print:]" ?\ ?\x00)
(Assert-char-class "[:print:]" ?0 ?\x09)
@@ -747,6 +917,63 @@
(Assert-char-class "[:print:]" ?B ?\t)
(Assert-char-class "[:print:]" ?a ?\x03)
(Assert-char-class "[:print:]" ?z ?\x04)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x0385) ;; GREEK DIALYTIKA TONOS
+ ?\x05)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x0392) ;; GREEK CAPITAL LETTER BETA
+ ?\x06)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ ?\x07)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ ?\x08)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ ?\x09)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x0686) ;; ARABIC LETTER TCHEH
+ ?\x0a)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x2116) ;; NUMERO SIGN
+ ?\x0b)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x5357) ;; kDefinition south; southern part; southward
+ ?\x0c)
+
+ (Assert-char-class "[:graph:]" ?! ?\ )
+ (Assert-char-class "[:graph:]" ?0 ?\x09)
+ (Assert-char-class "[:graph:]" ?9 ?\x7f)
+ (Assert-char-class "[:graph:]" ?A ?\x01)
+ (Assert-char-class "[:graph:]" ?Z ?\x02)
+ (Assert-char-class "[:graph:]" ?B ?\t)
+ (Assert-char-class "[:graph:]" ?a ?\x03)
+ (Assert-char-class "[:graph:]" ?z ?\x04)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x0385) ;; GREEK DIALYTIKA TONOS
+ ?\x05)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x0392) ;; GREEK CAPITAL LETTER BETA
+ ?\x06)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ ?\x07)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ ?\x08)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ ?\x09)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x0686) ;; ARABIC LETTER TCHEH
+ ?\x0a)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x2116) ;; NUMERO SIGN
+ ?\x0b)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x5357) ;; kDefinition south; southern part; southward
+ ?\x0c)
(Assert-char-class "[:punct:]" ?\( ?0)
(Assert-char-class "[:punct:]" ?. ?9)
@@ -757,4 +984,102 @@
(Assert-char-class "[:punct:]" ?< ?\x09)
(Assert-char-class "[:punct:]" ?> ?\x7f)
(Assert-char-class "[:punct:]" ?= ?a)
- (Assert-char-class "[:punct:]" ?\? ?z))
+ (Assert-char-class "[:punct:]" ?\? ?z)
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x0385) ;; GREEK DIALYTIKA TONOS
+ ?a)
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x20af) ;; DRACHMA SIGN
+ (decode-char 'ucs #x0392)) ;; GREEK CAPITAL LETTER BETA
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x00a7) ;; SECTION SIGN
+ (decode-char 'ucs #x03B2)) ;; GREEK SMALL LETTER BETA
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x00a8) ;; DIAERESIS
+ (decode-char 'ucs #x0410)) ;; CYRILLIC CAPITAL LETTER A
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x0384) ;; GREEK TONOS
+ (decode-char 'ucs #x0430)) ;; CYRILLIC SMALL LETTER A
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x00b7) ;; MIDDLE DOT
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x2116) ;; NUMERO SIGN
+ ?x)
+ (Assert-char-class
+ "[:punct:]"
+ ?=
+ (decode-char 'ucs #x5357)) ;; kDefinition south; southern part; southward
+
+ (Assert-char-class "[:ascii:]" ?a (decode-char 'ucs #x00a7)) ;; SECTION SIGN
+ (Assert-char-class "[:ascii:]" ?b (decode-char 'ucs #x00a8)) ;; DIAERESIS
+ (Assert-char-class "[:ascii:]" ?c (decode-char 'ucs #x00b7)) ;; MIDDLE DOT
+ (Assert-char-class "[:ascii:]" ?d (decode-char 'ucs #x0384)) ;; GREEK TONOS
+ (Assert-char-class
+ "[:ascii:]" ?\x00 (decode-char 'ucs #x0392)) ;; GREEK CAPITAL LETTER BETA
+ (Assert-char-class
+ "[:ascii:]" ?\x01 (decode-char 'ucs #x03B2)) ;; GREEK SMALL LETTER BETA
+ (Assert-char-class
+ "[:ascii:]" ?\t (decode-char 'ucs #x0410)) ;; CYRILLIC CAPITAL LETTER A
+ (Assert-char-class
+ "[:ascii:]" ?A (decode-char 'ucs #x0430)) ;; CYRILLIC SMALL LETTER A
+ (Assert-char-class
+ "[:ascii:]" ?B (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:ascii:]" ?C (decode-char 'ucs #x20af)) ;; DRACHMA SIGN
+ (Assert-char-class
+ "[:ascii:]" ?\x7f (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x00a7) ?a) ;; SECTION SIGN
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x00a8) ?b) ;; DIAERESIS
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x00b7) ?c) ;; MIDDLE DOT
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x0384) ?d) ;; GREEK TONOS
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x0392) ?\x00) ;; GREEK CAPITAL LETTER BETA
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x03B2) ?\x01) ;; GREEK SMALL LETTER BETA
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x0410) ?\t) ;; CYRILLIC CAPITAL LETTER A
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x0430) ?A) ;; CYRILLIC SMALL LETTER A
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x0686) ?B) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x20af) ?C) ;; DRACHMA SIGN
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x2116) ?\x7f) ;; NUMERO SIGN
+
+ (Assert-char-class
+ "[:multibyte:]" (decode-char 'ucs #x00a7) ?a) ;; SECTION SIGN
+ (Assert-char-class
+ "[:multibyte:]" (decode-char 'ucs #x00a8) ?b) ;; DIAERESIS
+ (Assert-char-class
+ "[:multibyte:]" (decode-char 'ucs #x00b7) ?c) ;; MIDDLE DOT
+ (Assert-char-class
+ "[:multibyte:]" (decode-char 'ucs #x0384) ?d) ;; GREEK TONOS
+ (Assert-char-class
+ "[:multibyte:]" (decode-char 'ucs #x0392)
+ ?\x00) ;; GREEK CAPITAL LETTER BETA
+
+ (Assert-never-matching
+ "[:unibyte:]"
+ ?\x01 ?\t ?A ?B ?C ?\x7f
+ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ (decode-char 'ucs #x0686) ;; ARABIC LETTER TCHEH
+ (decode-char 'ucs #x20af) ;; DRACHMA SIGN
+ (decode-char 'ucs #x2116) ;; NUMERO SIGN
+ (decode-char 'ucs #x5357))) ;; kDefinition south; southern part; southward
+
--
‘Iodine deficiency was endemic in parts of the UK until, through what has been
described as “an unplanned and accidental public health triumph”, iodine was
added to cattle feed to improve milk production in the 1930s.’
(EN Pearce, Lancet, June 2011)
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches
[COMMIT] Turn on character classes in regex.c by default; test them in regexp-tests.el
12 years, 9 months
Aidan Kehoe
APPROVE COMMIT
NOTE: This patch has been committed
# HG changeset patch
# User Aidan Kehoe <kehoea(a)parhasard.net>
# Date 1334997687 -3600
# Node ID 1d9f603e9125575ac67f9cff0f2159a046d99d3e
# Parent 7aa144d1404b8ce10cbbb3dfb0faf0e31f04b475
Turn on character classes in regex.c by default; test them in regexp-tests.el
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
* regex.h (RE_SYNTAX_EMACS):
Turn on character classes ([:alnum:] and friends) by default. This
implementation is incomplete, am working on a version that handles
non-ASCII characters correctly.
tests/ChangeLog addition:
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
* automated/regexp-tests.el:
* automated/regexp-tests.el (Assert-char-class):
Test the character classes functionality that was always in
regex.c but that has only just been turned on. These tests pass on
GNU Emacs 24.0.94.2.
diff -r 7aa144d1404b -r 1d9f603e9125 src/ChangeLog
--- a/src/ChangeLog Sat Apr 14 21:18:11 2012 +0100
+++ b/src/ChangeLog Sat Apr 21 09:41:27 2012 +0100
@@ -1,3 +1,10 @@
+2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * regex.h (RE_SYNTAX_EMACS):
+ Turn on character classes ([:alnum:] and friends) by default. This
+ implementation is incomplete, am working on a version that handles
+ non-ASCII characters correctly.
+
2012-02-12 Vin Shelton <acs(a)xemacs.org>
* sysproc.h: As of Cygwin 1.7.10, /usr/include/process.h has moved
diff -r 7aa144d1404b -r 1d9f603e9125 src/regex.h
--- a/src/regex.h Sat Apr 14 21:18:11 2012 +0100
+++ b/src/regex.h Sat Apr 21 09:41:27 2012 +0100
@@ -193,7 +193,7 @@
(The [[[ comments delimit what gets put into the Texinfo file, so
don't delete them!) */
/* [[[begin syntaxes]]] */
-#define RE_SYNTAX_EMACS RE_INTERVALS
+#define RE_SYNTAX_EMACS (RE_INTERVALS | RE_CHAR_CLASSES)
#define RE_SYNTAX_AWK \
(RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
diff -r 7aa144d1404b -r 1d9f603e9125 tests/ChangeLog
--- a/tests/ChangeLog Sat Apr 14 21:18:11 2012 +0100
+++ b/tests/ChangeLog Sat Apr 21 09:41:27 2012 +0100
@@ -1,3 +1,11 @@
+2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * automated/regexp-tests.el:
+ * automated/regexp-tests.el (Assert-char-class):
+ Test the character classes functionality that was always in
+ regex.c but that has only just been turned on. These tests pass on
+ GNU Emacs 24.0.94.2.
+
2012-01-14 Aidan Kehoe <kehoea(a)parhasard.net>
* automated/lisp-tests.el:
diff -r 7aa144d1404b -r 1d9f603e9125 tests/automated/regexp-tests.el
--- a/tests/automated/regexp-tests.el Sat Apr 14 21:18:11 2012 +0100
+++ b/tests/automated/regexp-tests.el Sat Apr 21 09:41:27 2012 +0100
@@ -596,3 +596,165 @@
(Assert (eql (string-match "[\x7f\x80\x9f]" "\x80") 0))
(Assert (eql (string-match "[\x7e\x80-\x9f]" "\x80") 0))
(Assert (eql (string-match "[\x7f\x81-\x9f]" "\x81") 0))
+
+;; Test character classes
+(macrolet
+ ((Assert-char-class (class matching-char non-matching-char)
+ (if (and (not (featurep 'mule))
+ (or (eq (car-safe matching-char) 'decode-char)
+ (eq (car-safe non-matching-char) 'decode-char)))
+ ;; Don't attempt expansion if these clauses require Mule and we
+ ;; don't have it.
+ (return-from Assert-char-class nil)
+ (setq matching-char (eval matching-char)
+ non-matching-char (eval non-matching-char)))
+ `(progn
+ (Assert (eql (string-match ,(concat "[" class "]")
+ ,(concat (string matching-char)
+ (string non-matching-char)))
+ 0))
+ (Assert (eql (string-match ,(concat "[" class class class "]")
+ ,(concat (string matching-char)
+ (string non-matching-char)))
+ 0))
+ (Assert (eql (string-match ,(concat "[^" class "]")
+ ,(concat (string non-matching-char)
+ (string matching-char)))
+ 0))
+ (Assert (eql (string-match ,(concat "[^" class class class "]")
+ ,(concat (string non-matching-char)
+ (string matching-char)))
+ 0))
+ (Assert (eql (string-match ,(concat "[" class "]")
+ ,(concat (string non-matching-char)
+ (string matching-char)))
+ 1))
+ (Assert (eql (string-match ,(concat "[" class class class "]")
+ ,(concat (string non-matching-char)
+ (string matching-char)))
+ 1))
+ (Assert (eql (string-match ,(concat "[^" class "]")
+ ,(concat (string matching-char)
+ (string non-matching-char)))
+ 1))
+ (Assert (eql (string-match ,(concat "[^" class class class "]")
+ ,(concat (string matching-char)
+ (string non-matching-char)))
+ 1))
+ (Assert (null (string-match ,(concat "[" class "]")
+ ,(string non-matching-char))))
+ (Assert (null (string-match ,(concat "[^" class "]")
+ ,(string matching-char))))
+ (Assert (null (string-match ,(concat "[^" class
+ (string non-matching-char) "]")
+ ,(concat (string matching-char)
+ (string non-matching-char))))))))
+ (Assert-char-class "[:alpha:]" ?a ?0)
+ (Assert-char-class "[:alpha:]" ?z ?9)
+ (Assert-char-class "[:alpha:]" ?A ?0)
+ (Assert-char-class "[:alpha:]" ?Z ?9)
+ (Assert-char-class "[:alpha:]" ?b ?\x00)
+ (Assert-char-class "[:alpha:]" ?c ?\x09)
+ (Assert-char-class "[:alpha:]" ?d ?\ )
+ (Assert-char-class "[:alpha:]" ?e ?\x7f)
+
+ (Assert-char-class "[:alnum:]" ?a ?.)
+ (Assert-char-class "[:alnum:]" ?z ?')
+ (Assert-char-class "[:alnum:]" ?A ?/)
+ (Assert-char-class "[:alnum:]" ?Z ?!)
+ (Assert-char-class "[:alnum:]" ?0 ?,)
+ (Assert-char-class "[:alnum:]" ?9 ?$)
+
+ (Assert-char-class "[:alnum:]" ?b ?\x00)
+ (Assert-char-class "[:alnum:]" ?c ?\x09)
+ (Assert-char-class "[:alnum:]" ?d ?\ )
+ (Assert-char-class "[:alnum:]" ?e ?\x7f)
+
+ (let ((case-fold-search nil))
+ (Assert-char-class "[:upper:]" ?A ?a)
+ (Assert-char-class "[:upper:]" ?Z ?z)
+ (Assert-char-class "[:upper:]" ?B ?0)
+ (Assert-char-class "[:upper:]" ?C ?9)
+ (Assert-char-class "[:upper:]" ?D ?\x00)
+ (Assert-char-class "[:upper:]" ?E ?\x09)
+ (Assert-char-class "[:upper:]" ?F ?\ )
+ (Assert-char-class "[:upper:]" ?G ?\x7f)
+
+ (Assert-char-class "[:lower:]" ?a ?A)
+ (Assert-char-class "[:lower:]" ?z ?Z)
+ (Assert-char-class "[:lower:]" ?b ?0)
+ (Assert-char-class "[:lower:]" ?c ?9)
+ (Assert-char-class "[:lower:]" ?d ?\x00)
+ (Assert-char-class "[:lower:]" ?e ?\x09)
+ (Assert-char-class "[:lower:]" ?f ? )
+ (Assert-char-class "[:lower:]" ?g ?\x7f))
+
+ (let ((case-fold-search t))
+ ;; These currently fail, because we don't take into account the buffer's
+ ;; case table.
+ (Assert-char-class "[:upper:]" ?a ?\x00)
+ (Assert-char-class "[:upper:]" ?z ?\x01)
+ (Assert-char-class "[:upper:]" ?b ?{)
+ (Assert-char-class "[:upper:]" ?c ?})
+ (Assert-char-class "[:upper:]" ?d ?<)
+ (Assert-char-class "[:upper:]" ?e ?>)
+ (Assert-char-class "[:upper:]" ?f ?\ )
+ (Assert-char-class "[:upper:]" ?g ?\x7f)
+
+ (Assert-char-class "[:lower:]" ?A ?\x00)
+ (Assert-char-class "[:lower:]" ?Z ?\x01)
+ (Assert-char-class "[:lower:]" ?B ?{)
+ (Assert-char-class "[:lower:]" ?C ?})
+ (Assert-char-class "[:lower:]" ?D ?<)
+ (Assert-char-class "[:lower:]" ?E ?>)
+ (Assert-char-class "[:lower:]" ?F ?\ )
+ (Assert-char-class "[:lower:]" ?G ?\x7F))
+
+ (Assert-char-class "[:digit:]" ?0 ?a)
+ (Assert-char-class "[:digit:]" ?9 ?z)
+ (Assert-char-class "[:digit:]" ?1 ?A)
+ (Assert-char-class "[:digit:]" ?2 ?Z)
+ (Assert-char-class "[:digit:]" ?3 ?\x00)
+ (Assert-char-class "[:digit:]" ?4 ?\x09)
+ (Assert-char-class "[:digit:]" ?5 ? )
+ (Assert-char-class "[:digit:]" ?6 ?\x7f)
+
+ (Assert-char-class "[:xdigit:]" ?0 ?g)
+ (Assert-char-class "[:xdigit:]" ?9 ?G)
+ (Assert-char-class "[:xdigit:]" ?A ?{)
+ (Assert-char-class "[:xdigit:]" ?a ?})
+ (Assert-char-class "[:xdigit:]" ?1 ? )
+ (Assert-char-class "[:xdigit:]" ?2 ?Z)
+ (Assert-char-class "[:xdigit:]" ?3 ?\x00)
+ (Assert-char-class "[:xdigit:]" ?4 ?\x09)
+ (Assert-char-class "[:xdigit:]" ?5 ?\x7f)
+ (Assert-char-class "[:xdigit:]" ?6 ?z)
+
+ (Assert-char-class "[:space:]" ?\ ?0)
+ (Assert-char-class "[:space:]" ?\t ?9)
+ (Assert-char-class "[:space:]" ?\ ?A)
+ (Assert-char-class "[:space:]" ?\t ?Z)
+ (Assert-char-class "[:space:]" ?\ ?\x00)
+ (Assert-char-class "[:space:]" ?\ ?\x7f)
+ (Assert-char-class "[:space:]" ?\t ?a)
+ (Assert-char-class "[:space:]" ?\ ?z)
+
+ (Assert-char-class "[:print:]" ?\ ?\x00)
+ (Assert-char-class "[:print:]" ?0 ?\x09)
+ (Assert-char-class "[:print:]" ?9 ?\x7f)
+ (Assert-char-class "[:print:]" ?A ?\x01)
+ (Assert-char-class "[:print:]" ?Z ?\x02)
+ (Assert-char-class "[:print:]" ?B ?\t)
+ (Assert-char-class "[:print:]" ?a ?\x03)
+ (Assert-char-class "[:print:]" ?z ?\x04)
+
+ (Assert-char-class "[:punct:]" ?\( ?0)
+ (Assert-char-class "[:punct:]" ?. ?9)
+ (Assert-char-class "[:punct:]" ?{ ?A)
+ (Assert-char-class "[:punct:]" ?} ?Z)
+ (Assert-char-class "[:punct:]" ?: ?\t)
+ (Assert-char-class "[:punct:]" ?\; ?\x00)
+ (Assert-char-class "[:punct:]" ?< ?\x09)
+ (Assert-char-class "[:punct:]" ?> ?\x7f)
+ (Assert-char-class "[:punct:]" ?= ?a)
+ (Assert-char-class "[:punct:]" ?\? ?z))
--
‘Iodine deficiency was endemic in parts of the UK until, through what has been
described as “an unplanned and accidental public health triumph”, iodine was
added to cattle feed to improve milk production in the 1930s.’
(EN Pearce, Lancet, June 2011)
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches
commit/XEmacs: 2 new changesets
12 years, 9 months
Bitbucket
2 new commits in XEmacs:
https://bitbucket.org/xemacs/xemacs/changeset/1d9f603e9125/
changeset: 1d9f603e9125
user: kehoea
date: 2012-04-21 10:41:27
summary: Turn on character classes in regex.c by default; test them in regexp-tests.el
src/ChangeLog addition:
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
* regex.h (RE_SYNTAX_EMACS):
Turn on character classes ([:alnum:] and friends) by default. This
implementation is incomplete, am working on a version that handles
non-ASCII characters correctly.
tests/ChangeLog addition:
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
* automated/regexp-tests.el:
* automated/regexp-tests.el (Assert-char-class):
Test the character classes functionality that was always in
regex.c but that has only just been turned on. These tests pass on
GNU Emacs 24.0.94.2.
affected #: 4 files
diff -r 7aa144d1404b8ce10cbbb3dfb0faf0e31f04b475 -r 1d9f603e9125575ac67f9cff0f2159a046d99d3e src/ChangeLog
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,10 @@
+2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * regex.h (RE_SYNTAX_EMACS):
+ Turn on character classes ([:alnum:] and friends) by default. This
+ implementation is incomplete, am working on a version that handles
+ non-ASCII characters correctly.
+
2012-02-12 Vin Shelton <acs(a)xemacs.org>
* sysproc.h: As of Cygwin 1.7.10, /usr/include/process.h has moved
diff -r 7aa144d1404b8ce10cbbb3dfb0faf0e31f04b475 -r 1d9f603e9125575ac67f9cff0f2159a046d99d3e src/regex.h
--- a/src/regex.h
+++ b/src/regex.h
@@ -193,7 +193,7 @@
(The [[[ comments delimit what gets put into the Texinfo file, so
don't delete them!) */
/* [[[begin syntaxes]]] */
-#define RE_SYNTAX_EMACS RE_INTERVALS
+#define RE_SYNTAX_EMACS (RE_INTERVALS | RE_CHAR_CLASSES)
#define RE_SYNTAX_AWK \
(RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
diff -r 7aa144d1404b8ce10cbbb3dfb0faf0e31f04b475 -r 1d9f603e9125575ac67f9cff0f2159a046d99d3e tests/ChangeLog
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,11 @@
+2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * automated/regexp-tests.el:
+ * automated/regexp-tests.el (Assert-char-class):
+ Test the character classes functionality that was always in
+ regex.c but that has only just been turned on. These tests pass on
+ GNU Emacs 24.0.94.2.
+
2012-01-14 Aidan Kehoe <kehoea(a)parhasard.net>
* automated/lisp-tests.el:
diff -r 7aa144d1404b8ce10cbbb3dfb0faf0e31f04b475 -r 1d9f603e9125575ac67f9cff0f2159a046d99d3e tests/automated/regexp-tests.el
--- a/tests/automated/regexp-tests.el
+++ b/tests/automated/regexp-tests.el
@@ -596,3 +596,165 @@
(Assert (eql (string-match "[\x7f\x80\x9f]" "\x80") 0))
(Assert (eql (string-match "[\x7e\x80-\x9f]" "\x80") 0))
(Assert (eql (string-match "[\x7f\x81-\x9f]" "\x81") 0))
+
+;; Test character classes
+(macrolet
+ ((Assert-char-class (class matching-char non-matching-char)
+ (if (and (not (featurep 'mule))
+ (or (eq (car-safe matching-char) 'decode-char)
+ (eq (car-safe non-matching-char) 'decode-char)))
+ ;; Don't attempt expansion if these clauses require Mule and we
+ ;; don't have it.
+ (return-from Assert-char-class nil)
+ (setq matching-char (eval matching-char)
+ non-matching-char (eval non-matching-char)))
+ `(progn
+ (Assert (eql (string-match ,(concat "[" class "]")
+ ,(concat (string matching-char)
+ (string non-matching-char)))
+ 0))
+ (Assert (eql (string-match ,(concat "[" class class class "]")
+ ,(concat (string matching-char)
+ (string non-matching-char)))
+ 0))
+ (Assert (eql (string-match ,(concat "[^" class "]")
+ ,(concat (string non-matching-char)
+ (string matching-char)))
+ 0))
+ (Assert (eql (string-match ,(concat "[^" class class class "]")
+ ,(concat (string non-matching-char)
+ (string matching-char)))
+ 0))
+ (Assert (eql (string-match ,(concat "[" class "]")
+ ,(concat (string non-matching-char)
+ (string matching-char)))
+ 1))
+ (Assert (eql (string-match ,(concat "[" class class class "]")
+ ,(concat (string non-matching-char)
+ (string matching-char)))
+ 1))
+ (Assert (eql (string-match ,(concat "[^" class "]")
+ ,(concat (string matching-char)
+ (string non-matching-char)))
+ 1))
+ (Assert (eql (string-match ,(concat "[^" class class class "]")
+ ,(concat (string matching-char)
+ (string non-matching-char)))
+ 1))
+ (Assert (null (string-match ,(concat "[" class "]")
+ ,(string non-matching-char))))
+ (Assert (null (string-match ,(concat "[^" class "]")
+ ,(string matching-char))))
+ (Assert (null (string-match ,(concat "[^" class
+ (string non-matching-char) "]")
+ ,(concat (string matching-char)
+ (string non-matching-char))))))))
+ (Assert-char-class "[:alpha:]" ?a ?0)
+ (Assert-char-class "[:alpha:]" ?z ?9)
+ (Assert-char-class "[:alpha:]" ?A ?0)
+ (Assert-char-class "[:alpha:]" ?Z ?9)
+ (Assert-char-class "[:alpha:]" ?b ?\x00)
+ (Assert-char-class "[:alpha:]" ?c ?\x09)
+ (Assert-char-class "[:alpha:]" ?d ?\ )
+ (Assert-char-class "[:alpha:]" ?e ?\x7f)
+
+ (Assert-char-class "[:alnum:]" ?a ?.)
+ (Assert-char-class "[:alnum:]" ?z ?')
+ (Assert-char-class "[:alnum:]" ?A ?/)
+ (Assert-char-class "[:alnum:]" ?Z ?!)
+ (Assert-char-class "[:alnum:]" ?0 ?,)
+ (Assert-char-class "[:alnum:]" ?9 ?$)
+
+ (Assert-char-class "[:alnum:]" ?b ?\x00)
+ (Assert-char-class "[:alnum:]" ?c ?\x09)
+ (Assert-char-class "[:alnum:]" ?d ?\ )
+ (Assert-char-class "[:alnum:]" ?e ?\x7f)
+
+ (let ((case-fold-search nil))
+ (Assert-char-class "[:upper:]" ?A ?a)
+ (Assert-char-class "[:upper:]" ?Z ?z)
+ (Assert-char-class "[:upper:]" ?B ?0)
+ (Assert-char-class "[:upper:]" ?C ?9)
+ (Assert-char-class "[:upper:]" ?D ?\x00)
+ (Assert-char-class "[:upper:]" ?E ?\x09)
+ (Assert-char-class "[:upper:]" ?F ?\ )
+ (Assert-char-class "[:upper:]" ?G ?\x7f)
+
+ (Assert-char-class "[:lower:]" ?a ?A)
+ (Assert-char-class "[:lower:]" ?z ?Z)
+ (Assert-char-class "[:lower:]" ?b ?0)
+ (Assert-char-class "[:lower:]" ?c ?9)
+ (Assert-char-class "[:lower:]" ?d ?\x00)
+ (Assert-char-class "[:lower:]" ?e ?\x09)
+ (Assert-char-class "[:lower:]" ?f ? )
+ (Assert-char-class "[:lower:]" ?g ?\x7f))
+
+ (let ((case-fold-search t))
+ ;; These currently fail, because we don't take into account the buffer's
+ ;; case table.
+ (Assert-char-class "[:upper:]" ?a ?\x00)
+ (Assert-char-class "[:upper:]" ?z ?\x01)
+ (Assert-char-class "[:upper:]" ?b ?{)
+ (Assert-char-class "[:upper:]" ?c ?})
+ (Assert-char-class "[:upper:]" ?d ?<)
+ (Assert-char-class "[:upper:]" ?e ?>)
+ (Assert-char-class "[:upper:]" ?f ?\ )
+ (Assert-char-class "[:upper:]" ?g ?\x7f)
+
+ (Assert-char-class "[:lower:]" ?A ?\x00)
+ (Assert-char-class "[:lower:]" ?Z ?\x01)
+ (Assert-char-class "[:lower:]" ?B ?{)
+ (Assert-char-class "[:lower:]" ?C ?})
+ (Assert-char-class "[:lower:]" ?D ?<)
+ (Assert-char-class "[:lower:]" ?E ?>)
+ (Assert-char-class "[:lower:]" ?F ?\ )
+ (Assert-char-class "[:lower:]" ?G ?\x7F))
+
+ (Assert-char-class "[:digit:]" ?0 ?a)
+ (Assert-char-class "[:digit:]" ?9 ?z)
+ (Assert-char-class "[:digit:]" ?1 ?A)
+ (Assert-char-class "[:digit:]" ?2 ?Z)
+ (Assert-char-class "[:digit:]" ?3 ?\x00)
+ (Assert-char-class "[:digit:]" ?4 ?\x09)
+ (Assert-char-class "[:digit:]" ?5 ? )
+ (Assert-char-class "[:digit:]" ?6 ?\x7f)
+
+ (Assert-char-class "[:xdigit:]" ?0 ?g)
+ (Assert-char-class "[:xdigit:]" ?9 ?G)
+ (Assert-char-class "[:xdigit:]" ?A ?{)
+ (Assert-char-class "[:xdigit:]" ?a ?})
+ (Assert-char-class "[:xdigit:]" ?1 ? )
+ (Assert-char-class "[:xdigit:]" ?2 ?Z)
+ (Assert-char-class "[:xdigit:]" ?3 ?\x00)
+ (Assert-char-class "[:xdigit:]" ?4 ?\x09)
+ (Assert-char-class "[:xdigit:]" ?5 ?\x7f)
+ (Assert-char-class "[:xdigit:]" ?6 ?z)
+
+ (Assert-char-class "[:space:]" ?\ ?0)
+ (Assert-char-class "[:space:]" ?\t ?9)
+ (Assert-char-class "[:space:]" ?\ ?A)
+ (Assert-char-class "[:space:]" ?\t ?Z)
+ (Assert-char-class "[:space:]" ?\ ?\x00)
+ (Assert-char-class "[:space:]" ?\ ?\x7f)
+ (Assert-char-class "[:space:]" ?\t ?a)
+ (Assert-char-class "[:space:]" ?\ ?z)
+
+ (Assert-char-class "[:print:]" ?\ ?\x00)
+ (Assert-char-class "[:print:]" ?0 ?\x09)
+ (Assert-char-class "[:print:]" ?9 ?\x7f)
+ (Assert-char-class "[:print:]" ?A ?\x01)
+ (Assert-char-class "[:print:]" ?Z ?\x02)
+ (Assert-char-class "[:print:]" ?B ?\t)
+ (Assert-char-class "[:print:]" ?a ?\x03)
+ (Assert-char-class "[:print:]" ?z ?\x04)
+
+ (Assert-char-class "[:punct:]" ?\( ?0)
+ (Assert-char-class "[:punct:]" ?. ?9)
+ (Assert-char-class "[:punct:]" ?{ ?A)
+ (Assert-char-class "[:punct:]" ?} ?Z)
+ (Assert-char-class "[:punct:]" ?: ?\t)
+ (Assert-char-class "[:punct:]" ?\; ?\x00)
+ (Assert-char-class "[:punct:]" ?< ?\x09)
+ (Assert-char-class "[:punct:]" ?> ?\x7f)
+ (Assert-char-class "[:punct:]" ?= ?a)
+ (Assert-char-class "[:punct:]" ?\? ?z))
https://bitbucket.org/xemacs/xemacs/changeset/3f4a234f4672/
changeset: 3f4a234f4672
user: kehoea
date: 2012-04-21 19:58:28
summary: Support non-ASCII correctly in character classes, test this.
src/ChangeLog addition:
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
Support non-ASCII correctly in character classes ([:alnum:] and
friends).
* regex.c:
* regex.c (ISBLANK, ISUNIBYTE): New. Make these and friends
independent of the locale, since we want them to be consistent in
XEmacs.
* regex.c (print_partial_compiled_pattern): Print the flags for
charset_mule; don't print non-ASCII as the character values in
ranges, this breaks with locales.
* regex.c (enum):
Define various flags the charset_mule and charset_mule_not opcodes
can now take.
* regex.c (CHAR_CLASS_MAX_LENGTH): Update this.
* regex.c (re_iswctype, re_wctype): New, from GNU.
* regex.c (re_wctype_can_match_non_ascii): New; used when deciding
on whether to use charset_mule or the ASCII-only regex character
set opcode.
* regex.c (regex_compile):
Error correctly on long, non-existent character class names.
Break out the handling of charsets that can match non-ASCII into a
separate clause. Use compile_char_class when compiling character
classes.
* regex.c (compile_char_class): New. Used in regex_compile when
compiling character sets that may match non-ASCII.
* regex.c (re_compile_fastmap):
If there are flags set for charset_mule or charset_mule_not, we
can't use the fastmap (since we need to check syntax table values
that aren't available there).
* regex.c (re_match_2_internal):
Check the new flags passed to the charset_mule{,_not} opcode,
observe them if appropriate.
* regex.h:
* regex.h (enum):
Expose re_wctype_t here, imported from GNU.
tests/ChangeLog addition:
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
* automated/regexp-tests.el:
* automated/regexp-tests.el (Assert-char-class):
Check that #'string-match errors correctly with an over-long
character class name.
Add tests for character class functionality that supports
non-ASCII characters. These tests expose bugs in GNU Emacs
24.0.94.2, but pass under current XEmacs.
affected #: 5 files
diff -r 1d9f603e9125575ac67f9cff0f2159a046d99d3e -r 3f4a234f4672ab40f61811656bc674bcd80664db src/ChangeLog
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,41 @@
+2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ Support non-ASCII correctly in character classes ([:alnum:] and
+ friends).
+
+ * regex.c:
+ * regex.c (ISBLANK, ISUNIBYTE): New. Make these and friends
+ independent of the locale, since we want them to be consistent in
+ XEmacs.
+ * regex.c (print_partial_compiled_pattern): Print the flags for
+ charset_mule; don't print non-ASCII as the character values in
+ ranges, this breaks with locales.
+ * regex.c (enum):
+ Define various flags the charset_mule and charset_mule_not opcodes
+ can now take.
+ * regex.c (CHAR_CLASS_MAX_LENGTH): Update this.
+ * regex.c (re_iswctype, re_wctype): New, from GNU.
+ * regex.c (re_wctype_can_match_non_ascii): New; used when deciding
+ on whether to use charset_mule or the ASCII-only regex character
+ set opcode.
+ * regex.c (regex_compile):
+ Error correctly on long, non-existent character class names.
+ Break out the handling of charsets that can match non-ASCII into a
+ separate clause. Use compile_char_class when compiling character
+ classes.
+ * regex.c (compile_char_class): New. Used in regex_compile when
+ compiling character sets that may match non-ASCII.
+ * regex.c (re_compile_fastmap):
+ If there are flags set for charset_mule or charset_mule_not, we
+ can't use the fastmap (since we need to check syntax table values
+ that aren't available there).
+ * regex.c (re_match_2_internal):
+ Check the new flags passed to the charset_mule{,_not} opcode,
+ observe them if appropriate.
+ * regex.h:
+ * regex.h (enum):
+ Expose re_wctype_t here, imported from GNU.
+
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
* regex.h (RE_SYNTAX_EMACS):
diff -r 1d9f603e9125575ac67f9cff0f2159a046d99d3e -r 3f4a234f4672ab40f61811656bc674bcd80664db src/regex.c
--- a/src/regex.c
+++ b/src/regex.c
@@ -178,53 +178,91 @@
/* isalpha etc. are used for the character classes. */
#include <ctype.h>
-/* Jim Meyering writes:
-
- "... Some ctype macros are valid only for character codes that
- isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
- using /bin/cc or gcc but without giving an ansi option). So, all
- ctype uses should be through macros like ISPRINT... If
- STDC_HEADERS is defined, then autoconf has verified that the ctype
- macros don't need to be guarded with references to isascii. ...
- Defining isascii to 1 should let any compiler worth its salt
- eliminate the && through constant folding." */
-
-#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
-#define ISASCII_1(c) 1
+#ifdef emacs
+
+/* 1 if C is an ASCII character. */
+#define ISASCII(c) ((c) < 0x80)
+
+/* 1 if C is a unibyte character. */
+#define ISUNIBYTE(c) 0
+
+/* The Emacs definitions should not be directly affected by locales. */
+
+/* In Emacs, these are only used for single-byte characters. */
+#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
+#define ISCNTRL(c) ((c) < ' ')
+#define ISXDIGIT(c) (ISDIGIT (c) || ((c) >= 'a' && (c) <= 'f') \
+ || ((c) >= 'A' && (c) <= 'F'))
+
+/* This is only used for single-byte characters. */
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+
+/* The rest must handle multibyte characters. */
+
+#define ISGRAPH(c) ((c) > ' ' && (c) != 0x7f)
+#define ISPRINT(c) ((c) == ' ' || ISGRAPH (c))
+#define ISALPHA(c) (ISASCII (c) ? (((c) >= 'a' && (c) <= 'z') \
+ || ((c) >= 'A' && (c) <= 'Z')) \
+ : ISWORD (c))
+#define ISALNUM(c) (ISALPHA (c) || ISDIGIT (c))
+
+#define ISLOWER(c) LOWERCASEP (lispbuf, c)
+
+#define ISPUNCT(c) (ISASCII (c) \
+ ? ((c) > ' ' && (c) < 0x7F \
+ && !(((c) >= 'a' && (c) <= 'z') \
+ || ((c) >= 'A' && (c) <= 'Z') \
+ || ((c) >= '0' && (c) <= '9'))) \
+ : !ISWORD (c))
+
+#define ISSPACE(c) \
+ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Swhitespace)
+
+#define ISUPPER(c) UPPERCASEP (lispbuf, c)
+
+#define ISWORD(c) (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Sword)
+
+#else /* not emacs */
+
+/* 1 if C is an ASCII character. */
+#define ISASCII(c) ((c) < 0200)
+
+/* 1 if C is a unibyte character. */
+#define ISUNIBYTE(c) 0
+
+#ifdef isblank
+# define ISBLANK(c) isblank (c)
#else
-#define ISASCII_1(c) isascii(c)
-#endif
-
-#ifdef MULE
-/* The IS*() macros can be passed any character, including an extended
- one. We need to make sure there are no crashes, which would occur
- otherwise due to out-of-bounds array references. */
-#define ISASCII(c) (((EMACS_UINT) (c)) < 0x100 && ISASCII_1 (c))
-#else
-#define ISASCII(c) ISASCII_1 (c)
-#endif /* MULE */
-
-#ifdef isblank
-#define ISBLANK(c) (ISASCII (c) && isblank (c))
-#else
-#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
-#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+# define ISGRAPH(c) isgraph (c)
#else
-#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+# define ISGRAPH(c) (isprint (c) && !isspace (c))
#endif
-#define ISPRINT(c) (ISASCII (c) && isprint (c))
-#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
-#define ISALNUM(c) (ISASCII (c) && isalnum (c))
-#define ISALPHA(c) (ISASCII (c) && isalpha (c))
-#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
-#define ISLOWER(c) (ISASCII (c) && islower (c))
-#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
-#define ISSPACE(c) (ISASCII (c) && isspace (c))
-#define ISUPPER(c) (ISASCII (c) && isupper (c))
-#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+/* Solaris defines ISPRINT so we must undefine it first. */
+#undef ISPRINT
+#define ISPRINT(c) isprint (c)
+#define ISDIGIT(c) isdigit (c)
+#define ISALNUM(c) isalnum (c)
+#define ISALPHA(c) isalpha (c)
+#define ISCNTRL(c) iscntrl (c)
+#define ISLOWER(c) islower (c)
+#define ISPUNCT(c) ispunct (c)
+#define ISSPACE(c) isspace (c)
+#define ISUPPER(c) isupper (c)
+#define ISXDIGIT(c) isxdigit (c)
+
+#define ISWORD(c) ISALPHA (c)
+
+#ifdef _tolower
+# define TOLOWER(c) _tolower (c)
+#else
+# define TOLOWER(c) tolower (c)
+#endif
+
+#endif /* emacs */
#ifndef NULL
#define NULL (void *)0
@@ -913,6 +951,7 @@
printf ("/charset_mule [%s",
(re_opcode_t) *(p - 1) == charset_mule_not ? "^" : "");
+ printf (" flags: 0x%02x ", *p++);
nentries = unified_range_table_nentries (p);
for (i = 0; i < nentries; i++)
{
@@ -921,14 +960,14 @@
unified_range_table_get_range (p, i, &first, &last,
&dummy_val);
- if (first < 0x100)
+ if (first < 0x80)
putchar (first);
else
printf ("(0x%lx)", (long)first);
if (first != last)
{
putchar ('-');
- if (last < 0x100)
+ if (last < 0x80)
putchar (last);
else
printf ("(0x%lx)", (long)last);
@@ -1974,6 +2013,22 @@
/* The next available element. */
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+/* Bits used to implement the multibyte-part of the various character
+ classes such as [:alnum:] in a charset's range table. XEmacs; use an
+ enum, so they're visible in the debugger. */
+enum
+{
+ BIT_WORD = (1 << 0),
+ BIT_LOWER = (1 << 1),
+ BIT_PUNCT = (1 << 2),
+ BIT_SPACE = (1 << 3),
+ BIT_UPPER = (1 << 4),
+ /* XEmacs; we need this, because we unify treatment of ASCII and non-ASCII
+ (possible matches) in charset_mule. [:alpha:] matches all characters
+ with word syntax, with the exception of [0-9]. We don't need
+ BIT_MULTIBYTE. */
+ BIT_ALPHA = (1 << 5)
+};
/* Set the bit for character C in a bit vector. */
#define SET_LIST_BIT(c) \
@@ -1985,22 +2040,8 @@
/* Set the "bit" for character C in a range table. */
#define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt)
-/* Set the "bit" for character c in the appropriate table. */
-#define SET_EITHER_BIT(c) \
- do { \
- if (has_extended_chars) \
- SET_RANGETAB_BIT (c); \
- else \
- SET_LIST_BIT (c); \
- } while (0)
-
-#else /* not MULE */
-
-#define SET_EITHER_BIT(c) SET_LIST_BIT (c)
-
#endif
-
/* Get the next unsigned number in the uncompiled pattern. */
#define GET_UNSIGNED_NUMBER(num) \
{ if (p != pend) \
@@ -2018,15 +2059,110 @@
} \
}
-#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
-
-#define IS_CHAR_CLASS(string) \
- (STREQ (string, "alpha") || STREQ (string, "upper") \
- || STREQ (string, "lower") || STREQ (string, "digit") \
- || STREQ (string, "alnum") || STREQ (string, "xdigit") \
- || STREQ (string, "space") || STREQ (string, "print") \
- || STREQ (string, "punct") || STREQ (string, "graph") \
- || STREQ (string, "cntrl") || STREQ (string, "blank"))
+#define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */
+
+/* Map a string to the char class it names (if any). */
+static re_wctype_t
+re_wctype (const char *string)
+{
+ if (STREQ (string, "alnum")) return RECC_ALNUM;
+ else if (STREQ (string, "alpha")) return RECC_ALPHA;
+ else if (STREQ (string, "word")) return RECC_WORD;
+ else if (STREQ (string, "ascii")) return RECC_ASCII;
+ else if (STREQ (string, "nonascii")) return RECC_NONASCII;
+ else if (STREQ (string, "graph")) return RECC_GRAPH;
+ else if (STREQ (string, "lower")) return RECC_LOWER;
+ else if (STREQ (string, "print")) return RECC_PRINT;
+ else if (STREQ (string, "punct")) return RECC_PUNCT;
+ else if (STREQ (string, "space")) return RECC_SPACE;
+ else if (STREQ (string, "upper")) return RECC_UPPER;
+ else if (STREQ (string, "unibyte")) return RECC_UNIBYTE;
+ else if (STREQ (string, "multibyte")) return RECC_MULTIBYTE;
+ else if (STREQ (string, "digit")) return RECC_DIGIT;
+ else if (STREQ (string, "xdigit")) return RECC_XDIGIT;
+ else if (STREQ (string, "cntrl")) return RECC_CNTRL;
+ else if (STREQ (string, "blank")) return RECC_BLANK;
+ else return RECC_ERROR;
+}
+
+/* True if CH is in the char class CC. */
+static re_bool
+re_iswctype (int ch, re_wctype_t cc)
+{
+#ifdef emacs
+ /* This is cheesy, lispbuf isn't available to us when compiling the
+ pattern. It's effectively only called (on Mule builds) when the current
+ buffer doesn't matter (e.g. for RECC_ASCII, RECC_CNTRL), so it's not a
+ big deal. */
+ struct buffer *lispbuf = current_buffer;
+#endif
+
+ switch (cc)
+ {
+ case RECC_ALNUM: return ISALNUM (ch) != 0;
+ case RECC_ALPHA: return ISALPHA (ch) != 0;
+ case RECC_BLANK: return ISBLANK (ch) != 0;
+ case RECC_CNTRL: return ISCNTRL (ch) != 0;
+ case RECC_DIGIT: return ISDIGIT (ch) != 0;
+ case RECC_GRAPH: return ISGRAPH (ch) != 0;
+ case RECC_LOWER: return ISLOWER (ch) != 0;
+ case RECC_PRINT: return ISPRINT (ch) != 0;
+ case RECC_PUNCT: return ISPUNCT (ch) != 0;
+ case RECC_SPACE: return ISSPACE (ch) != 0;
+ case RECC_UPPER: return ISUPPER (ch) != 0;
+ case RECC_XDIGIT: return ISXDIGIT (ch) != 0;
+ case RECC_ASCII: return ISASCII (ch) != 0;
+ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch);
+ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0;
+ case RECC_WORD: return ISWORD (ch) != 0;
+ case RECC_ERROR: return false;
+ default:
+ abort ();
+ }
+}
+
+#ifdef MULE
+
+static re_bool
+re_wctype_can_match_non_ascii (re_wctype_t cc)
+{
+ switch (cc)
+ {
+ case RECC_ASCII:
+ case RECC_UNIBYTE:
+ case RECC_CNTRL:
+ case RECC_DIGIT:
+ case RECC_XDIGIT:
+ case RECC_BLANK:
+ return false;
+ default:
+ return true;
+ }
+}
+
+/* Return a bit-pattern to use in the range-table bits to match multibyte
+ chars of class CC. */
+static unsigned char
+re_wctype_to_bit (re_wctype_t cc)
+{
+ switch (cc)
+ {
+ case RECC_PRINT: case RECC_GRAPH:
+ case RECC_ALPHA: return BIT_ALPHA;
+ case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
+ case RECC_LOWER: return BIT_LOWER;
+ case RECC_UPPER: return BIT_UPPER;
+ case RECC_PUNCT: return BIT_PUNCT;
+ case RECC_SPACE: return BIT_SPACE;
+ case RECC_MULTIBYTE: case RECC_NONASCII:
+ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
+ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
+ default:
+ abort ();
+ }
+}
+
+#endif /* emacs */
static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
@@ -2049,6 +2185,8 @@
RE_TRANSLATE_TYPE translate,
reg_syntax_t syntax,
Lisp_Object rtab);
+static reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab,
+ Bitbyte *flags_out);
#endif /* MULE */
static re_bool group_match_null_string_p (unsigned char **p,
unsigned char *end,
@@ -2512,15 +2650,20 @@
BUF_PUSH (anychar);
break;
+#ifdef MULE
+#define MAYBE_START_OVER_WITH_EXTENDED(ch) \
+ if (ch >= 0x80) \
+ { \
+ goto start_over_with_extended; \
+ } while (0)
+#else
+#define MAYBE_START_OVER_WITH_EXTENDED(ch)
+#endif
case '[':
{
/* XEmacs change: this whole section */
re_bool had_char_class = false;
-#ifdef MULE
- re_bool has_extended_chars = false;
- REGISTER Lisp_Object rtab = Qnil;
-#endif
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
@@ -2550,29 +2693,6 @@
&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
SET_LIST_BIT ('\n');
-#ifdef MULE
- start_over_with_extended:
- if (has_extended_chars)
- {
- /* There are extended chars here, which means we need to start
- over and shift to unified range-table format. */
- if (buf_end[-2] == charset)
- buf_end[-2] = charset_mule;
- else
- buf_end[-2] = charset_mule_not;
- buf_end--;
- p = p1; /* go back to the beginning of the charset, after
- a possible ^. */
- rtab = Vthe_lisp_rangetab;
- Fclear_range_table (rtab);
-
- /* charset_not matches newline according to a syntax bit. */
- if ((re_opcode_t) buf_end[-1] == charset_mule_not
- && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
- SET_EITHER_BIT ('\n');
- }
-#endif /* MULE */
-
/* Read in characters and ranges, setting map bits. */
for (;;)
{
@@ -2580,32 +2700,22 @@
PATFETCH (c);
-#ifdef MULE
- if (c >= 0x80 && !has_extended_chars)
- {
- has_extended_chars = 1;
- /* Frumble-bumble, we've found some extended chars.
- Need to start over, process everything using
- the general extended-char mechanism, and need
- to use charset_mule and charset_mule_not instead
- of charset and charset_not. */
- goto start_over_with_extended;
- }
-#endif /* MULE */
+ /* Frumble-bumble, we may have found some extended chars.
+ Need to start over, process everything using the general
+ extended-char mechanism, and need to use charset_mule and
+ charset_mule_not instead of charset and charset_not. */
+ MAYBE_START_OVER_WITH_EXTENDED (c);
+
/* \ might escape characters inside [...] and [^...]. */
if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
{
if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
PATFETCH (c1);
-#ifdef MULE
- if (c1 >= 0x80 && !has_extended_chars)
- {
- has_extended_chars = 1;
- goto start_over_with_extended;
- }
-#endif /* MULE */
- SET_EITHER_BIT (c1);
+
+ MAYBE_START_OVER_WITH_EXTENDED (c1);
+
+ SET_LIST_BIT (c1);
continue;
}
@@ -2631,18 +2741,11 @@
{
reg_errcode_t ret;
-#ifdef MULE
- if (* (unsigned char *) p >= 0x80 && !has_extended_chars)
- {
- has_extended_chars = 1;
- goto start_over_with_extended;
- }
- if (has_extended_chars)
- ret = compile_extended_range (&p, pend, translate,
- syntax, rtab);
- else
-#endif /* MULE */
- ret = compile_range (&p, pend, translate, syntax, buf_end);
+ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p);
+
+ ret = compile_range (&p, pend, translate, syntax,
+ buf_end);
+
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
}
@@ -2653,18 +2756,10 @@
/* Move past the `-'. */
PATFETCH (c1);
-#ifdef MULE
- if (* (unsigned char *) p >= 0x80 && !has_extended_chars)
- {
- has_extended_chars = 1;
- goto start_over_with_extended;
- }
- if (has_extended_chars)
- ret = compile_extended_range (&p, pend, translate,
- syntax, rtab);
- else
-#endif /* MULE */
- ret = compile_range (&p, pend, translate, syntax, buf_end);
+ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p);
+
+ ret = compile_range (&p, pend, translate, syntax, buf_end);
+
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
}
@@ -2674,6 +2769,7 @@
else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
{ /* Leave room for the null. */
char str[CHAR_CLASS_MAX_LENGTH + 1];
+ int ch = 0;
PATFETCH (c);
c1 = 0;
@@ -2683,14 +2779,14 @@
for (;;)
{
- /* #### This code is unused.
- Correctness is not checked after TRT
- table change. */
- PATFETCH (c);
- if (c == ':' || c == ']' || p == pend
- || c1 == CHAR_CLASS_MAX_LENGTH)
- break;
- str[c1++] = (char) c;
+ PATFETCH (c);
+ if ((c == ':' && *p == ']') || p == pend)
+ break;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
}
str[c1] = '\0';
@@ -2699,21 +2795,9 @@
the leading `:' and `[' (but set bits for them). */
if (c == ':' && *p == ']')
{
- int ch;
- re_bool is_alnum = STREQ (str, "alnum");
- re_bool is_alpha = STREQ (str, "alpha");
- re_bool is_blank = STREQ (str, "blank");
- re_bool is_cntrl = STREQ (str, "cntrl");
- re_bool is_digit = STREQ (str, "digit");
- re_bool is_graph = STREQ (str, "graph");
- re_bool is_lower = STREQ (str, "lower");
- re_bool is_print = STREQ (str, "print");
- re_bool is_punct = STREQ (str, "punct");
- re_bool is_space = STREQ (str, "space");
- re_bool is_upper = STREQ (str, "upper");
- re_bool is_xdigit = STREQ (str, "xdigit");
-
- if (!IS_CHAR_CLASS (str))
+ re_wctype_t cc = re_wctype (str);
+
+ if (cc == RECC_ERROR)
FREE_STACK_RETURN (REG_ECTYPE);
/* Throw away the ] at the end of the character
@@ -2722,26 +2806,20 @@
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
- for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
- {
- /* This was split into 3 if's to
- avoid an arbitrary limit in some compiler. */
- if ( (is_alnum && ISALNUM (ch))
- || (is_alpha && ISALPHA (ch))
- || (is_blank && ISBLANK (ch))
- || (is_cntrl && ISCNTRL (ch)))
- SET_EITHER_BIT (ch);
- if ( (is_digit && ISDIGIT (ch))
- || (is_graph && ISGRAPH (ch))
- || (is_lower && ISLOWER (ch))
- || (is_print && ISPRINT (ch)))
- SET_EITHER_BIT (ch);
- if ( (is_punct && ISPUNCT (ch))
- || (is_space && ISSPACE (ch))
- || (is_upper && ISUPPER (ch))
- || (is_xdigit && ISXDIGIT (ch)))
- SET_EITHER_BIT (ch);
- }
+#ifdef MULE
+ if (re_wctype_can_match_non_ascii (cc))
+ {
+ goto start_over_with_extended;
+ }
+#endif /* MULE */
+ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
+ {
+ if (re_iswctype (ch, cc))
+ {
+ SET_LIST_BIT (ch);
+ }
+ }
+
had_char_class = true;
}
else
@@ -2749,30 +2827,18 @@
c1++;
while (c1--)
PATUNFETCH;
- SET_EITHER_BIT ('[');
- SET_EITHER_BIT (':');
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
had_char_class = false;
}
}
else
{
had_char_class = false;
- SET_EITHER_BIT (c);
+ SET_LIST_BIT (c);
}
}
-#ifdef MULE
- if (has_extended_chars)
- {
- /* We have a range table, not a bit vector. */
- int bytes_needed =
- unified_range_table_bytes_needed (rtab);
- GET_BUFFER_SPACE (bytes_needed);
- unified_range_table_copy_data (rtab, buf_end);
- buf_end += unified_range_table_bytes_used (buf_end);
- break;
- }
-#endif /* MULE */
/* Discard any (non)matching list bytes that are all 0 at the
end of the map. Decrease the map-length byte too. */
while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0)
@@ -2781,6 +2847,163 @@
}
break;
+#ifdef MULE
+ start_over_with_extended:
+ {
+ REGISTER Lisp_Object rtab = Qnil;
+ Bitbyte flags = 0;
+ int bytes_needed = sizeof (flags);
+ re_bool had_char_class = false;
+
+ /* There are extended chars here, which means we need to use the
+ unified range-table format. */
+ if (buf_end[-2] == charset)
+ buf_end[-2] = charset_mule;
+ else
+ buf_end[-2] = charset_mule_not;
+ buf_end--;
+ p = p1; /* go back to the beginning of the charset, after
+ a possible ^. */
+ rtab = Vthe_lisp_rangetab;
+ Fclear_range_table (rtab);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) buf_end[-1] == charset_mule_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ SET_RANGETAB_BIT ('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+
+ SET_RANGETAB_BIT (c1);
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ FREE_STACK_RETURN (REG_ERANGE);
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret;
+
+ ret = compile_extended_range (&p, pend, translate, syntax,
+ rtab);
+
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ }
+
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_extended_range (&p, pend, translate,
+ syntax, rtab);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if ((c == ':' && *p == ']') || p == pend)
+ break;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ /* If it isn't a word bracketed by `[:' and `:]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but set bits for them). */
+ if (c == ':' && *p == ']')
+ {
+ re_wctype_t cc = re_wctype (str);
+ reg_errcode_t ret = REG_NOERROR;
+
+ if (cc == RECC_ERROR)
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ ret = compile_char_class (cc, rtab, &flags);
+
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_RANGETAB_BIT ('[');
+ SET_RANGETAB_BIT (':');
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ SET_RANGETAB_BIT (c);
+ }
+ }
+
+ bytes_needed += unified_range_table_bytes_needed (rtab);
+ GET_BUFFER_SPACE (bytes_needed);
+ *buf_end++ = flags;
+ unified_range_table_copy_data (rtab, buf_end);
+ buf_end += unified_range_table_bytes_used (buf_end);
+ break;
+ }
+#endif /* MULE */
case '(':
if (syntax & RE_NO_BK_PARENS)
@@ -3715,6 +3938,69 @@
return REG_NOERROR;
}
+/* Record what is needed to match character class CC inside a
+   charset_mule / charset_mule_not bracket expression.
+
+   The bit for CC (as given by re_wctype_to_bit) is OR'ed into *FLAGS_OUT.
+   Classes that correspond to a fixed set of code points are additionally
+   entered into the range table RTAB with the value Qt.  Classes that have
+   their own bit in the class-bits byte add nothing to RTAB.
+
+   Always returns REG_NOERROR.  */
+static reg_errcode_t
+compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out)
+{
+  *flags_out |= re_wctype_to_bit (cc);
+
+  switch (cc)
+    {
+    case RECC_ASCII:
+      put_range_table (rtab, 0, 0x7f, Qt);
+      break;
+
+    case RECC_XDIGIT:
+      put_range_table (rtab, 'a', 'f', Qt);
+      /* The upper bound must be 'F', not 'f': a range running from 'A' to
+         'f' would also admit 'G'-'Z' and the punctuation between 'Z' and
+         'a', none of which are hexadecimal digits.  */
+      put_range_table (rtab, 'A', 'F', Qt);
+      /* fallthrough */
+    case RECC_DIGIT:
+      put_range_table (rtab, '0', '9', Qt);
+      break;
+
+    case RECC_BLANK:
+      put_range_table (rtab, ' ', ' ', Qt);
+      put_range_table (rtab, '\t', '\t', Qt);
+      break;
+
+    case RECC_PRINT:
+      /* Space through tilde, plus every non-ASCII character.  */
+      put_range_table (rtab, ' ', 0x7e, Qt);
+      put_range_table (rtab, 0x80, MOST_POSITIVE_FIXNUM, Qt);
+      break;
+
+    case RECC_GRAPH:
+      /* As RECC_PRINT, but without the space character.  */
+      put_range_table (rtab, '!', 0x7e, Qt);
+      put_range_table (rtab, 0x80, MOST_POSITIVE_FIXNUM, Qt);
+      break;
+
+    case RECC_NONASCII:
+    case RECC_MULTIBYTE:
+      put_range_table (rtab, 0x80, MOST_POSITIVE_FIXNUM, Qt);
+      break;
+
+    case RECC_CNTRL:
+      put_range_table (rtab, 0x00, 0x1f, Qt);
+      break;
+
+    case RECC_UNIBYTE:
+      /* Never true in XEmacs. */
+      break;
+
+      /* The following all have their own bits in the class_bits argument to
+         charset_mule and charset_mule_not, they don't use the range table
+         information. */
+    case RECC_ALPHA:
+    case RECC_WORD:
+    case RECC_ALNUM: /* Equivalent to RECC_WORD */
+    case RECC_LOWER:
+    case RECC_PUNCT:
+    case RECC_SPACE:
+    case RECC_UPPER:
+      break;
+    }
+
+  return REG_NOERROR;
+}
+
+
#endif /* MULE */
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
@@ -3855,6 +4141,15 @@
{
int nentries;
int i;
+ Bitbyte flags = *p++;
+
+ if (flags)
+ {
+ /* We need to consult the syntax table, fastmap won't
+ work. */
+ bufp->can_be_null = 1;
+ goto done;
+ }
nentries = unified_range_table_nentries (p);
for (i = 0; i < nentries; i++)
@@ -3878,6 +4173,16 @@
set_itext_ichar (strr, last);
fastmap[*strr] = 1;
}
+ else if (MOST_POSITIVE_FIXNUM == last)
+ {
+ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all
+ non-ASCII characters. */
+ jj = 0x80;
+ while (jj < 0xA0)
+ {
+ fastmap[jj++] = 1;
+ }
+ }
}
}
break;
@@ -3887,6 +4192,15 @@
int nentries;
int i;
int smallest_prev = 0;
+ Bitbyte flags = *p++;
+
+ if (flags)
+ {
+ /* We need to consult the syntax table, fastmap won't
+ work. */
+ bufp->can_be_null = 1;
+ goto done;
+ }
nentries = unified_range_table_nentries (p);
for (i = 0; i < nentries; i++)
@@ -5416,15 +5730,27 @@
{
REGISTER Ichar c;
re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not;
+ Bitbyte class_bits = *p++;
DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : "");
-
REGEX_PREFETCH ();
c = itext_ichar_fmt (d, fmt, lispobj);
c = RE_TRANSLATE (c); /* The character to match. */
- if (EQ (Qt, unified_range_table_lookup (p, c, Qnil)))
- not_p = !not_p;
+ if ((class_bits &&
+ ((class_bits & BIT_ALPHA && ISALPHA (c))
+ || (class_bits & BIT_SPACE && ISSPACE (c))
+ || (class_bits & BIT_PUNCT && ISPUNCT (c))
+ || (class_bits & BIT_WORD && ISWORD (c))
+ || (TRANSLATE_P (translate) ?
+ (class_bits & (BIT_UPPER | BIT_LOWER)
+ && !NOCASEP (lispbuf, c))
+: ((class_bits & BIT_UPPER && ISUPPER (c))
+ || (class_bits & BIT_LOWER && ISLOWER (c))))))
+ || EQ (Qt, unified_range_table_lookup (p, c, Qnil)))
+ {
+ not_p = !not_p;
+ }
p += unified_range_table_bytes_used (p);
diff -r 1d9f603e9125575ac67f9cff0f2159a046d99d3e -r 3f4a234f4672ab40f61811656bc674bcd80664db src/regex.h
--- a/src/regex.h
+++ b/src/regex.h
@@ -546,6 +546,19 @@
extern int debug_regexps;
+typedef enum /* Identifiers for the [:name:] character classes of a bracket expression. */
+ {
+ RECC_ERROR = 0, /* Callers compare the re_wctype () result against this and report REG_ECTYPE. */
+ RECC_ALNUM, RECC_ALPHA, RECC_WORD, /* compile_char_class notes RECC_ALNUM as equivalent to RECC_WORD. */
+ RECC_GRAPH, RECC_PRINT,
+ RECC_LOWER, RECC_UPPER,
+ RECC_PUNCT, RECC_CNTRL,
+ RECC_DIGIT, RECC_XDIGIT,
+ RECC_BLANK, RECC_SPACE,
+ RECC_MULTIBYTE, RECC_NONASCII, /* compile_char_class enters the same range, 0x80 and up, for both. */
+ RECC_ASCII, RECC_UNIBYTE /* compile_char_class notes RECC_UNIBYTE as never true in XEmacs. */
+} re_wctype_t;
+
END_C_DECLS
#endif /* INCLUDED_regex_h_ */
diff -r 1d9f603e9125575ac67f9cff0f2159a046d99d3e -r 3f4a234f4672ab40f61811656bc674bcd80664db tests/ChangeLog
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,13 @@
+2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * automated/regexp-tests.el:
+ * automated/regexp-tests.el (Assert-char-class):
+ Check that #'string-match errors correctly with an over-long
+ character class name.
+ Add tests for character class functionality that supports
+ non-ASCII characters. These tests expose bugs in GNU Emacs
+ 24.0.94.2, but pass under current XEmacs.
+
2012-04-21 Aidan Kehoe <kehoea(a)parhasard.net>
* automated/regexp-tests.el:
diff -r 1d9f603e9125575ac67f9cff0f2159a046d99d3e -r 3f4a234f4672ab40f61811656bc674bcd80664db tests/automated/regexp-tests.el
--- a/tests/automated/regexp-tests.el
+++ b/tests/automated/regexp-tests.el
@@ -598,6 +598,14 @@
(Assert (eql (string-match "[\x7f\x81-\x9f]" "\x81") 0))
;; Test character classes
+
+;; This used not to error:
+(Check-Error-Message invalid-regexp "Invalid character class name"
+ (string-match "[[:alnum12345:]]" "a"))
+;; This always errored, as long as character classes were turned on
+(Check-Error-Message invalid-regexp "Invalid character class name"
+ (string-match "[[:alnum1234:]]" "a"))
+
(macrolet
((Assert-char-class (class matching-char non-matching-char)
(if (and (not (featurep 'mule))
@@ -648,7 +656,21 @@
(Assert (null (string-match ,(concat "[^" class
(string non-matching-char) "]")
,(concat (string matching-char)
- (string non-matching-char))))))))
+ (string non-matching-char)))))))
+ (Assert-never-matching (class &rest characters)
+ (cons
+ 'progn
+ (mapcan #'(lambda (character)
+ (if (or (not (eq 'decode-char (car-safe character)))
+ (featurep 'mule))
+ `((Assert (null (string-match
+ ,(concat "[" class "]")
+ ,(string (eval character)))))
+ (Assert (eql (string-match
+ ,(concat "[^" class "]")
+ ,(string (eval character)))
+ 0)))))
+ characters))))
(Assert-char-class "[:alpha:]" ?a ?0)
(Assert-char-class "[:alpha:]" ?z ?9)
(Assert-char-class "[:alpha:]" ?A ?0)
@@ -657,6 +679,18 @@
(Assert-char-class "[:alpha:]" ?c ?\x09)
(Assert-char-class "[:alpha:]" ?d ?\ )
(Assert-char-class "[:alpha:]" ?e ?\x7f)
+ (Assert-char-class
+ "[:alpha:]"
+ (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+ (Assert-char-class
+ "[:alpha:]"
+ (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ ?\x02)
+ (Assert-char-class
+ "[:alpha:]"
+ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS
(Assert-char-class "[:alnum:]" ?a ?.)
(Assert-char-class "[:alnum:]" ?z ?')
@@ -664,11 +698,46 @@
(Assert-char-class "[:alnum:]" ?Z ?!)
(Assert-char-class "[:alnum:]" ?0 ?,)
(Assert-char-class "[:alnum:]" ?9 ?$)
-
(Assert-char-class "[:alnum:]" ?b ?\x00)
(Assert-char-class "[:alnum:]" ?c ?\x09)
(Assert-char-class "[:alnum:]" ?d ?\ )
(Assert-char-class "[:alnum:]" ?e ?\x7f)
+ (Assert-char-class
+ "[:alnum:]"
+ (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+ (Assert-char-class
+ "[:alnum:]"
+ (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ ?\x02)
+ (Assert-char-class
+ "[:alnum:]"
+ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS
+
+ ;; Word is equivalent to alnum in this implementation.
+ (Assert-char-class "[:word:]" ?a ?.)
+ (Assert-char-class "[:word:]" ?z ?')
+ (Assert-char-class "[:word:]" ?A ?/)
+ (Assert-char-class "[:word:]" ?Z ?!)
+ (Assert-char-class "[:word:]" ?0 ?,)
+ (Assert-char-class "[:word:]" ?9 ?$)
+ (Assert-char-class "[:word:]" ?b ?\x00)
+ (Assert-char-class "[:word:]" ?c ?\x09)
+ (Assert-char-class "[:word:]" ?d ?\ )
+ (Assert-char-class "[:word:]" ?e ?\x7f)
+ (Assert-char-class
+ "[:word:]"
+ (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+ (Assert-char-class
+ "[:word:]"
+ (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ ?\x02)
+ (Assert-char-class
+ "[:word:]"
+ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS
(let ((case-fold-search nil))
(Assert-char-class "[:upper:]" ?A ?a)
@@ -679,6 +748,14 @@
(Assert-char-class "[:upper:]" ?E ?\x09)
(Assert-char-class "[:upper:]" ?F ?\ )
(Assert-char-class "[:upper:]" ?G ?\x7f)
+ (Assert-char-class
+ "[:upper:]"
+ (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:upper:]"
+ (decode-char 'ucs #x0392) ;; GREEK CAPITAL LETTER BETA
+ (decode-char 'ucs #x5357)) ;; kDefinition south; southern part; southward
(Assert-char-class "[:lower:]" ?a ?A)
(Assert-char-class "[:lower:]" ?z ?Z)
@@ -687,11 +764,17 @@
(Assert-char-class "[:lower:]" ?d ?\x00)
(Assert-char-class "[:lower:]" ?e ?\x09)
(Assert-char-class "[:lower:]" ?f ? )
- (Assert-char-class "[:lower:]" ?g ?\x7f))
+ (Assert-char-class "[:lower:]" ?g ?\x7f)
+ (Assert-char-class
+ "[:lower:]"
+ (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:lower:]"
+ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ (decode-char 'ucs #x5357)));; kDefinition south; southern part; southward
(let ((case-fold-search t))
- ;; These currently fail, because we don't take into account the buffer's
- ;; case table.
(Assert-char-class "[:upper:]" ?a ?\x00)
(Assert-char-class "[:upper:]" ?z ?\x01)
(Assert-char-class "[:upper:]" ?b ?{)
@@ -700,7 +783,14 @@
(Assert-char-class "[:upper:]" ?e ?>)
(Assert-char-class "[:upper:]" ?f ?\ )
(Assert-char-class "[:upper:]" ?g ?\x7f)
-
+ (Assert-char-class
+ "[:upper:]"
+ (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:upper:]"
+ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ (decode-char 'ucs #x5357)) ;; kDefinition south; southern part; southward
(Assert-char-class "[:lower:]" ?A ?\x00)
(Assert-char-class "[:lower:]" ?Z ?\x01)
(Assert-char-class "[:lower:]" ?B ?{)
@@ -708,7 +798,15 @@
(Assert-char-class "[:lower:]" ?D ?<)
(Assert-char-class "[:lower:]" ?E ?>)
(Assert-char-class "[:lower:]" ?F ?\ )
- (Assert-char-class "[:lower:]" ?G ?\x7F))
+ (Assert-char-class "[:lower:]" ?G ?\x7F)
+ (Assert-char-class
+ "[:lower:]"
+ (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:lower:]"
+ (decode-char 'ucs #x0392) ;; GREEK CAPITAL LETTER BETA
+ (decode-char 'ucs #x5357)));; kDefinition south; southern part; southward
(Assert-char-class "[:digit:]" ?0 ?a)
(Assert-char-class "[:digit:]" ?9 ?z)
@@ -718,6 +816,30 @@
(Assert-char-class "[:digit:]" ?4 ?\x09)
(Assert-char-class "[:digit:]" ?5 ? )
(Assert-char-class "[:digit:]" ?6 ?\x7f)
+ (Assert-char-class
+ "[:digit:]" ?7
+ (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS
+ (Assert-char-class
+ "[:digit:]" ?8
+ (decode-char 'ucs #x0392)) ;; GREEK CAPITAL LETTER BETA
+ (Assert-char-class
+ "[:digit:]" ?9
+ (decode-char 'ucs #x03B2)) ;; GREEK SMALL LETTER BETA
+ (Assert-char-class
+ "[:digit:]" ?0
+ (decode-char 'ucs #x0410)) ;; CYRILLIC CAPITAL LETTER A
+ (Assert-char-class
+ "[:digit:]" ?1
+ (decode-char 'ucs #x0430)) ;; CYRILLIC SMALL LETTER A
+ (Assert-char-class
+ "[:digit:]" ?2
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:digit:]" ?3
+ (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+ (Assert-char-class
+ "[:digit:]" ?4
+ (decode-char 'ucs #x5357)) ;; kDefinition south; southern part; southward
(Assert-char-class "[:xdigit:]" ?0 ?g)
(Assert-char-class "[:xdigit:]" ?9 ?G)
@@ -729,6 +851,30 @@
(Assert-char-class "[:xdigit:]" ?4 ?\x09)
(Assert-char-class "[:xdigit:]" ?5 ?\x7f)
(Assert-char-class "[:xdigit:]" ?6 ?z)
+ (Assert-char-class
+ "[:xdigit:]" ?7
+ (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS
+ (Assert-char-class
+ "[:xdigit:]" ?8
+ (decode-char 'ucs #x0392)) ;; GREEK CAPITAL LETTER BETA
+ (Assert-char-class
+ "[:xdigit:]" ?9
+ (decode-char 'ucs #x03B2)) ;; GREEK SMALL LETTER BETA
+ (Assert-char-class
+ "[:xdigit:]" ?a
+ (decode-char 'ucs #x0410)) ;; CYRILLIC CAPITAL LETTER A
+ (Assert-char-class
+ "[:xdigit:]" ?B
+ (decode-char 'ucs #x0430)) ;; CYRILLIC SMALL LETTER A
+ (Assert-char-class
+ "[:xdigit:]" ?c
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:xdigit:]" ?D
+ (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+ (Assert-char-class
+ "[:xdigit:]" ?e
+ (decode-char 'ucs #x5357)) ;; kDefinition south; southern part; southward
(Assert-char-class "[:space:]" ?\ ?0)
(Assert-char-class "[:space:]" ?\t ?9)
@@ -738,6 +884,30 @@
(Assert-char-class "[:space:]" ?\ ?\x7f)
(Assert-char-class "[:space:]" ?\t ?a)
(Assert-char-class "[:space:]" ?\ ?z)
+ (Assert-char-class
+ "[:space:]" ?\
+ (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS
+ (Assert-char-class
+ "[:space:]" ?\t
+ (decode-char 'ucs #x0392)) ;; GREEK CAPITAL LETTER BETA
+ (Assert-char-class
+ "[:space:]" ?\
+ (decode-char 'ucs #x03B2)) ;; GREEK SMALL LETTER BETA
+ (Assert-char-class
+ "[:space:]" ?\t
+ (decode-char 'ucs #x0410)) ;; CYRILLIC CAPITAL LETTER A
+ (Assert-char-class
+ "[:space:]" ?\
+ (decode-char 'ucs #x0430)) ;; CYRILLIC SMALL LETTER A
+ (Assert-char-class
+ "[:space:]" ?\t
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:space:]" ?\
+ (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+ (Assert-char-class
+ "[:space:]" ?\t
+ (decode-char 'ucs #x5357)) ;; kDefinition south; southern part; southward
(Assert-char-class "[:print:]" ?\ ?\x00)
(Assert-char-class "[:print:]" ?0 ?\x09)
@@ -747,6 +917,63 @@
(Assert-char-class "[:print:]" ?B ?\t)
(Assert-char-class "[:print:]" ?a ?\x03)
(Assert-char-class "[:print:]" ?z ?\x04)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x0385) ;; GREEK DIALYTIKA TONOS
+ ?\x05)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x0392) ;; GREEK CAPITAL LETTER BETA
+ ?\x06)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ ?\x07)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ ?\x08)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ ?\x09)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x0686) ;; ARABIC LETTER TCHEH
+ ?\x0a)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x2116) ;; NUMERO SIGN
+ ?\x0b)
+ (Assert-char-class
+ "[:print:]" (decode-char 'ucs #x5357) ;; kDefinition south; southern part; southward
+ ?\x0c)
+
+ (Assert-char-class "[:graph:]" ?! ?\ )
+ (Assert-char-class "[:graph:]" ?0 ?\x09)
+ (Assert-char-class "[:graph:]" ?9 ?\x7f)
+ (Assert-char-class "[:graph:]" ?A ?\x01)
+ (Assert-char-class "[:graph:]" ?Z ?\x02)
+ (Assert-char-class "[:graph:]" ?B ?\t)
+ (Assert-char-class "[:graph:]" ?a ?\x03)
+ (Assert-char-class "[:graph:]" ?z ?\x04)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x0385) ;; GREEK DIALYTIKA TONOS
+ ?\x05)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x0392) ;; GREEK CAPITAL LETTER BETA
+ ?\x06)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ ?\x07)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ ?\x08)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ ?\x09)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x0686) ;; ARABIC LETTER TCHEH
+ ?\x0a)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x2116) ;; NUMERO SIGN
+ ?\x0b)
+ (Assert-char-class
+ "[:graph:]" (decode-char 'ucs #x5357) ;; kDefinition south; southern part; southward
+ ?\x0c)
(Assert-char-class "[:punct:]" ?\( ?0)
(Assert-char-class "[:punct:]" ?. ?9)
@@ -757,4 +984,102 @@
(Assert-char-class "[:punct:]" ?< ?\x09)
(Assert-char-class "[:punct:]" ?> ?\x7f)
(Assert-char-class "[:punct:]" ?= ?a)
- (Assert-char-class "[:punct:]" ?\? ?z))
+ (Assert-char-class "[:punct:]" ?\? ?z)
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x0385) ;; GREEK DIALYTIKA TONOS
+ ?a)
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x20af) ;; DRACHMA SIGN
+ (decode-char 'ucs #x0392)) ;; GREEK CAPITAL LETTER BETA
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x00a7) ;; SECTION SIGN
+ (decode-char 'ucs #x03B2)) ;; GREEK SMALL LETTER BETA
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x00a8) ;; DIAERESIS
+ (decode-char 'ucs #x0410)) ;; CYRILLIC CAPITAL LETTER A
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x0384) ;; GREEK TONOS
+ (decode-char 'ucs #x0430)) ;; CYRILLIC SMALL LETTER A
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x00b7) ;; MIDDLE DOT
+ (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:punct:]"
+ (decode-char 'ucs #x2116) ;; NUMERO SIGN
+ ?x)
+ (Assert-char-class
+ "[:punct:]"
+ ?=
+ (decode-char 'ucs #x5357)) ;; kDefinition south; southern part; southward
+
+ (Assert-char-class "[:ascii:]" ?a (decode-char 'ucs #x00a7)) ;; SECTION SIGN
+ (Assert-char-class "[:ascii:]" ?b (decode-char 'ucs #x00a8)) ;; DIAERESIS
+ (Assert-char-class "[:ascii:]" ?c (decode-char 'ucs #x00b7)) ;; MIDDLE DOT
+ (Assert-char-class "[:ascii:]" ?d (decode-char 'ucs #x0384)) ;; GREEK TONOS
+ (Assert-char-class
+ "[:ascii:]" ?\x00 (decode-char 'ucs #x0392)) ;; GREEK CAPITAL LETTER BETA
+ (Assert-char-class
+ "[:ascii:]" ?\x01 (decode-char 'ucs #x03B2)) ;; GREEK SMALL LETTER BETA
+ (Assert-char-class
+ "[:ascii:]" ?\t (decode-char 'ucs #x0410)) ;; CYRILLIC CAPITAL LETTER A
+ (Assert-char-class
+ "[:ascii:]" ?A (decode-char 'ucs #x0430)) ;; CYRILLIC SMALL LETTER A
+ (Assert-char-class
+ "[:ascii:]" ?B (decode-char 'ucs #x0686)) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:ascii:]" ?C (decode-char 'ucs #x20af)) ;; DRACHMA SIGN
+ (Assert-char-class
+ "[:ascii:]" ?\x7f (decode-char 'ucs #x2116)) ;; NUMERO SIGN
+
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x00a7) ?a) ;; SECTION SIGN
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x00a8) ?b) ;; DIAERESIS
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x00b7) ?c) ;; MIDDLE DOT
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x0384) ?d) ;; GREEK TONOS
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x0392) ?\x00) ;; GREEK CAPITAL LETTER BETA
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x03B2) ?\x01) ;; GREEK SMALL LETTER BETA
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x0410) ?\t) ;; CYRILLIC CAPITAL LETTER A
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x0430) ?A) ;; CYRILLIC SMALL LETTER A
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x0686) ?B) ;; ARABIC LETTER TCHEH
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x20af) ?C) ;; DRACHMA SIGN
+ (Assert-char-class
+ "[:nonascii:]" (decode-char 'ucs #x2116) ?\x7f) ;; NUMERO SIGN
+
+ (Assert-char-class
+ "[:multibyte:]" (decode-char 'ucs #x00a7) ?a) ;; SECTION SIGN
+ (Assert-char-class
+ "[:multibyte:]" (decode-char 'ucs #x00a8) ?b) ;; DIAERESIS
+ (Assert-char-class
+ "[:multibyte:]" (decode-char 'ucs #x00b7) ?c) ;; MIDDLE DOT
+ (Assert-char-class
+ "[:multibyte:]" (decode-char 'ucs #x0384) ?d) ;; GREEK TONOS
+ (Assert-char-class
+ "[:multibyte:]" (decode-char 'ucs #x0392)
+ ?\x00) ;; GREEK CAPITAL LETTER BETA
+
+ (Assert-never-matching
+ "[:unibyte:]"
+ ?\x01 ?\t ?A ?B ?C ?\x7f
+ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
+ (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
+ (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A
+ (decode-char 'ucs #x0686) ;; ARABIC LETTER TCHEH
+ (decode-char 'ucs #x20af) ;; DRACHMA SIGN
+ (decode-char 'ucs #x2116) ;; NUMERO SIGN
+ (decode-char 'ucs #x5357))) ;; kDefinition south; southern part; southward
+
Repository URL: https://bitbucket.org/xemacs/xemacs/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches
commit/xemacs-packages: 3 new changesets
12 years, 9 months
Bitbucket
3 new commits in xemacs-packages:
https://bitbucket.org/xemacs/xemacs-packages/changeset/2eb11978d9b6/
changeset: 2eb11978d9b6
user: Norbert Koch
date: 2012-04-16 11:39:55
summary: update cc-mode
affected #: 1 file
diff -r 15585bb3a8b844585b5c662f5475faaf50ba7ed1 -r 2eb11978d9b6b15ae4a850f0b48ea7ed14c46aa3 .hgsubstate
--- a/.hgsubstate
+++ b/.hgsubstate
@@ -17,7 +17,7 @@
da4e7d4a51c502e5ac05a224cb756f382f0ba4d7 xemacs-packages/c-support
11074b3808d1e349f3fddb3c4d50f8be7c0f859e xemacs-packages/calc
7524e4fb9de45d77812090a724fac4ebd7549d6e xemacs-packages/calendar
-e56d1804baae761390362fb17dac8eaee611010f xemacs-packages/cc-mode
+3c78c03936c18a9d23802d948cdf8980f26cb389 xemacs-packages/cc-mode
a7ae1cfb2376bcd32617c1c88afe08872b11d298 xemacs-packages/cedet-common
87dd21fac17ea98219267b1378b4696698d6c4ff xemacs-packages/clearcase
e18acdbfcd36295d052cd56fa2e6d78c68b4b7d4 xemacs-packages/cogre
https://bitbucket.org/xemacs/xemacs-packages/changeset/091b95c1a0e3/
changeset: 091b95c1a0e3
user: Norbert Koch
date: 2012-04-16 11:40:40
summary: XEmacs Package Release
affected #: 1 file
diff -r 2eb11978d9b6b15ae4a850f0b48ea7ed14c46aa3 -r 091b95c1a0e353f6254166bb319da586d8b255dd ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2012-04-16 Norbert Koch <viteno(a)xemacs.org>
+
+ * Packages released: cc-mode.
+
2012-03-19 Norbert Koch <viteno(a)xemacs.org>
* Packages released: cc-mode.
https://bitbucket.org/xemacs/xemacs-packages/changeset/dacaf14c9a2a/
changeset: dacaf14c9a2a
user: Norbert Koch
date: 2012-04-16 11:58:12
summary: Prerelease cc-mode
affected #: 1 file
diff -r 091b95c1a0e353f6254166bb319da586d8b255dd -r dacaf14c9a2a06b9c7c1f544d5c56fed8780712a .hgsubstate
--- a/.hgsubstate
+++ b/.hgsubstate
@@ -17,7 +17,7 @@
da4e7d4a51c502e5ac05a224cb756f382f0ba4d7 xemacs-packages/c-support
11074b3808d1e349f3fddb3c4d50f8be7c0f859e xemacs-packages/calc
7524e4fb9de45d77812090a724fac4ebd7549d6e xemacs-packages/calendar
-3c78c03936c18a9d23802d948cdf8980f26cb389 xemacs-packages/cc-mode
+41e69539419348a61d29654486a909ca57559777 xemacs-packages/cc-mode
a7ae1cfb2376bcd32617c1c88afe08872b11d298 xemacs-packages/cedet-common
87dd21fac17ea98219267b1378b4696698d6c4ff xemacs-packages/clearcase
e18acdbfcd36295d052cd56fa2e6d78c68b4b7d4 xemacs-packages/cogre
Repository URL: https://bitbucket.org/xemacs/xemacs-packages/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches
commit/cc-mode: 2 new changesets
12 years, 9 months
Bitbucket
2 new commits in cc-mode:
https://bitbucket.org/xemacs/cc-mode/changeset/f1a6c0e64739/
changeset: f1a6c0e64739
user: Norbert Koch
date: 2012-04-16 11:40:40
summary: XEmacs Package Release 1.54
affected #: 2 files
diff -r 3c78c03936c18a9d23802d948cdf8980f26cb389 -r f1a6c0e647399eba8c3511db89520f0efc63d535 ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2012-04-16 Norbert Koch <viteno(a)xemacs.org>
+
+ * Makefile (VERSION): XEmacs package 1.54 released.
+
2012-03-19 Norbert Koch <viteno(a)xemacs.org>
* Makefile (VERSION): XEmacs package 1.53 released.
diff -r 3c78c03936c18a9d23802d948cdf8980f26cb389 -r f1a6c0e647399eba8c3511db89520f0efc63d535 Makefile
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,7 @@
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
-VERSION = 1.53
+VERSION = 1.54
AUTHOR_VERSION = 5.32.2
MAINTAINER = Alan Mackenzie <bug-cc-mode(a)gnu.org>
PACKAGE = cc-mode
https://bitbucket.org/xemacs/cc-mode/changeset/41e695394193/
changeset: 41e695394193
user: Norbert Koch
date: 2012-04-16 11:40:40
summary: Added tag cc-mode-1_54 for changeset f1a6c0e64739
affected #: 1 file
diff -r f1a6c0e647399eba8c3511db89520f0efc63d535 -r 41e69539419348a61d29654486a909ca57559777 .hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -86,3 +86,4 @@
3eb9de826afe87ee4f9f5a718373c616a00d1c40 cc-mode-1_51
ca5959d9f677f0de84c0607fd20cf8f19121b9ec cc-mode-1_52
39ad7f53a7def58a7d0e58eb58d1a6a0f22f710d cc-mode-1_53
+f1a6c0e647399eba8c3511db89520f0efc63d535 cc-mode-1_54
Repository URL: https://bitbucket.org/xemacs/cc-mode/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches
commit/cc-mode: acm: Ensure searching for keywords is case sensitive.
12 years, 9 months
Bitbucket
1 new commit in cc-mode:
https://bitbucket.org/xemacs/cc-mode/changeset/3c78c03936c1/
changeset: 3c78c03936c1
user: acm
date: 2012-04-15 20:16:57
summary: Ensure searching for keywords is case sensitive.
cc-cmds.el (c-electric-brace, c-electric-lt-gt, c-electric-paren)
(c-beginning-of-defun, c-end-of-defun, c-defun-name, c-mark-function)
(c-cpp-define-name, c-comment-indent, c-scan-conditionals)
(c-indent-defun, c-context-line-break): bind case-fold-search to nil.
cc-mode.el (c-font-lock-fontify-region): bind case-fold-search to nil.
affected #: 2 files
diff -r 29c4b1d0c74b93f571485cdd6c881437827eec93 -r 3c78c03936c18a9d23802d948cdf8980f26cb389 cc-cmds.el
--- a/cc-cmds.el
+++ b/cc-cmds.el
@@ -681,7 +681,7 @@
;; We want to inhibit blinking the paren since this would be
;; most disruptive. We'll blink it ourselves later on.
(old-blink-paren blink-paren-function)
- blink-paren-function)
+ blink-paren-function case-fold-search)
(c-save-buffer-state ()
(setq safepos (c-safe-position (point) (c-parse-state))
@@ -1095,7 +1095,7 @@
(interactive "*P")
(let ((c-echo-syntactic-information-p nil)
- final-pos found-delim)
+ final-pos found-delim case-fold-search)
(self-insert-command (prefix-numeric-value arg))
(setq final-pos (point))
@@ -1181,7 +1181,8 @@
(interactive "*P")
(let ((literal (c-save-buffer-state () (c-in-literal)))
;; shut this up
- (c-echo-syntactic-information-p nil))
+ (c-echo-syntactic-information-p nil)
+ case-fold-search)
(self-insert-command (prefix-numeric-value arg))
(if (and (not arg) (not literal))
@@ -1588,7 +1589,7 @@
; structure with other users of c-state-cache.
(orig-point-min (point-min)) (orig-point-max (point-max))
lim ; Position of { which has been widened to.
- where pos)
+ where pos case-fold-search)
(save-restriction
(if (eq c-defun-tactic 'go-outward)
@@ -1707,7 +1708,7 @@
; structure with other users of c-state-cache.
(orig-point-min (point-min)) (orig-point-max (point-max))
lim
- where pos)
+ where pos case-fold-search)
(save-restriction
(if (eq c-defun-tactic 'go-outward)
@@ -1768,7 +1769,7 @@
(interactive)
(c-save-buffer-state
(beginning-of-defun-function end-of-defun-function
- where pos name-end)
+ where pos name-end case-fold-search)
(save-restriction
(widen)
@@ -1963,7 +1964,7 @@
;; FIXME!!! for transient-mark/zemacs sometime. (2012-03-08.)
(interactive)
- (let (decl-limits)
+ (let (decl-limits case-fold-search)
(c-save-buffer-state nil
;; We try to be line oriented, unless there are several
;; declarations on the same line.
@@ -1997,11 +1998,12 @@
(defun c-cpp-define-name ()
"Return the name of the current CPP macro, or NIL if we're not in one."
(interactive)
- (save-excursion
- (and c-opt-cpp-macro-define-start
- (c-beginning-of-macro)
- (looking-at c-opt-cpp-macro-define-start)
- (match-string-no-properties 1))))
+ (let (case-fold-search)
+ (save-excursion
+ (and c-opt-cpp-macro-define-start
+ (c-beginning-of-macro)
+ (looking-at c-opt-cpp-macro-define-start)
+ (match-string-no-properties 1)))))
;; Movement by statements.
@@ -2883,7 +2885,8 @@
(eq (match-end 0) eot))
'cpp-end-block)
(t
- 'other))))
+ 'other)))
+ case-fold-search)
(if (and (memq line-type '(anchored-comment empty-line))
c-indent-comments-syntactically-p)
(let ((c-syntactic-context (c-guess-basic-syntax)))
@@ -3019,7 +3022,7 @@
(let* ((forward (> count 0))
(increment (if forward -1 1))
(search-function (if forward 're-search-forward 're-search-backward))
- new)
+ new case-fold-search)
(unless (integerp target-depth)
(setq target-depth (if target-depth -1 0)))
(save-excursion
@@ -3221,7 +3224,7 @@
In the macro case this also has the effect of realigning any line
continuation backslashes, unless `c-auto-align-backslashes' is nil."
(interactive "*")
- (let ((here (point-marker)) decl-limits)
+ (let ((here (point-marker)) decl-limits case-fold-search)
(unwind-protect
(progn
(c-save-buffer-state nil
@@ -4633,7 +4636,8 @@
(interactive "*")
(let* (c-lit-limits c-lit-type
- (c-macro-start c-macro-start))
+ (c-macro-start c-macro-start)
+ case-fold-search)
(c-save-buffer-state ()
(setq c-lit-limits (c-literal-limits nil nil t)
diff -r 29c4b1d0c74b93f571485cdd6c881437827eec93 -r 3c78c03936c18a9d23802d948cdf8980f26cb389 cc-mode.el
--- a/cc-mode.el
+++ b/cc-mode.el
@@ -1199,7 +1199,7 @@
;;
;; Type a space in the first blank line, and the fontification of the next
;; line was fouled up by context fontification.
- (let ((new-beg beg) (new-end end) new-region)
+ (let ((new-beg beg) (new-end end) new-region case-fold-search)
(if c-in-after-change-fontification
(setq c-in-after-change-fontification nil)
(save-restriction
Repository URL: https://bitbucket.org/xemacs/cc-mode/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches