User: aidan
Date: 06/08/05 00:55:19
Added: xemacs/tests/automated lisp-reader-tests.el
Log:
Raw strings, from Python via SXEmacs
Revision Changes Path
1.756 +6 -0 XEmacs/xemacs/lisp/ChangeLog
Index: ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/ChangeLog,v
retrieving revision 1.755
retrieving revision 1.756
diff -u -p -r1.755 -r1.756
--- ChangeLog 2006/08/04 20:01:05 1.755
+++ ChangeLog 2006/08/04 22:55:04 1.756
@@ -1,3 +1,9 @@
+2006-08-05 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * lisp.el (forward-sexp):
+ Handle raw strings specially just as we do structures. Fixes
+ problems evaluating them in *scratch*.
+
2006-08-04 Aidan Kehoe <kehoea(a)parhasard.net>
* iso8859-1.el:
1.6 +13 -12 XEmacs/xemacs/lisp/lisp.el
Index: lisp.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/lisp.el,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -p -r1.5 -r1.6
--- lisp.el 2001/04/12 18:21:29 1.5
+++ lisp.el 2006/08/04 22:55:05 1.6
@@ -60,19 +60,20 @@ move backward across N balanced expressi
(interactive "_p")
(or arg (setq arg 1))
;; XEmacs: evil hack! The other half of the evil hack below.
- (if (and (> arg 0) (looking-at "#s("))
- (goto-char (+ (point) 2)))
+ (if (and (> arg 0) (looking-at "#s(\\|#r[uU]\\{0,1\\}\""))
+ (goto-char (1+ (- (point) (- (match-end 0) (match-beginning 0))))))
(goto-char (or (scan-sexps (point) arg) (buffer-end arg)))
- (if (< arg 0) (backward-prefix-chars))
- ;; XEmacs: evil hack! Skip back over #s so that structures are read
- ;; properly. the current cheesified syntax tables just aren't up to
- ;; this.
- (if (and (< arg 0)
- (eq (char-after (point)) ?\()
- (>= (- (point) (point-min)) 2)
- (eq (char-after (- (point) 1)) ?s)
- (eq (char-after (- (point) 2)) ?#))
- (goto-char (- (point) 2))))
+ (when (< arg 0)
+ (backward-prefix-chars)
+ ;; XEmacs: evil hack! Skip back over #[sr] so that structures and raw
+ ;; strings are read properly. the current cheesified syntax tables just
+ ;; aren't up to this.
+ (let* ((diff (- (point) (point-min)))
+ (subject (buffer-substring (- (point) (min diff 3))
+ (1+ (point))))
+ (matched (string-match "#s(\\|#r[uU]\\{0,1\\}\"" subject)))
+ (if matched
+ (goto-char (1+ (- (point) (- (length subject) matched))))))))
(defun backward-sexp (&optional arg)
"Move backward across one balanced expression (sexp).
1.347 +6 -0 XEmacs/xemacs/man/ChangeLog
Index: ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/man/ChangeLog,v
retrieving revision 1.346
retrieving revision 1.347
diff -u -p -r1.346 -r1.347
--- ChangeLog 2006/07/19 15:04:46 1.346
+++ ChangeLog 2006/08/04 22:55:09 1.347
@@ -1,3 +1,9 @@
+2006-08-05 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * lispref/objects.texi (String Type):
+ Give details of the raw string syntax, taken from SXEmacs and
+ Python.
+
2006-07-19 Stephen J. Turnbull <stephen(a)xemacs.org>
* new-users-guide/edit.texi (Insert): Document bogosity in
1.9 +10 -0 XEmacs/xemacs/man/lispref/objects.texi
Index: objects.texi
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/man/lispref/objects.texi,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -p -r1.8 -r1.9
--- objects.texi 2006/04/29 14:36:54 1.8
+++ objects.texi 2006/08/04 22:55:10 1.9
@@ -1079,6 +1079,16 @@ characters it contains, and another doub
escape any backslash or double-quote characters in the string with a
backslash, like this: @code{"this \" is an embedded quote"}.
+ An alternative syntax allows insertion of raw backslashes into a
+string, like this: @code{#r"this \ is an embedded backslash"}. In such
+a string, each character following a backslash is included literally in
+the string, and all backslashes are left in the string. This means that
+@code{#r"\""} is a valid string literal with two characters, a backslash
and a
+double-quote. It also means that a string with this syntax @emph{cannot end
+in a single backslash}. As with Python, from where this syntax was
+taken, you can specify @code{u} or @code{U} after the @code{#r} to
+specify that interpretation of Unicode escapes should be done.
+
The newline character is not special in the read syntax for strings;
if you write a new line between the double-quotes, it becomes a
character in the string. But an escaped newline---one that is preceded
1.993 +15 -0 XEmacs/xemacs/src/ChangeLog
Index: ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/ChangeLog,v
retrieving revision 1.992
retrieving revision 1.993
diff -u -p -r1.992 -r1.993
--- ChangeLog 2006/08/04 20:55:02 1.992
+++ ChangeLog 2006/08/04 22:55:12 1.993
@@ -1,3 +1,18 @@
+2006-08-05 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * lread.c (read_unicode_escape):
+ Refactor this code out from read_escape, since it's now called
+ from read_string as well.
+ * lread.c (read_escape):
+ Call read_unicode_escape instead of using inline code,
+ * lread.c (read_string):
+ Refactor out from read1, provide raw and honor_unicode options.
+ * lread.c (read_raw_string):
+ Added, a function that calls read_string with the correct
+ arguments for a raw string.
+ * lread.c (read1):
+ Pass raw strings to read_raw_string; pass strings to read_string.
+
2006-08-04 Aidan Kehoe <kehoea(a)parhasard.net>
* event-tty.c (emacs_tty_next_event):
1.79 +158 -87 XEmacs/xemacs/src/lread.c
Index: lread.c
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/lread.c,v
retrieving revision 1.78
retrieving revision 1.79
diff -u -p -r1.78 -r1.79
--- lread.c 2006/06/03 17:50:54 1.78
+++ lread.c 2006/08/04 22:55:13 1.79
@@ -1670,15 +1670,56 @@ read0 (Lisp_Object readcharfun)
return val;
}
+
+/* A Unicode escape, as in C# (though we only permit them in strings
+ and characters, not arbitrarily in the source code.) */
+static Ichar
+read_unicode_escape (Lisp_Object readcharfun, int unicode_hex_count)
+{
+ REGISTER Ichar i = 0, c;
+ REGISTER int count = 0;
+ Lisp_Object lisp_char;
+ while (++count <= unicode_hex_count)
+ {
+ c = readchar (readcharfun);
+ /* Remember, can't use isdigit(), isalpha() etc. on Ichars */
+ if (c >= '0' && c <= '9') i = (i << 4) +
(c - '0');
+ else if (c >= 'a' && c <= 'f') i = (i << 4) +
(c - 'a') + 10;
+ else if (c >= 'A' && c <= 'F') i = (i << 4) +
(c - 'A') + 10;
+ else
+ {
+ syntax_error ("Non-hex digit used for Unicode escape",
+ make_char (c));
+ break;
+ }
+ }
+
+ lisp_char = Funicode_to_char(make_int(i), Qnil);
+
+ if (EQ(Qnil, lisp_char))
+ {
+ /* This is ugly and horrible and trashes the user's data, but
+ it's what unicode.c does. In the future, unicode-to-char
+ should not return nil. */
+#ifdef MULE
+ i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128);
+#else
+ i = '~';
+#endif
+ return i;
+ }
+ else
+ {
+ return XCHAR(lisp_char);
+ }
+}
+
static Ichar
read_escape (Lisp_Object readcharfun)
{
/* This function can GC */
Ichar c = readchar (readcharfun);
- /* \u allows up to four hex digits, \U up to eight. Default to the
- behaviour for \u, and change this value in the case that \U is seen. */
- int unicode_hex_count = 4;
if (c < 0)
signal_error (Qend_of_file, 0, READCHARFUN_MAYBE (readcharfun));
@@ -1797,50 +1838,11 @@ read_escape (Lisp_Object readcharfun)
}
case 'U':
/* Post-Unicode-2.0: Up to eight hex chars */
- unicode_hex_count = 8;
+ return read_unicode_escape(readcharfun, 8);
case 'u':
+ /* Unicode-2.0 and before; four hex chars. */
+ return read_unicode_escape(readcharfun, 4);
- /* A Unicode escape, as in C# (though we only permit them in strings
- and characters, not arbitrarily in the source code.) */
- {
- REGISTER Ichar i = 0;
- REGISTER int count = 0;
- Lisp_Object lisp_char;
- while (++count <= unicode_hex_count)
- {
- c = readchar (readcharfun);
- /* Remember, can't use isdigit(), isalpha() etc. on Ichars */
- if (c >= '0' && c <= '9') i = (i << 4) +
(c - '0');
- else if (c >= 'a' && c <= 'f') i = (i << 4) +
(c - 'a') + 10;
- else if (c >= 'A' && c <= 'F') i = (i <<
4) + (c - 'A') + 10;
- else
- {
- syntax_error ("Non-hex digit used for Unicode escape",
- make_char (c));
- break;
- }
- }
-
- lisp_char = Funicode_to_char(make_int(i), Qnil);
-
- if (EQ(Qnil, lisp_char))
- {
- /* This is ugly and horrible and trashes the user's data, but
- it's what unicode.c does. In the future, unicode-to-char
- should not return nil. */
-#ifdef MULE
- i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128);
-#else
- i = '~';
-#endif
- return i;
- }
- else
- {
- return XCHAR(lisp_char);
- }
- }
-
default:
return c;
}
@@ -2270,6 +2272,113 @@ list2_pure (int pure, Lisp_Object a, Lis
}
#endif
+static Lisp_Object
+read_string (Lisp_Object readcharfun, Ichar delim, int raw,
+ int honor_unicode)
+{
+#ifdef I18N3
+ /* #### If the input stream is translating, then the string
+ should be marked as translatable by setting its
+ `string-translatable' property to t. .el and .elc files
+ normally are translating input streams. See Fgettext()
+ and print_internal(). */
+#endif
+ Ichar c;
+ int cancel = 0;
+
+ Lstream_rewind(XLSTREAM(Vread_buffer_stream));
+ while ((c = readchar(readcharfun)) >= 0 && c != delim)
+ {
+ if (c == '\\')
+ {
+ if (raw)
+ {
+ c = readchar(readcharfun);
+ if (honor_unicode && ('u' == c || 'U' == c))
+ {
+ c = read_unicode_escape(readcharfun,
+ 'U' == c ? 8 : 4);
+ }
+ else
+ {
+ /* For raw strings, insert the
+ backslash and the next char, */
+ Lstream_put_ichar(XLSTREAM
+ (Vread_buffer_stream),
+ '\\');
+ }
+ }
+ else
+ /* otherwise, backslash escapes the next char. */
+ c = read_escape(readcharfun);
+ }
+ /* c is -1 if \ newline has just been seen */
+ if (c == -1)
+ {
+ if (Lstream_byte_count
+ (XLSTREAM(Vread_buffer_stream)) ==
+ 0)
+ cancel = 1;
+ }
+ else
+ Lstream_put_ichar(XLSTREAM
+ (Vread_buffer_stream),
+ c);
+ QUIT;
+ }
+ if (c < 0)
+ return Fsignal(Qend_of_file,
+ list1(READCHARFUN_MAYBE(readcharfun)));
+
+ /* If purifying, and string starts with \ newline,
+ return zero instead. This is for doc strings
+ that we are really going to find in lib-src/DOC.nn.nn */
+ if (purify_flag && NILP(Vinternal_doc_file_name)
+ && cancel)
+ return Qzero;
+
+ Lstream_flush(XLSTREAM(Vread_buffer_stream));
+ return make_string(resizing_buffer_stream_ptr
+ (XLSTREAM(Vread_buffer_stream)),
+ Lstream_byte_count(XLSTREAM(Vread_buffer_stream)));
+}
+
+static Lisp_Object
+read_raw_string (Lisp_Object readcharfun)
+{
+ Ichar c;
+ Ichar permit_unicode = 0;
+
+ do {
+ c = reader_nextchar(readcharfun);
+ switch (c) {
+ /* #r:engine"my sexy raw string" -- raw string w/ flags*/
+ /* case ':': */
+ /* #ru"Hi there\u20AC \U000020AC" -- raw string, honouring Unicode. */
+ case 'u':
+ case 'U':
+ permit_unicode = c;
+ continue;
+
+ /* #r"my raw string" -- raw string */
+ case '\"':
+ return read_string(readcharfun, '\"', 1, permit_unicode);
+ /* invalid syntax */
+ default:
+ {
+ if (permit_unicode)
+ {
+ unreadchar(readcharfun, permit_unicode);
+ }
+ unreadchar(readcharfun, c);
+ return Fsignal(Qinvalid_read_syntax,
+ list1(build_string
+ ("unrecognized raw string syntax")));
+ }
+ }
+ } while (1);
+}
+
/* Read the next Lisp object from the stream READCHARFUN and return it.
If the return value is a cons whose car is Qunbound, then read1()
encountered a misplaced token (e.g. a right bracket, right paren,
@@ -2509,6 +2618,8 @@ retry:
case 'x': return read_integer (readcharfun, 16);
/* #b010 => 2 -- binary constant syntax */
case 'b': return read_integer (readcharfun, 2);
+ /* #r"raw\stringt" -- raw string syntax */
+ case 'r': return read_raw_string(readcharfun);
/* #s(foobar key1 val1 key2 val2) -- structure syntax */
case 's': return read_structure (readcharfun);
case '<':
@@ -2654,48 +2765,8 @@ retry:
}
case '\"':
- {
- /* String */
-#ifdef I18N3
- /* #### If the input stream is translating, then the string
- should be marked as translatable by setting its
- `string-translatable' property to t. .el and .elc files
- normally are translating input streams. See Fgettext()
- and print_internal(). */
-#endif
- int cancel = 0;
-
- Lstream_rewind (XLSTREAM (Vread_buffer_stream));
- while ((c = readchar (readcharfun)) >= 0
- && c != '\"')
- {
- if (c == '\\')
- c = read_escape (readcharfun);
- /* c is -1 if \ newline has just been seen */
- if (c == -1)
- {
- if (Lstream_byte_count (XLSTREAM (Vread_buffer_stream)) == 0)
- cancel = 1;
- }
- else
- Lstream_put_ichar (XLSTREAM (Vread_buffer_stream), c);
- QUIT;
- }
- if (c < 0)
- return Fsignal (Qend_of_file, list1 (READCHARFUN_MAYBE (readcharfun)));
-
- /* If purifying, and string starts with \ newline,
- return zero instead. This is for doc strings
- that we are really going to find in lib-src/DOC.nn.nn */
- if (purify_flag && NILP (Vinternal_doc_file_name) && cancel)
- return Qzero;
-
- Lstream_flush (XLSTREAM (Vread_buffer_stream));
- return
- make_string
- (resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream)),
- Lstream_byte_count (XLSTREAM (Vread_buffer_stream)));
- }
+ /* String */
+ return read_string(readcharfun, '\"', 0, 1);
default:
{
1.83 +7 -0 XEmacs/xemacs/tests/ChangeLog
Index: ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/tests/ChangeLog,v
retrieving revision 1.82
retrieving revision 1.83
diff -u -p -r1.82 -r1.83
--- ChangeLog 2006/06/24 14:30:36 1.82
+++ ChangeLog 2006/08/04 22:55:18 1.83
@@ -1,3 +1,10 @@
+2006-08-05 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * automated/lisp-reader-tests.el:
+ New file, imported from Martin Kuehl's SXEmacs commit; test the
+ new raw string syntax, including the Unicode escapes, which
+ SXEmacs doesn't have.
+
2006-06-24 Stephen J. Turnbull <stephen(a)xemacs.org>
* automated/test-harness.el (Silence-Message): New macro.
1.1 XEmacs/xemacs/tests/automated/lisp-reader-tests.el
Index: lisp-reader-tests.el
===================================================================
;; Copyright (C) 2005 Martin Kuehl.
;; Author: Martin Kuehl <martin.kuehl(a)gmail.com>
;; Maintainer: Martin Kuehl <martin.kuehl(a)gmail.com>
;; Created: 2005
;; Keywords: tests
;; This file is NOT part of SXEmacs.
;; SXEmacs is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;; SXEmacs is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with SXEmacs; see the file COPYING. If not, write to the Free
;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
;; 02111-1307, USA.
;;; Synched up with: Not in FSF.
;;; Commentary:
;; Test the lisp reader.
;; See test-harness.el for instructions on how to run these tests.
;;; Raw Strings
;;; ===========
;; Equality to "traditional" strings
;; ---------------------------------
(dolist (strings '((#r"xyz" "xyz") ; no backslashes
(#r"\xyz" "\\xyz") ; backslash at start
(#r"\\xyz" "\\\\xyz") ; backslashes at start
(#r"\nxyz" "\\nxyz") ; escape seq. at start
(#r"\"xyz" "\\\"xyz") ; quote at start
(#r"xy\z" "xy\\z") ; backslash in middle
(#r"xy\\z" "xy\\\\z") ; backslashes in middle
(#r"xy\nz" "xy\\nz") ; escape seq. in middle
(#r"xy\"z" "xy\\\"z") ; quote in middle
;;(#r"xyz\" "xyz\\") ; backslash at end: error
(#r"xyz\\" "xyz\\\\") ; backslashes at end
(#r"xyz\n" "xyz\\n") ; escape seq. at end
(#r"xyz\"" "xyz\\\"") ; quote at end
(#ru"\u00ABxyz" "\u00ABxyz") ; one Unicode escape
(#rU"\U000000ABxyz" "\U000000ABxyz") ; another Unicode escape
(#rU"xyz\u00AB" "xyz\u00AB") ; one Unicode escape
))
(Assert (apply #'string= strings)))
;; Odd number of backslashes at the end
;; ------------------------------------
(dolist (string '("#r\"xyz\\\"" ;
`#r"abc\"': escaped delimiter
"#r\"xyz\\\\\\\"" ; `#r"abc\\\"':
escaped delimiter
))
(with-temp-buffer
(insert string)
(Check-Error end-of-file (eval-buffer))))
;; Alternate string/regex delimiters
;; ---------------------------------
(dolist (string '("#r/xyz/" ; Perl syntax
"#r:ix/xyz/" ; Extended Perl syntax
"#r|xyz|" ; TeX syntax
"#r[xyz]" ; (uncommon) Perl syntax
"#r<xyz>" ; Perl6 syntax?
"#r(xyz)" ; arbitrary santax
"#r{xyz}" ; arbitrary santax
"#r,xyz," ; arbitrary santax
"#r!xyz!" ; arbitrary santax
))
(with-temp-buffer
(insert string)
(Check-Error-Message invalid-read-syntax "unrecognized raw string"
(eval-buffer))))