CVS update by aidan xemacs/tests/automated lisp-reader-tests.el

Friday, 4 August 2006

          User: aidan   
  Date: 06/08/05 00:55:19

  Added:       xemacs/tests/automated lisp-reader-tests.el
Log:
Raw strings, from Python via SXEmacs

Revision  Changes    Path
1.756     +6 -0      XEmacs/xemacs/lisp/ChangeLog

Index: ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/ChangeLog,v
retrieving revision 1.755
retrieving revision 1.756
diff -u -p -r1.755 -r1.756
--- ChangeLog	2006/08/04 20:01:05	1.755
+++ ChangeLog	2006/08/04 22:55:04	1.756
＠＠ -1,3 +1,9 ＠＠
+2006-08-05  Aidan Kehoe  <kehoea(a)parhasard.net&gt;
+
+	* lisp.el (forward-sexp):
+	Handle raw strings specially just as we do structures. Fixes
+	problems evaluating them in *scratch*. 
+
 2006-08-04  Aidan Kehoe  <kehoea(a)parhasard.net&gt;

 	* iso8859-1.el:

1.6       +13 -12    XEmacs/xemacs/lisp/lisp.el

Index: lisp.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/lisp.el,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -p -r1.5 -r1.6
--- lisp.el	2001/04/12 18:21:29	1.5
+++ lisp.el	2006/08/04 22:55:05	1.6
＠＠ -60,19 +60,20 ＠＠ move backward across N balanced expressi
   (interactive "_p")
   (or arg (setq arg 1))
   ;; XEmacs: evil hack! The other half of the evil hack below.
-  (if (and (> arg 0) (looking-at "#s("))
-      (goto-char (+ (point) 2)))
+  (if (and (> arg 0) (looking-at "#s(\\|#r[uU]\\{0,1\\}\""))
+    (goto-char (1+ (- (point) (- (match-end 0) (match-beginning 0))))))
   (goto-char (or (scan-sexps (point) arg) (buffer-end arg)))
-  (if (< arg 0) (backward-prefix-chars))
-  ;; XEmacs: evil hack! Skip back over #s so that structures are read
-  ;; properly.  the current cheesified syntax tables just aren't up to
-  ;; this.
-  (if (and (< arg 0)
-	   (eq (char-after (point)) ?\()
-	   (>= (- (point) (point-min)) 2)
-	   (eq (char-after (- (point) 1)) ?s)
-	   (eq (char-after (- (point) 2)) ?#))
-      (goto-char (- (point) 2))))
+  (when (< arg 0) 
+    (backward-prefix-chars)
+    ;; XEmacs: evil hack! Skip back over #[sr] so that structures and raw
+    ;; strings are read properly.  the current cheesified syntax tables just
+    ;; aren't up to this.
+    (let* ((diff (- (point) (point-min)))
+	   (subject (buffer-substring (- (point) (min diff 3))
+				      (1+ (point))))
+	   (matched (string-match "#s(\\|#r[uU]\\{0,1\\}\"" subject)))
+      (if matched
+	(goto-char (1+ (- (point) (- (length subject) matched))))))))

 (defun backward-sexp (&optional arg)
   "Move backward across one balanced expression (sexp).

1.347     +6 -0      XEmacs/xemacs/man/ChangeLog

Index: ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/man/ChangeLog,v
retrieving revision 1.346
retrieving revision 1.347
diff -u -p -r1.346 -r1.347
--- ChangeLog	2006/07/19 15:04:46	1.346
+++ ChangeLog	2006/08/04 22:55:09	1.347
＠＠ -1,3 +1,9 ＠＠
+2006-08-05  Aidan Kehoe  <kehoea(a)parhasard.net&gt;
+
+	* lispref/objects.texi (String Type):
+	Give details of the raw string syntax, taken from SXEmacs and
+	Python. 
+
 2006-07-19  Stephen J. Turnbull  <stephen(a)xemacs.org&gt;

 	* new-users-guide/edit.texi (Insert): Document bogosity in

1.9       +10 -0     XEmacs/xemacs/man/lispref/objects.texi

Index: objects.texi
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/man/lispref/objects.texi,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -p -r1.8 -r1.9
--- objects.texi	2006/04/29 14:36:54	1.8
+++ objects.texi	2006/08/04 22:55:10	1.9
＠＠ -1079,6 +1079,16 ＠＠ characters it contains, and another doub
 escape any backslash or double-quote characters in the string with a
 backslash, like this: ＠code{"this \" is an embedded quote"}.

+ An alternative syntax allows insertion of raw backslashes into a
+string, like this: ＠code{#r"this \ is an embedded backslash"}.  In  such
+a string, each character following a backslash is included literally in
+the string, and all backslashes are left in the string.  This means that
+＠code{#r"\""} is a valid string literal with two characters, a backslash
and a
+double-quote.  It also means that a string  with this syntax ＠emph{cannot end
+in a single backslash}.  As with Python, from where this syntax was
+taken, you can specify ＠code{u} or ＠code{U} after the ＠code{#r} to
+specify that interpretation of Unicode escapes should be done. 
+
   The newline character is not special in the read syntax for strings;
 if you write a new line between the double-quotes, it becomes a
 character in the string.  But an escaped newline---one that is preceded

1.993     +15 -0     XEmacs/xemacs/src/ChangeLog

Index: ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/ChangeLog,v
retrieving revision 1.992
retrieving revision 1.993
diff -u -p -r1.992 -r1.993
--- ChangeLog	2006/08/04 20:55:02	1.992
+++ ChangeLog	2006/08/04 22:55:12	1.993
＠＠ -1,3 +1,18 ＠＠
+2006-08-05  Aidan Kehoe  <kehoea(a)parhasard.net&gt;
+
+	* lread.c (read_unicode_escape):
+	Refactor this code out from read_escape, since it's now called
+	from read_string as well. 
+	* lread.c (read_escape):
+	Call read_unicode_escape instead of using inline code, 
+	* lread.c (read_string):
+	Refactor out from read1, provide raw and honor_unicode options. 
+	* lread.c (read_raw_string):
+	Added, a function that calls read_string with the correct
+	arguments for a raw string. 
+	* lread.c (read1):
+	Pass raw strings to read_raw_string; pass strings to read_string.
+
 2006-08-04  Aidan Kehoe  <kehoea(a)parhasard.net&gt;

 	* event-tty.c (emacs_tty_next_event):

1.79      +158 -87   XEmacs/xemacs/src/lread.c

Index: lread.c
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/lread.c,v
retrieving revision 1.78
retrieving revision 1.79
diff -u -p -r1.78 -r1.79
--- lread.c	2006/06/03 17:50:54	1.78
+++ lread.c	2006/08/04 22:55:13	1.79
＠＠ -1670,15 +1670,56 ＠＠ read0 (Lisp_Object readcharfun)

   return val;
 }
+
+/* A Unicode escape, as in C# (though we only permit them in strings
+   and characters, not arbitrarily in the source code.) */
+static Ichar
+read_unicode_escape (Lisp_Object readcharfun, int unicode_hex_count)
+{
+  REGISTER Ichar i = 0, c;
+  REGISTER int count = 0;
+  Lisp_Object lisp_char;
+  while (++count <= unicode_hex_count)
+    {
+      c = readchar (readcharfun);
+      /* Remember, can't use isdigit(), isalpha() etc. on Ichars */
+      if      (c >= '0' && c <= '9')  i = (i << 4) +
(c - '0');
+      else if (c >= 'a' && c <= 'f')  i = (i << 4) +
(c - 'a') + 10;
+      else if (c >= 'A' && c <= 'F')  i = (i << 4) +
(c - 'A') + 10;
+      else
+	{
+	  syntax_error ("Non-hex digit used for Unicode escape",
+			make_char (c));
+	  break;
+	}
+    }
+
+  lisp_char = Funicode_to_char(make_int(i), Qnil);
+
+  if (EQ(Qnil, lisp_char))
+    {
+      /* This is ugly and horrible and trashes the user's data, but
+	 it's what unicode.c does. In the future, unicode-to-char
+	 should not return nil.  */
+#ifdef MULE
+      i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128);
+#else
+      i = '~';
+#endif
+      return i;
+    }
+  else
+    {
+      return XCHAR(lisp_char);
+    }
+}
+

 static Ichar
 read_escape (Lisp_Object readcharfun)
 {
   /* This function can GC */
   Ichar c = readchar (readcharfun);
-  /* \u allows up to four hex digits, \U up to eight. Default to the
-     behaviour for \u, and change this value in the case that \U is seen. */
-  int unicode_hex_count = 4;

   if (c < 0)
     signal_error (Qend_of_file, 0, READCHARFUN_MAYBE (readcharfun));
＠＠ -1797,50 +1838,11 ＠＠ read_escape (Lisp_Object readcharfun)
       }
     case 'U':
       /* Post-Unicode-2.0: Up to eight hex chars */
-      unicode_hex_count = 8;
+      return read_unicode_escape(readcharfun, 8);
     case 'u':
+      /* Unicode-2.0 and before; four hex chars. */
+      return read_unicode_escape(readcharfun, 4);

-      /* A Unicode escape, as in C# (though we only permit them in strings
-	 and characters, not arbitrarily in the source code.) */
-      {
-	REGISTER Ichar i = 0;
-	REGISTER int count = 0;
-	Lisp_Object lisp_char;
-	while (++count <= unicode_hex_count)
-	  {
-	    c = readchar (readcharfun);
-	    /* Remember, can't use isdigit(), isalpha() etc. on Ichars */
-	    if      (c >= '0' && c <= '9')  i = (i << 4) +
(c - '0');
-	    else if (c >= 'a' && c <= 'f')  i = (i << 4) +
(c - 'a') + 10;
-            else if (c >= 'A' && c <= 'F')  i = (i <<
4) + (c - 'A') + 10;
-	    else
-	      {
-		syntax_error ("Non-hex digit used for Unicode escape",
-			      make_char (c));
-		break;
-	      }
-	  }
-
-	lisp_char = Funicode_to_char(make_int(i), Qnil);
-
-	if (EQ(Qnil, lisp_char))
-	  {
-	    /* This is ugly and horrible and trashes the user's data, but
-	       it's what unicode.c does. In the future, unicode-to-char
-	       should not return nil.  */
-#ifdef MULE
-	    i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128);
-#else
-	    i = '~';
-#endif
-            return i;
-	  }
-	else
-	  {
-	    return XCHAR(lisp_char);
-	  }
-      }
-
     default:
 	return c;
     }
＠＠ -2270,6 +2272,113 ＠＠ list2_pure (int pure, Lisp_Object a, Lis
 }
 #endif

+static Lisp_Object
+read_string (Lisp_Object readcharfun, Ichar delim, int raw, 
+	     int honor_unicode)
+{
+#ifdef I18N3
+  /* #### If the input stream is translating, then the string
+     should be marked as translatable by setting its
+     `string-translatable' property to t.  .el and .elc files
+     normally are translating input streams.  See Fgettext()
+     and print_internal(). */
+#endif
+  Ichar c;
+  int cancel = 0;
+
+  Lstream_rewind(XLSTREAM(Vread_buffer_stream));
+  while ((c = readchar(readcharfun)) >= 0 && c != delim)
+    {
+    if (c == '\\') 
+      {
+	if (raw) 
+	  {
+	    c = readchar(readcharfun);
+	    if (honor_unicode && ('u' == c || 'U' == c))
+	      {
+		c = read_unicode_escape(readcharfun,
+					'U' == c ? 8 : 4);
+	      }
+	    else
+	      {
+		/* For raw strings, insert the
+		   backslash and the next char, */
+		Lstream_put_ichar(XLSTREAM
+				  (Vread_buffer_stream),
+				  '\\');
+	      }
+	  } 
+	else
+	  /* otherwise, backslash escapes the next char. */
+	  c = read_escape(readcharfun);
+      }
+    /* c is -1 if \ newline has just been seen */
+    if (c == -1) 
+      {
+	if (Lstream_byte_count
+	  (XLSTREAM(Vread_buffer_stream)) ==
+	  0)
+	  cancel = 1;
+      } 
+    else
+      Lstream_put_ichar(XLSTREAM
+			 (Vread_buffer_stream),
+			 c);
+    QUIT;
+    }
+  if (c < 0)
+    return Fsignal(Qend_of_file,
+		   list1(READCHARFUN_MAYBE(readcharfun)));
+
+  /* If purifying, and string starts with \ newline,
+     return zero instead.  This is for doc strings
+     that we are really going to find in lib-src/DOC.nn.nn  */
+  if (purify_flag && NILP(Vinternal_doc_file_name)
+      && cancel)
+    return Qzero;
+
+  Lstream_flush(XLSTREAM(Vread_buffer_stream));
+  return make_string(resizing_buffer_stream_ptr
+		     (XLSTREAM(Vread_buffer_stream)),
+		     Lstream_byte_count(XLSTREAM(Vread_buffer_stream)));
+}
+
+static Lisp_Object
+read_raw_string (Lisp_Object readcharfun)
+{
+  Ichar c;
+  Ichar permit_unicode = 0; 
+
+  do {
+    c = reader_nextchar(readcharfun);
+    switch (c) {
+      /* #r:engine"my sexy raw string" -- raw string w/ flags*/
+      /* case ':': */
+      /* #ru"Hi there\u20AC \U000020AC" -- raw string, honouring Unicode. */
+    case 'u':
+    case 'U':
+      permit_unicode = c; 
+      continue;
+
+      /* #r"my raw string" -- raw string */
+    case '\"':
+      return read_string(readcharfun, '\"', 1, permit_unicode);
+      /* invalid syntax */
+    default:
+      {
+	if (permit_unicode)
+	  {
+	    unreadchar(readcharfun, permit_unicode);
+	  }
+	unreadchar(readcharfun, c);
+	return Fsignal(Qinvalid_read_syntax,
+		       list1(build_string
+			     ("unrecognized raw string syntax")));
+      }
+    }
+  } while (1);
+}
+
 /* Read the next Lisp object from the stream READCHARFUN and return it.
    If the return value is a cons whose car is Qunbound, then read1()
    encountered a misplaced token (e.g. a right bracket, right paren,
＠＠ -2509,6 +2618,8 ＠＠ retry:
 	  case 'x': return read_integer (readcharfun, 16);
             /* #b010 => 2 -- binary constant syntax */
 	  case 'b': return read_integer (readcharfun, 2);
+	    /* #r"raw\stringt" -- raw string syntax */
+	  case 'r': return read_raw_string(readcharfun);
             /* #s(foobar key1 val1 key2 val2) -- structure syntax */
 	  case 's': return read_structure (readcharfun);
 	  case '<':
＠＠ -2654,48 +2765,8 ＠＠ retry:
       }

     case '\"':
-      {
-	/* String */
-#ifdef I18N3
-	/* #### If the input stream is translating, then the string
-	   should be marked as translatable by setting its
-	   `string-translatable' property to t.  .el and .elc files
-	   normally are translating input streams.  See Fgettext()
-	   and print_internal(). */
-#endif
-	int cancel = 0;
-
-	Lstream_rewind (XLSTREAM (Vread_buffer_stream));
-	while ((c = readchar (readcharfun)) >= 0
-	       && c != '\"')
-	  {
-	    if (c == '\\')
-	      c = read_escape (readcharfun);
-	    /* c is -1 if \ newline has just been seen */
-	    if (c == -1)
-	      {
-		if (Lstream_byte_count (XLSTREAM (Vread_buffer_stream)) == 0)
-		  cancel = 1;
-	      }
-	    else
-	      Lstream_put_ichar (XLSTREAM (Vread_buffer_stream), c);
-	    QUIT;
-	  }
-	if (c < 0)
-	  return Fsignal (Qend_of_file, list1 (READCHARFUN_MAYBE (readcharfun)));
-
-	/* If purifying, and string starts with \ newline,
-	   return zero instead.  This is for doc strings
-	   that we are really going to find in lib-src/DOC.nn.nn  */
-	if (purify_flag && NILP (Vinternal_doc_file_name) && cancel)
-	  return Qzero;
-
-	Lstream_flush (XLSTREAM (Vread_buffer_stream));
-	return
-	  make_string
-	  (resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream)),
-	   Lstream_byte_count (XLSTREAM (Vread_buffer_stream)));
-      }
+      /* String */
+      return read_string(readcharfun, '\"', 0, 1);

     default:
       {

1.83      +7 -0      XEmacs/xemacs/tests/ChangeLog

Index: ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/tests/ChangeLog,v
retrieving revision 1.82
retrieving revision 1.83
diff -u -p -r1.82 -r1.83
--- ChangeLog	2006/06/24 14:30:36	1.82
+++ ChangeLog	2006/08/04 22:55:18	1.83
＠＠ -1,3 +1,10 ＠＠
+2006-08-05  Aidan Kehoe  <kehoea(a)parhasard.net&gt;
+
+	* automated/lisp-reader-tests.el:
+	New file, imported from Martin Kuehl's SXEmacs commit; test the
+	new raw string syntax, including the Unicode escapes, which
+	SXEmacs doesn't have. 
+
 2006-06-24  Stephen J. Turnbull  <stephen(a)xemacs.org&gt;

 	* automated/test-harness.el (Silence-Message): New macro.

1.1                  XEmacs/xemacs/tests/automated/lisp-reader-tests.el

Index: lisp-reader-tests.el
===================================================================
;; Copyright (C) 2005 Martin Kuehl.

;; Author: Martin Kuehl <martin.kuehl(a)gmail.com&gt;
;; Maintainer: Martin Kuehl <martin.kuehl(a)gmail.com&gt;
;; Created: 2005
;; Keywords: tests

;; This file is NOT part of SXEmacs.

;; SXEmacs is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; SXEmacs is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with SXEmacs; see the file COPYING.  If not, write to the Free
;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
;; 02111-1307, USA.

;;; Synched up with: Not in FSF.

;;; Commentary:

;; Test the lisp reader.
;; See test-harness.el for instructions on how to run these tests.

;;; Raw Strings
;;; ===========

;; Equality to "traditional" strings
;; ---------------------------------
(dolist (strings '((#r"xyz"   "xyz")	 ; no backslashes
		   (#r"\xyz"  "\\xyz")   ; backslash at start
                   (#r"\\xyz" "\\\\xyz") ; backslashes at start
                   (#r"\nxyz" "\\nxyz")  ; escape seq. at start
                   (#r"\"xyz" "\\\"xyz") ; quote at start
                   (#r"xy\z"  "xy\\z")   ; backslash in middle
                   (#r"xy\\z" "xy\\\\z") ; backslashes in middle
                   (#r"xy\nz" "xy\\nz")  ; escape seq. in middle
                   (#r"xy\"z" "xy\\\"z") ; quote in middle
                   ;;(#r"xyz\"  "xyz\\")   ; backslash at end: error
                   (#r"xyz\\" "xyz\\\\") ; backslashes at end
                   (#r"xyz\n" "xyz\\n")  ; escape seq. at end
                   (#r"xyz\"" "xyz\\\"") ; quote at end
		   (#ru"\u00ABxyz" "\u00ABxyz") ; one Unicode escape
		   (#rU"\U000000ABxyz" "\U000000ABxyz") ; another Unicode escape
		   (#rU"xyz\u00AB" "xyz\u00AB") ; one Unicode escape
                   ))
  (Assert (apply #'string= strings)))

;; Odd number of backslashes at the end
;; ------------------------------------
(dolist (string '("#r\"xyz\\\""         ;
`#r"abc\"': escaped delimiter
                  "#r\"xyz\\\\\\\""     ; `#r"abc\\\"':
escaped delimiter
                  ))
  (with-temp-buffer
    (insert string)
    (Check-Error end-of-file (eval-buffer))))

;; Alternate string/regex delimiters
;; ---------------------------------
(dolist (string '("#r/xyz/"             ; Perl syntax
                  "#r:ix/xyz/"          ; Extended Perl syntax
                  "#r|xyz|"             ; TeX syntax
                  "#r[xyz]"             ; (uncommon) Perl syntax
                  "#r<xyz>"             ; Perl6 syntax?
                  "#r(xyz)"             ; arbitrary santax
                  "#r{xyz}"             ; arbitrary santax
                  "#r,xyz,"             ; arbitrary santax
                  "#r!xyz!"             ; arbitrary santax
                  ))
  (with-temp-buffer
    (insert string)
    (Check-Error-Message invalid-read-syntax "unrecognized raw string"
                         (eval-buffer))))

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

2005

2004

CVS update by aidan xemacs/tests/automated lisp-reader-tests.el