User: aidan
Date: 06/04/29 16:37:00
Modified: xemacs/src ChangeLog lread.c xft-fonts.h
Log:
Support Unicode escapes in the Lisp reader, taking the syntax from C#.
Revision Changes Path
1.338 +6 -0 XEmacs/xemacs/man/ChangeLog
Index: ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/man/ChangeLog,v
retrieving revision 1.337
retrieving revision 1.338
diff -u -p -r1.337 -r1.338
--- ChangeLog 2006/04/23 16:11:27 1.337
+++ ChangeLog 2006/04/29 14:36:49 1.338
@@ -1,3 +1,9 @@
+2006-04-29 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * lispref/objects.texi (Character Type):
+ Document the Unicode escape syntax for character literals and
+ strings.
+
2006-04-23 Stephen J. Turnbull <stephen(a)xemacs.org>
* internals/internals.texi: Run texinfo-master-menu.
1.8 +16 -0 XEmacs/xemacs/man/lispref/objects.texi
Index: objects.texi
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/man/lispref/objects.texi,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -p -r1.7 -r1.8
--- objects.texi 2003/06/30 09:31:01 1.7
+++ objects.texi 2006/04/29 14:36:54 1.8
@@ -510,6 +510,21 @@ with the legitimate @sc{iso}-8859-1 inte
For example, character code 193 is a lowercase @samp{a} with an acute
accent, in @sc{iso}-8859-1.)
+@cindex unicode character escape
+ From version 21.5.25 onwards, XEmacs provides a syntax for specifying
+characters by their Unicode code points. @samp{?\uABCD} will give you
+an XEmacs character that maps to the code point @samp{U+ABCD} in
+Unicode-based representations (UTF-8 text files, Unicode-oriented fonts,
+etc.). Just as in the C# language, there is a slightly different syntax
+for specifying characters with code points above @samp{#xFFFF};
+@samp{\U00ABCDEF} will give you an XEmacs character that maps to the
+code point @samp{U+ABCDEF} in Unicode-based representations, if such an
+XEmacs character exists.
+
+ Unlike in C#, while this syntax is available for character literals,
+and (see later) in strings, it is not available elsewhere in your Lisp
+source code.
+
@ignore @c None of this crap applies to XEmacs.
For use in strings and buffers, you are limited to the control
characters that exist in @sc{ascii}, but for keyboard input purposes,
@@ -614,6 +629,7 @@ bit values are 2**22 for alt, 2**23 for
@cindex backslash in character constant
@cindex octal character code
@cindex hexadecimal character code
+
Finally, there are two read syntaxes involving character codes.
It is not possible to represent multibyte or wide characters in this
way; the permissible range of codes is from 0 to 255 (@emph{i.e.},
1.950 +7 -0 XEmacs/xemacs/src/ChangeLog
Index: ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/ChangeLog,v
retrieving revision 1.949
retrieving revision 1.950
diff -u -p -r1.949 -r1.950
--- ChangeLog 2006/04/25 14:02:05 1.949
+++ ChangeLog 2006/04/29 14:36:56 1.950
@@ -1,3 +1,10 @@
+2006-04-29 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * lread.c:
+ * lread.c (read_escape):
+ Support \uABCD and \U00ABCDEF for specifying characters by their
+ Unicode code point.
+
2006-04-25 Stephen J. Turnbull <stephen(a)xemacs.org>
Repair busted commit, plus some gratuitous doc improvements.
1.77 +48 -3 XEmacs/xemacs/src/lread.c
Index: lread.c
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/lread.c,v
retrieving revision 1.76
retrieving revision 1.77
diff -u -p -r1.76 -r1.77
--- lread.c 2005/07/12 23:26:49 1.76
+++ lread.c 2006/04/29 14:36:57 1.77
@@ -208,6 +208,8 @@ static int saved_doc_string_position;
static int locate_file_open_or_access_file (Ibyte *fn, int access_mode);
EXFUN (Fread_from_string, 3);
+EXFUN (Funicode_to_char, 2); /* In unicode.c. */
+
/* When errors are signaled, the actual readcharfun should not be used
as an argument if it is an lstream, so that lstreams don't escape
to the Lisp level. */
@@ -1675,6 +1677,9 @@ read_escape (Lisp_Object readcharfun)
{
/* This function can GC */
Ichar c = readchar (readcharfun);
+ /* \u allows up to four hex digits, \U up to eight. Default to the
+ behaviour for \u, and change this value in the case that \U is seen. */
+ int unicode_hex_count = 4;
if (c < 0)
signal_error (Qend_of_file, 0, READCHARFUN_MAYBE (readcharfun));
@@ -1763,7 +1768,7 @@ read_escape (Lisp_Object readcharfun)
}
}
if (i >= 0400)
- syntax_error ("Attempt to create non-ASCII/ISO-8859-1 character",
+ syntax_error ("Non-ISO-8859-1 character specified with octal escape",
make_int (i));
return i;
}
@@ -1791,11 +1796,51 @@ read_escape (Lisp_Object readcharfun)
}
return i;
}
+ case 'U':
+ /* Post-Unicode-2.0: Up to eight hex chars */
+ unicode_hex_count = 8;
+ case 'u':
+ /* A Unicode escape, as in C# (though we only permit them in strings
+ and characters, not arbitrarily in the source code.) */
+ {
+ REGISTER Ichar i = 0;
+ REGISTER int count = 0;
+ Lisp_Object lisp_char;
+ while (++count <= unicode_hex_count)
+ {
+ c = readchar (readcharfun);
+ /* Remember, can't use isdigit(), isalpha() etc. on Ichars */
+ if (c >= '0' && c <= '9') i = (i << 4) + (c - '0');
+ else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10;
+ else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10;
+ else
+ {
+ syntax_error ("Non-hex digit used for Unicode escape",
+ make_char (c));
+ break;
+ }
+ }
+
+ lisp_char = Funicode_to_char(make_int(i), Qnil);
+
+ if (EQ(Qnil, lisp_char))
+ {
+ /* This is ugly and horrible and trashes the user's data, but
+ it's what unicode.c does. In the future, unicode-to-char
+ should not return nil. */
#ifdef MULE
- /* #### need some way of reading an extended character with
- an escape sequence. */
+ i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128);
+#else
+ i = '~';
#endif
+ return i;
+ }
+ else
+ {
+ return XCHAR(lisp_char);
+ }
+ }
default:
return c;
1.4 +0 -71 XEmacs/xemacs/src/Attic/xft-fonts.h
<<Binary file>>