APPROVE COMMIT
NOTE: This patch has been committed
# HG changeset patch
# User Aidan Kehoe <kehoea(a)parhasard.net>
# Date 1507158497 -3600
# Thu Oct 05 00:08:17 2017 +0100
# Node ID 19876e056bb7630f1854719c8b037a1f2fb691e8
# Parent 8df05e6e201fbbd618c7127c762bf954028ceec3
Be more realistic, #'char-to-unicode, #'make-char
2017-10-04 Aidan Kehoe <kehoea(a)parhasard.net>
* text.c (old_mule_ichar_to_unicode):
Assert we haven't been handed an Ichar less than 256 in value.
* text.c (unicode_internal_handle_bad_ichar_to_unicode): New.
Handle a value passed to the inline ichar_to_unicode(), if CHR is
not an official Unicode character and FAIL is not
CONVERR_USE_PRIVATE.
* text.c (Fmake_char):
Make this API compatible with GNU and with pre-unicode-internal,
return a private character by default if HANDLE-ERROR is nil.
* text.c (Fchar_to_unicode):
Return nil if HANDLE-ERROR is FAIL or nil here.
* text.c (Funicode_to_char):
* text.c (Fchar_to_charset_codepoint):
* text.c (Fchar_charset):
* text.c (Fsplit_char):
Update the docstrings in these functions.
* text.h (ichar_to_unicode):
Make the implementation of this a bit more realistic with
UNICODE_INTERNAL, use the new
unicode_internal_handle_bad_ichar_to_unicode().
diff -r 8df05e6e201f -r 19876e056bb7 src/ChangeLog
--- a/src/ChangeLog Sat Sep 30 16:16:08 2017 +0100
+++ b/src/ChangeLog Thu Oct 05 00:08:17 2017 +0100
@@ -1,3 +1,26 @@
+2017-10-04 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * text.c (old_mule_ichar_to_unicode):
+ Assert we haven't been handed an Ichar less than 256 in value.
+ * text.c (unicode_internal_handle_bad_ichar_to_unicode): New.
+ Handle a value passed to the inline ichar_to_unicode(), if CHR is
+ not an official Unicode character and FAIL is not
+ CONVERR_USE_PRIVATE.
+ * text.c (Fmake_char):
+ Make this API compatible with GNU and with pre-unicode-internal,
+ return a private character by default if HANDLE-ERROR is nil.
+ * text.c (Fchar_to_unicode):
+ Return nil if HANDLE-ERROR is FAIL or nil here.
+ * text.c (Funicode_to_char):
+ * text.c (Fchar_to_charset_codepoint):
+ * text.c (Fchar_charset):
+ * text.c (Fsplit_char):
+ Update the docstrings in these functions.
+ * text.h (ichar_to_unicode):
+ Make the implementation of this a bit more realistic with
+ UNICODE_INTERNAL, use the new
+ unicode_internal_handle_bad_ichar_to_unicode().
+
2017-09-30 Aidan Kehoe <kehoea(a)parhasard.net>
* text.c (old_mule_non_ascii_valid_ichar_p):
diff -r 8df05e6e201f -r 19876e056bb7 src/text.c
--- a/src/text.c Sat Sep 30 16:16:08 2017 +0100
+++ b/src/text.c Thu Oct 05 00:08:17 2017 +0100
@@ -1703,11 +1703,7 @@
old_mule_ichar_to_unicode (Ichar chr, enum converr fail)
{
ASSERT_VALID_ICHAR (chr);
-
- /* This shortcut depends on the representation of an Ichar, see text.c. */
- if (chr < 256)
- return (int) chr;
-
+ text_checking_assert (chr >= 256); /* ichar_to_unicode() handles chr < 256 */
{
int c1, c2;
Lisp_Object charset;
@@ -1862,6 +1858,39 @@
#endif /* not UNICODE_INTERNAL */
+INT_32_BIT
+unicode_internal_handle_bad_ichar_to_unicode (Ichar chr, enum converr fail)
+{
+ ASSERT_VALID_ICHAR (chr);
+ text_checking_assert (fail != CONVERR_USE_PRIVATE);
+ text_checking_assert (!valid_unicode_codepoint_p (chr,
+ UNICODE_OFFICIAL_ONLY));
+ switch (fail)
+ {
+ case CONVERR_FAIL:
+ return -1; /* ichar_to_unicode() documents -1 as "cannot convert" */
+ case CONVERR_ERROR:
+ if (chr > UNICODE_OFFICIAL_MAX)
+ {
+ args_out_of_range_3 (make_fixnum (chr), Qzero,
+ make_fixnum (UNICODE_OFFICIAL_MAX));
+ }
+ if (chr >= FIRST_UTF_16_SURROGATE)
+ {
+ args_out_of_range_3 (make_fixnum (chr), Qzero,
+ make_fixnum (FIRST_UTF_16_SURROGATE - 1));
+ }
+ /* FALLTHROUGH */
+ case CONVERR_ABORT:
+ case CONVERR_USE_PRIVATE:
+ default:
+ ABORT ();
+ case CONVERR_SUCCEED:
+ case CONVERR_SUBSTITUTE:
+ return 0xFFFD; /* Unicode replacement character */
+ }
+}
+
#endif /* MULE */
/* Take a possibly invalid Ichar value (must be >= 0) and move upwards as
@@ -5422,11 +5451,11 @@
to translate. Currently, this happens only with Unicode-internal (see
below):
-nil or `fail' Return nil
-`abort' Signal an error
-`succeed' Same as `substitute'
-`substitute' Substitute the Unicode replacement char (0xFFFD)
-`use-private' Encode using private Unicode space
+`use-private' or `nil' Encode using private Unicode space
+`fail' Return nil
+`error' Signal an error
+`succeed' Same as `substitute'
+`substitute' Substitute the Unicode replacement char (0xFFFD)
Each octet should be in the range corresponding to the offset and size
for that dimension, as defined in the charset. For a typical one-dimensional
@@ -5602,10 +5631,16 @@
*/
(charset, octet1, octet2, handle_error))
{
- enum converr fail = decode_handle_error (handle_error, 1);
+ enum converr fail;
int a1, a2;
Ichar ch;
+ if (NILP (handle_error))
+ {
+ handle_error = Quse_private;
+ }
+
+ fail = decode_handle_error (handle_error, 1);
charset = get_external_charset_codepoint (charset, octet1, octet2,
&a1, &a2, 1);
ch = charset_codepoint_to_ichar (charset, a1, a2, fail);
@@ -5622,7 +5657,7 @@
HANDLE-ERROR controls error behavior:
nil or `fail' Return nil
-`abort' Signal an error
+`error' Signal an error
`succeed' Same as `substitute'
`substitute' Substitute the Unicode replacement char (0xFFFD)
`use-private' Encode using private Unicode space
@@ -5630,9 +5665,18 @@
(character, handle_error))
{
enum converr fail = decode_handle_error (handle_error, 1);
+ INT_32_BIT result;
CHECK_CHAR_COERCE_INT (character);
- return make_fixnum (ichar_to_unicode (XCHAR (character), fail));
+
+ result = ichar_to_unicode (XCHAR (character), fail);
+
+ if (result < 0)
+ {
+ return Qnil;
+ }
+
+ return make_fixnum (result);
}
DEFUN ("unicode-to-char", Funicode_to_char, 1, 3, 0, /*
@@ -5684,7 +5728,7 @@
HANDLE-ERROR controls error behavior:
nil or `fail' Return nil
-`abort' Signal an error
+`error' Signal an error
`succeed' Same as `substitute'
`substitute' Substitute a '?' character
*/
@@ -5727,7 +5771,7 @@
HANDLE-ERROR controls error behavior:
nil or `fail' Return nil
-`abort' Signal an error
+`error' Signal an error
`succeed' Same as `substitute'
`substitute' Substitute a '?' character
*/
@@ -5761,7 +5805,7 @@
HANDLE-ERROR controls error behavior:
nil or `fail' Return nil
-`abort' Signal an error
+`error' Signal an error
`succeed' Same as `substitute'
`substitute' Substitute the Unicode replacement char (0xFFFD)
`use-private' Encode using private Unicode space
@@ -5799,7 +5843,7 @@
HANDLE-ERROR controls error behavior:
nil or `fail' Return nil
-`abort' Signal an error
+`error' Signal an error
`succeed' Same as `substitute'
`substitute' Substitute a '?' character
*/
@@ -5849,7 +5893,7 @@
HANDLE-ERROR controls error behavior:
nil or `fail' Return nil
-`abort' Signal an error
+`error' Signal an error
`succeed' Same as `substitute'
`substitute' Substitute a '?' character
*/
@@ -5928,7 +5972,7 @@
HANDLE-ERROR controls error behavior:
nil or `fail' Return nil
-`abort' Signal an error
+`error' Signal an error
`succeed' Same as `substitute'
`substitute' Substitute a '?' character
*/
diff -r 8df05e6e201f -r 19876e056bb7 src/text.h
--- a/src/text.h Sat Sep 30 16:16:08 2017 +0100
+++ b/src/text.h Thu Oct 05 00:08:17 2017 +0100
@@ -522,15 +522,17 @@
#define LAST_TRAILING_BYTE 0xFF
#endif
-#ifndef UNICODE_INTERNAL
-MODULE_API int old_mule_non_ascii_valid_ichar_p (Ichar ch);
+#ifdef UNICODE_INTERNAL
+MODULE_API INT_32_BIT unicode_internal_handle_bad_ichar_to_unicode (Ichar,
+ enum
+ converr);
+#elif defined (MULE)
+MODULE_API INT_32_BIT old_mule_non_ascii_valid_ichar_p (Ichar ch);
#endif
-/* Return whether the given Ichar is valid.
- */
-
+/* Return whether the given Ichar is valid. */
DECLARE_INLINE_HEADER (
-int
+Boolint
valid_ichar_p (Ichar ch)
)
{
@@ -826,12 +828,30 @@
Return value will be -1 if cannot convert. */
DECLARE_INLINE_HEADER (
int
-ichar_to_unicode (Ichar chr, enum converr USED_IF_OLD_MULE (fail))
+ichar_to_unicode (Ichar chr, enum converr USED_IF_MULE (fail))
)
{
ASSERT_VALID_ICHAR (chr);
-#if defined (MULE) && !defined (UNICODE_INTERNAL)
+ /* This shortcut depends on the representation of an Ichar, see text.c. */
+ if (chr < 256)
+ {
+ return (int) chr;
+ }
+
+#ifdef UNICODE_INTERNAL
+ if (fail == CONVERR_USE_PRIVATE)
+ {
+ return (int) chr;
+ }
+
+ if (valid_unicode_codepoint_p (chr, UNICODE_OFFICIAL_ONLY))
+ {
+ return (int) chr;
+ }
+
+ return unicode_internal_handle_bad_ichar_to_unicode (chr, fail);
+#elif defined (MULE)
return old_mule_ichar_to_unicode (chr, fail);
#else
/* Unicode-internal or non-Mule */
--
‘As I sat looking up at the Guinness ad, I could never figure out /
How your man stayed up on the surfboard after forty pints of stout’
(C. Moore)