APPROVE COMMIT
NOTE: This patch has been committed
# HG changeset patch
# User Aidan Kehoe <kehoea(a)parhasard.net>
# Date 1507158497 -3600
#      Thu Oct 05 00:08:17 2017 +0100
# Node ID 19876e056bb7630f1854719c8b037a1f2fb691e8
# Parent  8df05e6e201fbbd618c7127c762bf954028ceec3
Be more realistic, #'char-to-unicode, #'make-char
2017-10-04  Aidan Kehoe  <kehoea(a)parhasard.net>
	* text.c (old_mule_ichar_to_unicode):
	Assert we haven't been handed an Ichar less than 256 in value.
	* text.c (unicode_internal_handle_bad_ichar_to_unicode): New.
	Handle a value passed to the inline ichar_to_unicode(), if CHR is
	not an official Unicode character and FAIL is not
	CONVERR_USE_PRIVATE.
	* text.c (Fmake_char):
	Make this API compatible with GNU and with pre-unicode-internal,
	return a private character by default if HANDLE-ERROR is nil.
	* text.c (Fchar_to_unicode):
	Return nil if HANDLE-ERROR is FAIL or nil here.
	* text.c (Funicode_to_char):
	* text.c (Fchar_to_charset_codepoint):
	* text.c (Fchar_charset):
	* text.c (Fsplit_char):
	Update the docstrings in these functions.
	* text.h (ichar_to_unicode):
	Make the implementation of this a bit more realistic with
	UNICODE_INTERNAL, use the new
	unicode_internal_handle_bad_ichar_to_unicode().
diff -r 8df05e6e201f -r 19876e056bb7 src/ChangeLog
--- a/src/ChangeLog	Sat Sep 30 16:16:08 2017 +0100
+++ b/src/ChangeLog	Thu Oct 05 00:08:17 2017 +0100
@@ -1,3 +1,26 @@
+2017-10-04  Aidan Kehoe  <kehoea(a)parhasard.net>
+
+	* text.c (old_mule_ichar_to_unicode):
+	Assert we haven't been handed an Ichar less than 256 in value.
+	* text.c (unicode_internal_handle_bad_ichar_to_unicode): New.
+	Handle a value passed to the inline ichar_to_unicode(), if CHR is
+	not an official Unicode character and FAIL is not
+	CONVERR_USE_PRIVATE. 
+	* text.c (Fmake_char):
+	Make this API compatible with GNU and with pre-unicode-internal,
+	return a private character by default if HANDLE-ERROR is nil. 
+	* text.c (Fchar_to_unicode):
+	Return nil if HANDLE-ERROR is FAIL or nil here.
+	* text.c (Funicode_to_char):
+	* text.c (Fchar_to_charset_codepoint):
+	* text.c (Fchar_charset):
+	* text.c (Fsplit_char):
+	Update the docstrings in these functions.
+	* text.h (ichar_to_unicode):	
+	Make the implementation of this a bit more realistic with
+	UNICODE_INTERNAL, use the new
+	unicode_internal_handle_bad_ichar_to_unicode().
+
 2017-09-30  Aidan Kehoe  <kehoea(a)parhasard.net>
 
 	* text.c (old_mule_non_ascii_valid_ichar_p):
diff -r 8df05e6e201f -r 19876e056bb7 src/text.c
--- a/src/text.c	Sat Sep 30 16:16:08 2017 +0100
+++ b/src/text.c	Thu Oct 05 00:08:17 2017 +0100
@@ -1703,11 +1703,7 @@
 old_mule_ichar_to_unicode (Ichar chr, enum converr fail)
 {
   ASSERT_VALID_ICHAR (chr);
-
-  /* This shortcut depends on the representation of an Ichar, see text.c. */
-  if (chr < 256)
-    return (int) chr;
-
+  text_checking_assert (chr >= 256); /* ichar_to_unicode() handles < 256 */
   {
     int c1, c2;
     Lisp_Object charset;
@@ -1862,6 +1858,39 @@
 
 #endif /* not UNICODE_INTERNAL */
 
+INT_32_BIT /* Same return type as the MODULE_API prototype in text.h. */
+unicode_internal_handle_bad_ichar_to_unicode (Ichar chr, enum converr fail)
+{ /* Slow path of the inline ichar_to_unicode() for a non-official CHR. */
+  ASSERT_VALID_ICHAR (chr);
+  text_checking_assert (fail != CONVERR_USE_PRIVATE); /* inline caller returns CHR */
+  text_checking_assert (!valid_unicode_codepoint_p (chr,
+                                                    UNICODE_OFFICIAL_ONLY));
+  switch (fail)
+    {
+    case CONVERR_FAIL:
+      return -1; /* Fchar_to_unicode turns a negative result into nil */
+    case CONVERR_ERROR:
+      if (chr > UNICODE_OFFICIAL_MAX)
+        {
+          args_out_of_range_3 (make_fixnum (chr), Qzero,
+                               make_fixnum (UNICODE_OFFICIAL_MAX));
+        }
+      if (chr >= FIRST_UTF_16_SURROGATE)
+        {
+          args_out_of_range_3 (make_fixnum (chr), Qzero,
+                               make_fixnum (FIRST_UTF_16_SURROGATE));
+        }
+      /* FALLTHROUGH */
+    case CONVERR_ABORT:
+    case CONVERR_USE_PRIVATE: /* excluded by the assertion at entry */
+    default:
+      ABORT ();
+    case CONVERR_SUCCEED:
+    case CONVERR_SUBSTITUTE:
+      return 0xFFFD; /* Unicode replacement character */
+    }
+}
+
 #endif /* MULE */
 
 /* Take a possibly invalid Ichar value (must be >= 0) and move upwards as
@@ -5422,11 +5451,11 @@
 to translate.  Currently, this happens only with Unicode-internal (see
 below):
 
-nil or `fail'	Return nil
-`abort'		Signal an error
-`succeed'	Same as `substitute'
-`substitute'	Substitute the Unicode replacement char (0xFFFD)
-`use-private'	Encode using private Unicode space
+`use-private' or `nil'	Encode using private Unicode space
+`fail'                  Return nil
+`error'                 Signal an error
+`succeed'               Same as `substitute'
+`substitute'            Substitute the Unicode replacement char (0xFFFD)
 
 Each octet should be in the range corresponding to the offset and size
 for that dimension, as defined in the charset.  For a typical one-dimensional
@@ -5602,10 +5631,16 @@
 */
        (charset, octet1, octet2, handle_error))
 {
-  enum converr fail = decode_handle_error (handle_error, 1);
+  enum converr fail;
   int a1, a2;
   Ichar ch;
 
+  if (NILP (handle_error))
+    {
+      handle_error = Quse_private;
+    }
+
+  fail = decode_handle_error (handle_error, 1);
   charset = get_external_charset_codepoint (charset, octet1, octet2,
 					    &a1, &a2, 1);
   ch = charset_codepoint_to_ichar (charset, a1, a2, fail);
@@ -5622,7 +5657,7 @@
 HANDLE-ERROR controls error behavior:
 
 nil or `fail'	Return nil
-`abort'		Signal an error
+`error'		Signal an error
 `succeed'	Same as `substitute'
 `substitute'	Substitute the Unicode replacement char (0xFFFD)
 `use-private'	Encode using private Unicode space
@@ -5630,9 +5665,18 @@
        (character, handle_error))
 {
   enum converr fail = decode_handle_error (handle_error, 1);
+  INT_32_BIT result;
 
   CHECK_CHAR_COERCE_INT (character);
-  return make_fixnum (ichar_to_unicode (XCHAR (character), fail));
+
+  result = ichar_to_unicode (XCHAR (character), fail);
+
+  if (result < 0)
+    {
+      return Qnil;
+    }
+
+  return make_fixnum (result);
 }
 
 DEFUN ("unicode-to-char", Funicode_to_char, 1, 3, 0, /*
@@ -5684,7 +5728,7 @@
 HANDLE-ERROR controls error behavior:
 
 nil or `fail'	Return nil
-`abort'		Signal an error
+`error'		Signal an error
 `succeed'	Same as `substitute'
 `substitute'	Substitute a '?' character
 */
@@ -5727,7 +5771,7 @@
 HANDLE-ERROR controls error behavior:
 
 nil or `fail'	Return nil
-`abort'		Signal an error
+`error'		Signal an error
 `succeed'	Same as `substitute'
 `substitute'	Substitute a '?' character
 */
@@ -5761,7 +5805,7 @@
 HANDLE-ERROR controls error behavior:
 
 nil or `fail'	Return nil
-`abort'		Signal an error
+`error'		Signal an error
 `succeed'	Same as `substitute'
 `substitute'	Substitute the Unicode replacement char (0xFFFD)
 `use-private'	Encode using private Unicode space
@@ -5799,7 +5843,7 @@
 HANDLE-ERROR controls error behavior:
 
 nil or `fail'	Return nil
-`abort'		Signal an error
+`error'		Signal an error
 `succeed'	Same as `substitute'
 `substitute'	Substitute a '?' character
 */
@@ -5849,7 +5893,7 @@
 HANDLE-ERROR controls error behavior:
 
 nil or `fail'	Return nil
-`abort'		Signal an error
+`error'		Signal an error
 `succeed'	Same as `substitute'
 `substitute'	Substitute a '?' character
 */
@@ -5928,7 +5972,7 @@
 HANDLE-ERROR controls error behavior:
 
 nil or `fail'	Return nil
-`abort'		Signal an error
+`error'		Signal an error
 `succeed'	Same as `substitute'
 `substitute'	Substitute a '?' character
 */
diff -r 8df05e6e201f -r 19876e056bb7 src/text.h
--- a/src/text.h	Sat Sep 30 16:16:08 2017 +0100
+++ b/src/text.h	Thu Oct 05 00:08:17 2017 +0100
@@ -522,15 +522,17 @@
 #define LAST_TRAILING_BYTE 0xFF
 #endif
 
-#ifndef UNICODE_INTERNAL
-MODULE_API int old_mule_non_ascii_valid_ichar_p (Ichar ch);
+#ifdef UNICODE_INTERNAL
+MODULE_API INT_32_BIT unicode_internal_handle_bad_ichar_to_unicode (Ichar,
+                                                                    enum
+                                                                    converr);
+#elif defined (MULE)
+MODULE_API INT_32_BIT old_mule_non_ascii_valid_ichar_p (Ichar ch);
 #endif
 
-/* Return whether the given Ichar is valid.
- */
-
+/* Return whether the given Ichar is valid. */
 DECLARE_INLINE_HEADER (
-int
+Boolint
 valid_ichar_p (Ichar ch)
 )
 {
@@ -826,12 +828,30 @@
    Return value will be -1 if cannot convert. */
 DECLARE_INLINE_HEADER (
 int
-ichar_to_unicode (Ichar chr, enum converr USED_IF_OLD_MULE (fail))
+ichar_to_unicode (Ichar chr, enum converr USED_IF_MULE (fail))
 )
 {
   ASSERT_VALID_ICHAR (chr);
 
-#if defined (MULE) && !defined (UNICODE_INTERNAL)
+  /* This shortcut depends on the representation of an Ichar, see text.c. */
+  if (chr < 256)
+    {
+      return (int) chr;
+    }
+
+#ifdef UNICODE_INTERNAL
+  if (fail == CONVERR_USE_PRIVATE)
+    {
+      return (int) chr;
+    }
+
+  if (valid_unicode_codepoint_p (chr, UNICODE_OFFICIAL_ONLY))
+    {
+      return (int) chr;
+    }
+
+  return unicode_internal_handle_bad_ichar_to_unicode (chr, fail);
+#elif defined (MULE)
   return old_mule_ichar_to_unicode (chr, fail);
 #else
   /* Unicode-internal or non-Mule */
-- 
‘As I sat looking up at the Guinness ad, I could never figure out /
How your man stayed up on the surfboard after forty pints of stout’
(C. Moore)