changeset: 4583:2669b1b7e33b70dad0f42a94629ab3afeffa9b55
tag: tip
user: Aidan Kehoe <kehoea(a)parhasard.net>
date: Sat Jan 31 13:06:37 2009 +0000
files: src/ChangeLog src/unicode.c tests/ChangeLog tests/automated/mule-tests.el
description:
Correct little-endian UTF-16 surrogate handling.
src/ChangeLog addition:
2009-01-31 Aidan Kehoe <kehoea(a)parhasard.net>
* unicode.c (unicode_convert):
Correct little-endian UTF-16 surrogate handling.
tests/ChangeLog addition:
2009-01-31 Aidan Kehoe <kehoea(a)parhasard.net>
* automated/mule-tests.el:
Test little-endian Unicode surrogates too.
diff -r 00ed9903a988de9a983c074bf0abdc667639eeaf -r 2669b1b7e33b70dad0f42a94629ab3afeffa9b55 src/ChangeLog
--- a/src/ChangeLog Sun Jan 18 12:56:51 2009 +0000
+++ b/src/ChangeLog Sat Jan 31 13:06:37 2009 +0000
@@ -1,3 +1,8 @@ 2009-01-16 Aidan Kehoe <kehoea@parhasa
+2009-01-31 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * unicode.c (unicode_convert):
+ Correct little-endian UTF-16 surrogate handling.
+
2009-01-16 Aidan Kehoe <kehoea(a)parhasard.net>
* chartab.c (print_table_entry):
diff -r 00ed9903a988de9a983c074bf0abdc667639eeaf -r 2669b1b7e33b70dad0f42a94629ab3afeffa9b55 src/unicode.c
--- a/src/unicode.c Sun Jan 18 12:56:51 2009 +0000
+++ b/src/unicode.c Sat Jan 31 13:06:37 2009 +0000
@@ -2115,23 +2115,47 @@ unicode_convert (struct coding_stream *s
{
int tempch;
- if (!valid_utf_16_last_surrogate(ch & 0xFFFF))
- {
- DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
- ignore_bom);
- DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
- ignore_bom);
- DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
- ignore_bom);
- DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
- ignore_bom);
- }
- else
+ if (little_endian)
{
- tempch = utf_16_surrogates_to_code((ch >> 16),
- (ch & 0xffff));
- decode_unicode_char(tempch, dst, data, ignore_bom);
+ if (!valid_utf_16_last_surrogate(ch >> 16))
+ {
+ DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
+ ignore_bom);
+ }
+ else
+ {
+ tempch = utf_16_surrogates_to_code((ch & 0xffff),
+ (ch >> 16));
+ decode_unicode_char(tempch, dst, data, ignore_bom);
+ }
}
+ else
+ {
+ if (!valid_utf_16_last_surrogate(ch & 0xFFFF))
+ {
+ DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
+ ignore_bom);
+ }
+ else
+ {
+ tempch = utf_16_surrogates_to_code((ch >> 16),
+ (ch & 0xffff));
+ decode_unicode_char(tempch, dst, data, ignore_bom);
+ }
+ }
+
ch = 0;
counter = 0;
}
diff -r 00ed9903a988de9a983c074bf0abdc667639eeaf -r 2669b1b7e33b70dad0f42a94629ab3afeffa9b55 tests/ChangeLog
--- a/tests/ChangeLog Sun Jan 18 12:56:51 2009 +0000
+++ b/tests/ChangeLog Sat Jan 31 13:06:37 2009 +0000
@@ -1,3 +1,8 @@ 2009-01-18 Aidan Kehoe <kehoea@parhasa
+2009-01-31 Aidan Kehoe <kehoea(a)parhasard.net>
+
+ * automated/mule-tests.el:
+ Test little-endian Unicode surrogates too.
+
2009-01-18 Aidan Kehoe <kehoea(a)parhasard.net>
* automated/lisp-tests.el: (char-table-with-string):
diff -r 00ed9903a988de9a983c074bf0abdc667639eeaf -r 2669b1b7e33b70dad0f42a94629ab3afeffa9b55 tests/automated/mule-tests.el
--- a/tests/automated/mule-tests.el Sun Jan 18 12:56:51 2009 +0000
+++ b/tests/automated/mule-tests.el Sat Jan 31 13:06:37 2009 +0000
@@ -446,12 +446,17 @@ This is a naive implementation in Lisp.
(encode-coding-string xemacs-character 'ctext))))))
(loop
- for (code-point encoded)
- in '((#x10000 "\xd8\x00\xdc\x00")
- (#x10FFFD "\xdb\xff\xdf\xfd"))
- do (Assert (equal (encode-coding-string
- (decode-char 'ucs code-point) 'utf-16)
- encoded)))
+ for (code-point utf-16-big-endian utf-16-little-endian)
+ in '((#x10000 "\xd8\x00\xdc\x00" "\x00\xd8\x00\xdc")
+ (#x10FFFD "\xdb\xff\xdf\xfd" "\xff\xdb\xfd\xdf"))
+ do
+ (Assert (equal (encode-coding-string
+ (decode-char 'ucs code-point) 'utf-16)
+ utf-16-big-endian))
+ (Assert (equal (encode-coding-string
+ (decode-char 'ucs code-point) 'utf-16-le)
+ utf-16-little-endian))
+
;;---------------------------------------------------------------
;; Regression test for a couple of CCL-related bugs.
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://calypso.tux.org/cgi-bin/mailman/listinfo/xemacs-patches