[PATCH] Handle UTF-8 more robustly; pass through information about incorrect sequences
17 years, 5 months
Aidan Kehoe
I’m running with this code, and it--combined with the following Lisp--makes
IRCing on a channel where most of the traffic is in UTF-8, but some people
occasionally send ISO 8859-1 much more comfortable.
I’m not certain about the sanity of the UTF-16 part of this, mind. More work
to be done.
(loop for i from #x00 to #xFF
with glyph = nil
with char-table = (specifier-instance current-display-table)
do (setq glyph (make-glyph (vector 'string :data (string i))))
(set-glyph-face glyph 'red)
(put-char-table (decode-char 'ucs (+ #x200000 i)) glyph char-table))
src/ChangeLog addition:
2007-07-21 Aidan Kehoe <kehoea(a)parhasard.net>
* charset.h:
* mule-coding.c:
* mule-coding.c (dynarr_add_2022_one_dimension):
* mule-coding.c (dynarr_add_2022_two_dimensions):
* mule-coding.c (struct iso2022_coding_stream):
* mule-coding.c (decode_unicode_char):
* mule-coding.c (indicate_invalid_utf_8):
* mule-coding.c (iso2022_decode):
* unicode.c:
* unicode.c (struct unicode_coding_stream):
* unicode.c (decode_unicode_char):
* unicode.c (DECODE_ERROR_OCTET):
* unicode.c (indicate_invalid_utf_8):
* unicode.c (encode_unicode_char_1):
* unicode.c (encode_unicode_char):
* unicode.c (unicode_convert):
Make UTF-8 handling more robust; indicate error sequences when
decoding, passing the octets as distinct from the corresponding
ISO8859-1 characters, and (by default) writing them to disk on
encoding. Don't accept UTF-8 sequences longer than four octets on
reading in the utf-8 coding system; do accept them in the ISO IR 196
handling, since we decode Unicode error sequences to "Unicode" code
points starting at 0x200000, and will need to save them as such
in escape-quoted.
This change means that when a non-UTF-8 file is opened as UTF-8,
one change made, and immediately saved, the non-ASCII characters
are not corrupted. In Europe, this is a distinct win.
Don't error on invalid UTF-16 sequences; pass them through, using
the same error octets.
XEmacs Trunk source patch:
Diff command: cvs -q diff -Nu
Files affected: src/unicode.c
===================================================================
RCS src/mule-coding.c
===================================================================
RCS src/charset.h
===================================================================
RCS
Index: src/charset.h
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/charset.h,v
retrieving revision 1.16
diff -u -u -r1.16 charset.h
--- src/charset.h 2006/11/12 13:40:07 1.16
+++ src/charset.h 2007/07/21 15:03:21
@@ -572,7 +572,10 @@
void encode_unicode_char (Lisp_Object USED_IF_MULE (charset), int h,
int USED_IF_MULE (l), unsigned_char_dynarr *dst,
- enum unicode_type type, unsigned int little_endian);
+ enum unicode_type type, unsigned int little_endian,
+ int write_error_characters_as_such);
+
+#define UNICODE_ERROR_OCTET_RANGE_START 0x200000
void set_charset_registries(Lisp_Object charset, Lisp_Object registries);
Index: src/mule-coding.c
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/mule-coding.c,v
retrieving revision 1.39
diff -u -u -r1.39 mule-coding.c
--- src/mule-coding.c 2006/11/23 13:43:19 1.39
+++ src/mule-coding.c 2007/07/21 15:04:59
@@ -104,7 +104,7 @@
if (XCHARSET_ENCODE_AS_UTF_8 (charset))
{
encode_unicode_char (charset, c & charmask, 0,
- dst, UNICODE_UTF_8, 0);
+ dst, UNICODE_UTF_8, 0, 0);
}
else
{
@@ -123,7 +123,7 @@
encode_unicode_char (charset,
ch & charmask,
c & charmask, dst,
- UNICODE_UTF_8, 0);
+ UNICODE_UTF_8, 0, 0);
}
else
{
@@ -969,6 +969,7 @@
/* Used for handling UTF-8. */
unsigned char counter;
+ unsigned char indicated_length;
};
static const struct memory_description ccs_description_1[] =
@@ -1804,6 +1805,39 @@
}
}
+/* Note that this name conflicts with a function in unicode.c. */
+static void
+decode_unicode_char (int ucs, unsigned_char_dynarr *dst)
+{
+ Ibyte work[MAX_ICHAR_LEN];
+ int len;
+ Lisp_Object chr;
+
+ chr = Funicode_to_char(make_int(ucs), Qnil);
+ assert (!NILP(chr));
+ len = set_itext_ichar (work, XCHAR(chr));
+ Dynarr_add_many (dst, work, len);
+}
+
+#define DECODE_ERROR_OCTET(octet, dst) \
+ decode_unicode_char ((octet) + UNICODE_ERROR_OCTET_RANGE_START, dst)
+
+static inline void
+indicate_invalid_utf_8 (unsigned char indicated_length,
+ unsigned char counter,
+ int ch, unsigned_char_dynarr *dst)
+{
+ Binbyte stored = indicated_length - counter;
+ Binbyte mask = "\x00\x00\xC0\xE0\xF0\xF8\xFC"[indicated_length];
+
+ while (stored > 0)
+ {
+ DECODE_ERROR_OCTET (((ch >> (6 * (stored - 1))) & 0x3f) | mask,
+ dst);
+ mask = 0x80, stored--;
+ }
+}
+
/* Convert ISO2022-format data to internal format. */
static Bytecount
@@ -1907,9 +1941,7 @@
else if (flags & ISO_STATE_UTF_8)
{
unsigned char counter = data->counter;
- Ibyte work[MAX_ICHAR_LEN];
- int len;
- Lisp_Object chr;
+ unsigned char indicated_length = data->indicated_length;
if (ISO_CODE_ESC == c)
{
@@ -1919,74 +1951,123 @@
data->esc_bytes_index = 1;
continue;
}
-
- switch (counter)
- {
- case 0:
- if (c >= 0xfc)
- {
- ch = c & 0x01;
- counter = 5;
- }
- else if (c >= 0xf8)
- {
- ch = c & 0x03;
- counter = 4;
- }
- else if (c >= 0xf0)
- {
- ch = c & 0x07;
- counter = 3;
- }
- else if (c >= 0xe0)
- {
- ch = c & 0x0f;
- counter = 2;
- }
- else if (c >= 0xc0)
- {
- ch = c & 0x1f;
- counter = 1;
- }
- else
- /* ASCII, or the lower control characters.
-
- Perhaps we should signal an error if the character is in
- the range 0x80-0xc0; this is illegal UTF-8. */
- Dynarr_add (dst, (c & 0x7f));
-
- break;
- case 1:
- ch = (ch << 6) | (c & 0x3f);
- chr = Funicode_to_char(make_int(ch), Qnil);
-
- if (!NILP (chr))
- {
- assert(CHARP(chr));
- len = set_itext_ichar (work, XCHAR(chr));
- Dynarr_add_many (dst, work, len);
- }
- else
- {
- /* Shouldn't happen, this code should only be enabled in
- XEmacsen with support for all of Unicode. */
- Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
- Dynarr_add (dst, 34 + 128);
- Dynarr_add (dst, 46 + 128);
- }
-
- ch = 0;
- counter = 0;
- break;
- default:
- ch = (ch << 6) | (c & 0x3f);
- counter--;
- }
- if (str->eof)
- DECODE_OUTPUT_PARTIAL_CHAR (ch, dst);
+ if (0 == counter)
+ {
+ if (0 == (c & 0x80))
+ {
+ /* ASCII. */
+ decode_unicode_char (c, dst);
+ }
+ else if (0 == (c & 0x40))
+ {
+ /* Highest bit set, second highest not--there's
+ something wrong. */
+ DECODE_ERROR_OCTET (c, dst);
+ }
+ else if (0 == (c & 0x20))
+ {
+ ch = c & 0x1f;
+ counter = 1;
+ indicated_length = 2;
+ }
+ else if (0 == (c & 0x10))
+ {
+ ch = c & 0x0f;
+ counter = 2;
+ indicated_length = 3;
+ }
+ else if (0 == (c & 0x08))
+ {
+ ch = c & 0x0f;
+ counter = 3;
+ indicated_length = 4;
+ }
+ /* We support lengths longer than 4 here, since we want to
+ represent UTF-8 error chars as distinct from the
+ corresponding ISO 8859-1 characters in escape-quoted.
+
+ However, we can't differentiate UTF-8 error chars as
+ written to disk, and UTF-8 errors in escape-quoted. This
+ is not a big problem;
+ non-Unicode-chars-encoded-as-UTF-8-in-ISO-2022 is not
+ deployed, in practice, so if such a sequence of octets
+ occurs, XEmacs generated it. */
+ else if (0 == (c & 0x04))
+ {
+ ch = c & 0x03;
+ counter = 4;
+ indicated_length = 5;
+ }
+ else if (0 == (c & 0x02))
+ {
+ ch = c & 0x01;
+ counter = 5;
+ indicated_length = 6;
+ }
+ else
+ {
+ /* #xFF is not a valid leading byte in any form of
+ UTF-8. */
+ DECODE_ERROR_OCTET (c, dst);
+
+ }
+ }
+ else
+ {
+ /* counter != 0 */
+ if ((0 == (c & 0x80)) || (0 != (c & 0x40)))
+ {
+ indicate_invalid_utf_8(indicated_length,
+ counter,
+ ch, dst);
+ if (c & 0x80)
+ {
+ DECODE_ERROR_OCTET (c, dst);
+ }
+ else
+ {
+ /* The character just read is ASCII. Treat it as
+ such. */
+ decode_unicode_char (c, dst);
+ }
+ ch = 0;
+ counter = 0;
+ }
+ else
+ {
+ ch = (ch << 6) | (c & 0x3f);
+ counter--;
+ /* Just processed the final byte. Emit the character,
+ avoiding over-long sequences. */
+ if (!counter)
+ {
+ if ((ch < 0x80) ||
+ ((ch < 0x800) && indicated_length > 2) ||
+ ((ch < 0x1000) && indicated_length > 3) ||
+ ((ch < 0x10000) && indicated_length > 4))
+ {
+ indicate_invalid_utf_8(indicated_length,
+ counter,
+ ch, dst);
+ }
+ else
+ {
+ decode_unicode_char (ch, dst);
+ }
+ ch = 0;
+ }
+ }
+ }
+
+ if (str->eof && ch)
+ {
+ DECODE_ERROR_OCTET (ch, dst);
+ ch = 0;
+ }
data->counter = counter;
+ data->indicated_length = indicated_length;
}
else if (byte_c0_p (c) || byte_c1_p (c))
{ /* Control characters */
Index: src/unicode.c
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/unicode.c,v
retrieving revision 1.37
diff -u -u -r1.37 unicode.c
--- src/unicode.c 2007/05/13 11:11:30 1.37
+++ src/unicode.c 2007/07/21 15:05:45
@@ -146,13 +146,6 @@
(1) User-defined charsets: It would be inconvenient to require all
dumped user-defined charsets to be reloaded at init time.
- (2) Starting up in a non-ISO-8859-1 directory. If we load at run-time,
- we don't load the tables until after we've parsed the current
- directories, and we run into a real bootstrapping problem, if the
- directories themselves are non-ISO-8859-1. This is potentially fixable
- once we switch to using Unicode internally, so we don't have to do any
- conversion (other than the automatic kind, e.g. UTF-16 to UTF-8).
-
NB With run-time loading, we load in init-mule-at-startup, in
mule-cmds.el. This is called from startup.el, which is quite late in
the initialization process -- but data-directory isn't set until then.
@@ -1703,6 +1696,7 @@
{
/* decode */
unsigned char counter;
+ unsigned char indicated_length;
int seen_char;
/* encode */
Lisp_Object current_charset;
@@ -1716,11 +1710,6 @@
DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (unicode);
-/* Decode a UCS-2 or UCS-4 character into a buffer. If the lookup fails, use
- <GETA MARK> (U+3013) of JIS X 0208, which means correct character
- is not found, instead.
- #### do something more appropriate (use blob?)
- Danger, Will Robinson! Data loss. Should we signal user? */
static void
decode_unicode_char (int ch, unsigned_char_dynarr *dst,
struct unicode_coding_stream *data,
@@ -1755,9 +1744,32 @@
data->seen_char = 1;
}
+#define DECODE_ERROR_OCTET(octet, dst, data, ignore_bom) \
+ decode_unicode_char ((octet) + UNICODE_ERROR_OCTET_RANGE_START, \
+ dst, data, ignore_bom)
+
+static inline void
+indicate_invalid_utf_8 (unsigned char indicated_length,
+ unsigned char counter,
+ int ch, unsigned_char_dynarr *dst,
+ struct unicode_coding_stream *data,
+ unsigned int ignore_bom)
+{
+ Binbyte stored = indicated_length - counter;
+ Binbyte mask = "\x00\x00\xC0\xE0\xF0\xF8\xFC"[indicated_length];
+
+ while (stored > 0)
+ {
+ DECODE_ERROR_OCTET (((ch >> (6 * (stored - 1))) & 0x3f) | mask,
+ dst, data, ignore_bom);
+ mask = 0x80, stored--;
+ }
+}
+
static void
encode_unicode_char_1 (int code, unsigned_char_dynarr *dst,
- enum unicode_type type, unsigned int little_endian)
+ enum unicode_type type, unsigned int little_endian,
+ int write_error_characters_as_such)
{
switch (type)
{
@@ -1768,16 +1780,25 @@
Dynarr_add (dst, (unsigned char) (code & 255));
Dynarr_add (dst, (unsigned char) ((code >> 8) & 255));
} else {
- /* Little endian; least significant byte first. */
- int first, second;
-
- CODE_TO_UTF_16_SURROGATES(code, first, second);
-
- Dynarr_add (dst, (unsigned char) (first & 255));
- Dynarr_add (dst, (unsigned char) ((first >> 8) & 255));
-
- Dynarr_add (dst, (unsigned char) (second & 255));
- Dynarr_add (dst, (unsigned char) ((second >> 8) & 255));
+ if (write_error_characters_as_such &&
+ code >= UNICODE_ERROR_OCTET_RANGE_START &&
+ code < (UNICODE_ERROR_OCTET_RANGE_START + 0x100))
+ {
+ Dynarr_add (dst, (unsigned char) ((code & 0xFF)));
+ }
+ else
+ {
+ /* Little endian; least significant byte first. */
+ int first, second;
+
+ CODE_TO_UTF_16_SURROGATES(code, first, second);
+
+ Dynarr_add (dst, (unsigned char) (first & 255));
+ Dynarr_add (dst, (unsigned char) ((first >> 8) & 255));
+
+ Dynarr_add (dst, (unsigned char) (second & 255));
+ Dynarr_add (dst, (unsigned char) ((second >> 8) & 255));
+ }
}
}
else
@@ -1786,16 +1807,25 @@
Dynarr_add (dst, (unsigned char) ((code >> 8) & 255));
Dynarr_add (dst, (unsigned char) (code & 255));
} else {
- /* Big endian; most significant byte first. */
- int first, second;
-
- CODE_TO_UTF_16_SURROGATES(code, first, second);
-
- Dynarr_add (dst, (unsigned char) ((first >> 8) & 255));
- Dynarr_add (dst, (unsigned char) (first & 255));
-
- Dynarr_add (dst, (unsigned char) ((second >> 8) & 255));
- Dynarr_add (dst, (unsigned char) (second & 255));
+ if (write_error_characters_as_such &&
+ code >= UNICODE_ERROR_OCTET_RANGE_START &&
+ code < (UNICODE_ERROR_OCTET_RANGE_START + 0x100))
+ {
+ Dynarr_add (dst, (unsigned char) ((code & 0xFF)));
+ }
+ else
+ {
+ /* Big endian; most significant byte first. */
+ int first, second;
+
+ CODE_TO_UTF_16_SURROGATES(code, first, second);
+
+ Dynarr_add (dst, (unsigned char) ((first >> 8) & 255));
+ Dynarr_add (dst, (unsigned char) (first & 255));
+
+ Dynarr_add (dst, (unsigned char) ((second >> 8) & 255));
+ Dynarr_add (dst, (unsigned char) (second & 255));
+ }
}
}
break;
@@ -1803,17 +1833,35 @@
case UNICODE_UCS_4:
if (little_endian)
{
- Dynarr_add (dst, (unsigned char) (code & 255));
- Dynarr_add (dst, (unsigned char) ((code >> 8) & 255));
- Dynarr_add (dst, (unsigned char) ((code >> 16) & 255));
- Dynarr_add (dst, (unsigned char) (code >> 24));
+ if (write_error_characters_as_such &&
+ code >= UNICODE_ERROR_OCTET_RANGE_START &&
+ code < (UNICODE_ERROR_OCTET_RANGE_START + 0x100))
+ {
+ Dynarr_add (dst, (unsigned char) ((code & 0xFF)));
+ }
+ else
+ {
+ Dynarr_add (dst, (unsigned char) (code & 255));
+ Dynarr_add (dst, (unsigned char) ((code >> 8) & 255));
+ Dynarr_add (dst, (unsigned char) ((code >> 16) & 255));
+ Dynarr_add (dst, (unsigned char) (code >> 24));
+ }
}
else
{
- Dynarr_add (dst, (unsigned char) (code >> 24));
- Dynarr_add (dst, (unsigned char) ((code >> 16) & 255));
- Dynarr_add (dst, (unsigned char) ((code >> 8) & 255));
- Dynarr_add (dst, (unsigned char) (code & 255));
+ if (write_error_characters_as_such &&
+ code >= UNICODE_ERROR_OCTET_RANGE_START &&
+ code < (UNICODE_ERROR_OCTET_RANGE_START + 0x100))
+ {
+ Dynarr_add (dst, (unsigned char) ((code & 0xFF)));
+ }
+ else
+ {
+ Dynarr_add (dst, (unsigned char) (code >> 24));
+ Dynarr_add (dst, (unsigned char) ((code >> 16) & 255));
+ Dynarr_add (dst, (unsigned char) ((code >> 8) & 255));
+ Dynarr_add (dst, (unsigned char) (code & 255));
+ }
}
break;
@@ -1842,11 +1890,25 @@
}
else if (code <= 0x3ffffff)
{
- Dynarr_add (dst, (unsigned char) ((code >> 24) | 0xf8));
- Dynarr_add (dst, (unsigned char) (((code >> 18) & 0x3f) | 0x80));
- Dynarr_add (dst, (unsigned char) (((code >> 12) & 0x3f) | 0x80));
- Dynarr_add (dst, (unsigned char) (((code >> 6) & 0x3f) | 0x80));
- Dynarr_add (dst, (unsigned char) ((code & 0x3f) | 0x80));
+
+#if !(UNICODE_ERROR_OCTET_RANGE_START > 0x1fffff \
+ && UNICODE_ERROR_OCTET_RANGE_START < 0x3ffffff)
+#error "This code needs to be rewritten. "
+#endif
+ if (write_error_characters_as_such &&
+ code >= UNICODE_ERROR_OCTET_RANGE_START &&
+ code < (UNICODE_ERROR_OCTET_RANGE_START + 0x100))
+ {
+ Dynarr_add (dst, (unsigned char) ((code & 0xFF)));
+ }
+ else
+ {
+ Dynarr_add (dst, (unsigned char) ((code >> 24) | 0xf8));
+ Dynarr_add (dst, (unsigned char) (((code >> 18) & 0x3f) | 0x80));
+ Dynarr_add (dst, (unsigned char) (((code >> 12) & 0x3f) | 0x80));
+ Dynarr_add (dst, (unsigned char) (((code >> 6) & 0x3f) | 0x80));
+ Dynarr_add (dst, (unsigned char) ((code & 0x3f) | 0x80));
+ }
}
else
{
@@ -1870,7 +1932,8 @@
void
encode_unicode_char (Lisp_Object USED_IF_MULE (charset), int h,
int USED_IF_MULE (l), unsigned_char_dynarr *dst,
- enum unicode_type type, unsigned int little_endian)
+ enum unicode_type type, unsigned int little_endian,
+ int write_error_characters_as_such)
{
#ifdef MULE
int code = ichar_to_unicode (make_ichar (charset, h & 127, l & 127));
@@ -1896,7 +1959,8 @@
int code = h;
#endif /* MULE */
- encode_unicode_char_1 (code, dst, type, little_endian);
+ encode_unicode_char_1 (code, dst, type, little_endian,
+ write_error_characters_as_such);
}
static Bytecount
@@ -1915,6 +1979,8 @@
if (str->direction == CODING_DECODE)
{
unsigned char counter = data->counter;
+ unsigned char indicated_length
+ = data->indicated_length;
while (n--)
{
@@ -1923,46 +1989,91 @@
switch (type)
{
case UNICODE_UTF_8:
- switch (counter)
- {
- case 0:
- if (c >= 0xfc)
- {
- ch = c & 0x01;
- counter = 5;
- }
- else if (c >= 0xf8)
- {
- ch = c & 0x03;
- counter = 4;
- }
- else if (c >= 0xf0)
- {
- ch = c & 0x07;
- counter = 3;
- }
- else if (c >= 0xe0)
- {
- ch = c & 0x0f;
- counter = 2;
- }
- else if (c >= 0xc0)
- {
- ch = c & 0x1f;
- counter = 1;
- }
- else
- decode_unicode_char (c, dst, data, ignore_bom);
- break;
- case 1:
- ch = (ch << 6) | (c & 0x3f);
- decode_unicode_char (ch, dst, data, ignore_bom);
- ch = 0;
- counter = 0;
- break;
- default:
- ch = (ch << 6) | (c & 0x3f);
- counter--;
+ if (0 == counter)
+ {
+ if (0 == (c & 0x80))
+ {
+ /* ASCII. */
+ decode_unicode_char (c, dst, data, ignore_bom);
+ }
+ else if (0 == (c & 0x40))
+ {
+ /* Highest bit set, second highest not--there's
+ something wrong. */
+ DECODE_ERROR_OCTET (c, dst, data, ignore_bom);
+ }
+ else if (0 == (c & 0x20))
+ {
+ ch = c & 0x1f;
+ counter = 1;
+ indicated_length = 2;
+ }
+ else if (0 == (c & 0x10))
+ {
+ ch = c & 0x0f;
+ counter = 2;
+ indicated_length = 3;
+ }
+ else if (0 == (c & 0x08))
+ {
+ ch = c & 0x0f;
+ counter = 3;
+ indicated_length = 4;
+ }
+ else
+ {
+ /* We don't supports lengths longer than 4 in
+ external-format data. */
+ DECODE_ERROR_OCTET (c, dst, data, ignore_bom);
+
+ }
+ }
+ else
+ {
+ /* counter != 0 */
+ if ((0 == (c & 0x80)) || (0 != (c & 0x40)))
+ {
+ indicate_invalid_utf_8(indicated_length,
+ counter,
+ ch, dst, data, ignore_bom);
+ if (c & 0x80)
+ {
+ DECODE_ERROR_OCTET (c, dst, data, ignore_bom);
+ }
+ else
+ {
+ /* The character just read is ASCII. Treat it as
+ such. */
+ decode_unicode_char (c, dst, data, ignore_bom);
+ }
+ ch = 0;
+ counter = 0;
+ }
+ else
+ {
+ ch = (ch << 6) | (c & 0x3f);
+ counter--;
+ /* Just processed the final byte. Emit the character,
+ avoiding over-long sequences. */
+ if (!counter)
+ {
+ if ((ch < 0x80) ||
+ ((ch < 0x800) && indicated_length > 2) ||
+ ((ch < 0x1000) && indicated_length > 3) ||
+ ((ch < 0x10000) && indicated_length > 4))
+ {
+ indicate_invalid_utf_8(indicated_length,
+ counter,
+ ch, dst, data,
+ ignore_bom);
+ }
+ else
+ {
+ decode_unicode_char (ch, dst, data, ignore_bom);
+ }
+ ch = 0;
+ }
+ }
}
break;
@@ -1987,20 +2098,27 @@
if (counter == 32)
{
int tempch;
- /* #### Signalling an error may be a bit extreme. Should
- we try and read it in anyway? */
+
if (!valid_utf_16_first_surrogate(ch >> 16)
|| !valid_utf_16_last_surrogate(ch & 0xFFFF))
{
- signal_error(Qtext_conversion_error,
- "Invalid UTF-16 surrogate sequence",
- Qunbound);
+ DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
+ ignore_bom);
}
- tempch = utf_16_surrogates_to_code((ch >> 16),
- (ch & 0xffff));
+ else
+ {
+ tempch = utf_16_surrogates_to_code((ch >> 16),
+ (ch & 0xffff));
+ decode_unicode_char(tempch, dst, data, ignore_bom);
+ }
ch = 0;
counter = 0;
- decode_unicode_char(tempch, dst, data, ignore_bom);
}
break;
@@ -2012,15 +2130,37 @@
counter += 8;
if (counter == 32)
{
- int tempch = ch;
- ch = 0;
- counter = 0;
- if (tempch < 0)
+ if (ch < 0)
{
- /* !!#### indicate an error */
- tempch = '~';
+ if (little_endian)
+ {
+ DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
+ ignore_bom);
+ }
+ else
+ {
+ DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
+ ignore_bom);
+ DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
+ ignore_bom);
+ }
}
- decode_unicode_char (tempch, dst, data, ignore_bom);
+ else
+ {
+ decode_unicode_char (ch, dst, data, ignore_bom);
+ }
+ ch = 0;
+ counter = 0;
}
break;
@@ -2032,10 +2172,14 @@
}
}
- if (str->eof)
- DECODE_OUTPUT_PARTIAL_CHAR (ch, dst);
+ if (str->eof && ch)
+ {
+ DECODE_ERROR_OCTET (ch, dst, data, ignore_bom);
+ ch = 0;
+ }
data->counter = counter;
+ data->indicated_length = indicated_length;
}
else
{
@@ -2054,7 +2198,7 @@
if (XCODING_SYSTEM_UNICODE_NEED_BOM (str->codesys) && !data->wrote_bom)
{
- encode_unicode_char_1 (0xFEFF, dst, type, little_endian);
+ encode_unicode_char_1 (0xFEFF, dst, type, little_endian, 1);
data->wrote_bom = 1;
}
@@ -2068,7 +2212,7 @@
{ /* Processing ASCII character */
ch = 0;
encode_unicode_char (Vcharset_ascii, c, 0, dst, type,
- little_endian);
+ little_endian, 1);
char_boundary = 1;
}
@@ -2092,20 +2236,20 @@
for the rationale behind subtracting #xa0 from the
character's code. */
encode_unicode_char (Vcharset_control_1, c - 0xa0, 0, dst,
- type, little_endian);
+ type, little_endian, 1);
else
{
switch (XCHARSET_REP_BYTES (charset))
{
case 2:
encode_unicode_char (charset, c, 0, dst, type,
- little_endian);
+ little_endian, 1);
break;
case 3:
if (XCHARSET_PRIVATE_P (charset))
{
encode_unicode_char (charset, c, 0, dst, type,
- little_endian);
+ little_endian, 1);
ch = 0;
}
else if (ch)
@@ -2119,7 +2263,7 @@
handle this yet. */
encode_unicode_char (Vcharset_ascii, '~', 0,
dst, type,
- little_endian);
+ little_endian, 1);
}
else
{
@@ -2138,7 +2282,7 @@
else
#endif /* ENABLE_COMPOSITE_CHARS */
encode_unicode_char (charset, ch, c, dst, type,
- little_endian);
+ little_endian, 1);
ch = 0;
}
else
@@ -2151,7 +2295,7 @@
if (ch)
{
encode_unicode_char (charset, ch, c, dst, type,
- little_endian);
+ little_endian, 1);
ch = 0;
}
else
--
On the quay of the little Black Sea port, where the rescued pair came once
more into contact with civilization, Dobrinton was bitten by a dog which was
assumed to be mad, though it may only have been indiscriminating. (Saki)
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://calypso.tux.org/cgi-bin/mailman/listinfo/xemacs-patches
[PATCH] Move the display table code to use char tables, not vectors
17 years, 5 months
Aidan Kehoe
lisp/ChangeLog addition:
2007-07-21 Aidan Kehoe <kehoea(a)parhasard.net>
* mule/cyril-util.el:
* mule/cyril-util.el (cyrillic-encode-koi8-r-char): Removed.
* mule/cyril-util.el (cyrillic-encode-alternativnyj-char):
Removed. No-one uses these functions in google.com/codesearch,
GNU have a comment doubting their utility, and their
implementation is trivial.
* mule/cyril-util.el (cyrillic-language-alist):
Reformatted.
* mule/cyril-util.el (standard-display-table)): Removed. It wasn't
used anyway.
* mule/cyril-util.el (standard-display-cyrillic-translit):
Rewrite it to work with character tables as display tables, and
not to abort with an error.
2007-07-21 Aidan Kehoe <kehoea(a)parhasard.net>
* disp-table.el:
* disp-table.el (make-display-table): Moved earlier in the file in
a weak attempt at making syncing with GNU easier.
* disp-table.el (frob-display-table):
Autoload it, accept TAG-SET, for editing specifiers.
* disp-table.el (describe-display-table):
Have it handle character sets.
* disp-table.el (standard-display-8bit-1):
* disp-table.el (standard-display-8bit):
* disp-table.el (standard-display-default-1):
* disp-table.el (standard-display-ascii):
* disp-table.el (standard-display-g1):
* disp-table.el (standard-display-graphic):
* disp-table.el (standard-display-underline):
* disp-table.el (standard-display-european):
Rework them all to use put-char-table, remove-char-table instead
of aset. Limit standard-display-g1, standard-display-graphic to
TTYs; have standard-display-underline work on X11 too.
* font.el (font-caps-display-table):
Use put-char-table instead of aset when editing a display table.
* x-init.el:
* x-init.el (tab):
Create the initial display table as a char-table, not a vector.
XEmacs Trunk source patch:
Diff command: cvs -q diff -Nu
Files affected: lisp/mule/cyril-util.el
===================================================================
RCS lisp/x-init.el
===================================================================
RCS lisp/font.el
===================================================================
RCS lisp/disp-table.el
===================================================================
RCS
Index: lisp/disp-table.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/disp-table.el,v
retrieving revision 1.2
diff -u -u -r1.2 disp-table.el
--- lisp/disp-table.el 1997/12/06 22:26:09 1.2
+++ lisp/disp-table.el 2007/07/21 13:21:11
@@ -28,56 +28,45 @@
;;; Commentary:
-;; #### Need lots of work. make-display-table depends on a value
-;; that is a define in the C code. Maybe we should just move the
-;; function into C.
-
-;; #### display-tables-as-vectors is really evil and a big pain in
-;; the ass.
-
;; Rewritten for XEmacs July 1995, Ben Wing.
;;; Code:
+;;;###autoload
+(defun make-display-table ()
+ "Return a new, empty display table."
+ (make-char-table 'generic))
+
(defun describe-display-table (dt)
"Describe the display table DT in a help buffer."
(with-displaying-help-buffer
(lambda ()
- (princ "\nCharacter display glyph sequences:\n")
- (save-excursion
- (let ((vector (make-vector 256 nil))
- (i 0))
- (while (< i 256)
- (aset vector i (aref dt i))
- (incf i))
- ;; FSF calls `describe-vector' here, but it is so incredibly
- ;; lame a function for that name that I cannot bring myself
- ;; to porting it. Here is what `describe-vector' does:
- (terpri)
- (let ((old (aref vector 0))
- (oldpos 0)
- (i 1)
- str)
- (while (<= i 256)
- (when (or (= i 256)
- (not (equal old (aref vector i))))
- (if (eq oldpos (1- i))
- (princ (format "%s\t\t%s\n"
- (single-key-description (int-char oldpos))
- old))
- (setq str (format "%s - %s"
- (single-key-description (int-char oldpos))
- (single-key-description (int-char (1- i)))))
- (princ str)
- (princ (make-string (max (- 2 (/ (length str)
- tab-width)) 1) ?\t))
- (princ old)
- (terpri))
- (or (= i 256)
- (setq old (aref vector i)
- oldpos i)))
- (incf i))))))))
+ (princ "\nFor various glyphs that GNU Emacs uses the display table for,
+see the XEmacs specifiers `truncation-glyph' , `continuation-glyph',
+`control-arrow-glyph', `octal-escape-glyph' and the others described in the
+docstring of `make-glyph'. \n\n")
+ (map-char-table
+ (lambda (range value)
+ (cond
+ ((eq range t)
+ (princ "\nAll characters: \n")
+ (princ (format " %S" value)))
+ ((eq 'charset (and (symbolp range) (type-of (find-charset range))))
+ (princ (format "\n\nCharset %S: \n" (charset-name range)))
+ (princ (format " %S" value)))
+ ((vectorp range)
+ (princ (format "\n\nCharset %S, row %d \n"
+ (charset-name (aref value 0))
+ (aref value 1)))
+ (princ (format " %S\n\n" value)))
+ ((characterp range)
+ (princ (format "\nCharacter U+%04X, %S: "
+ range (if (fboundp 'split-char)
+ (split-char range)
+ (list 'ascii (char-to-int range)))))
+ (princ (format " %S" value))))
+ nil) dt))))
;;;###autoload
(defun describe-current-display-table (&optional domain)
@@ -89,21 +78,17 @@
(describe-display-table disptab)
(message "No display table"))))
-;;;###autoload
-(defun make-display-table ()
- "Return a new, empty display table."
- (make-vector 256 nil))
-
;; #### we need a generic frob-specifier function.
;; #### this also needs to be redone like frob-face-property.
;; Let me say one more time how much dynamic scoping sucks.
-(defun frob-display-table (fdt-function fdt-locale)
+;;;###autoload
+(defun frob-display-table (fdt-function fdt-locale &optional tag-set)
(or fdt-locale (setq fdt-locale 'global))
- (or (specifier-spec-list current-display-table fdt-locale)
+ (or (specifier-spec-list current-display-table fdt-locale tag-set)
(add-spec-to-specifier current-display-table (make-display-table)
- fdt-locale))
+ fdt-locale tag-set))
(add-spec-list-to-specifier
current-display-table
(list (cons fdt-locale
@@ -112,16 +97,18 @@
(funcall fdt-function (cdr fdt-x))
fdt-x)
(cdar (specifier-spec-list current-display-table
- fdt-locale)))))))
+ fdt-locale tag-set)))))))
(defun standard-display-8bit-1 (dt l h)
(while (<= l h)
- (aset dt l (char-to-string l))
+ (remove-char-table (int-to-char l) dt)
(setq l (1+ l))))
;;;###autoload
(defun standard-display-8bit (l h &optional locale)
- "Display characters in the range L to H literally."
+ "Display characters in the range L to H literally.
+
+Of course, `literally' has no meaning here. "
(frob-display-table
(lambda (x)
(standard-display-8bit-1 x l h))
@@ -129,7 +116,7 @@
(defun standard-display-default-1 (dt l h)
(while (<= l h)
- (aset dt l nil)
+ (put-char-table (int-to-char l) (format "\\%o" l) dt)
(setq l (1+ l))))
;;;###autoload
@@ -145,12 +132,9 @@
"Display character C using printable string S."
(frob-display-table
(lambda (x)
- (aset x c s))
+ (put-char-table c s x))
locale))
-
-;;; #### should frob in a 'tty locale.
-
;;;###autoload
(defun standard-display-g1 (c sc &optional locale)
"Display character C as character SC in the g1 character set.
@@ -158,11 +142,8 @@
it is meaningless for an X frame."
(frob-display-table
(lambda (x)
- (aset x c (concat "\016" (char-to-string sc) "\017")))
- locale))
-
-
-;;; #### should frob in a 'tty locale.
+ (put-char-table c (concat "\016" (char-to-string sc) "\017") x))
+ locale '(tty)))
;;;###autoload
(defun standard-display-graphic (c gc &optional locale)
@@ -171,37 +152,36 @@
X frame."
(frob-display-table
(lambda (x)
- (aset x c (concat "\e(0" (char-to-string gc) "\e(B")))
- locale))
-
-;;; #### should frob in a 'tty locale.
-;;; #### the FSF equivalent of this makes this character be displayed
-;;; in the 'underline face. There's no current way to do this with
-;;; XEmacs display tables.
+ (put-char-table c (concat "\e(0" (char-to-string gc) "\e(B") x))
+ locale '(tty)))
;;;###autoload
(defun standard-display-underline (c uc &optional locale)
"Display character C as character UC plus underlining."
(frob-display-table
(lambda (x)
- (aset x c (concat "\e[4m" (char-to-string uc) "\e[m")))
+ (let (glyph)
+ (setq glyph (make-glyph (vector 'string :data (char-to-string uc))))
+ (set-glyph-face glyph 'underline)
+ (put-char-table c glyph x)))
locale))
;;;###autoload
(defun standard-display-european (arg &optional locale)
"Toggle display of European characters encoded with ISO 8859.
-When enabled, characters in the range of 160 to 255 display not
-as octal escapes, but as accented characters.
-With prefix argument, enable European character display iff arg is positive."
+When enabled (the default), characters in the range of 160 to 255 display
+not as octal escapes, but as accented characters. With prefix argument,
+enable European character display iff arg is positive."
(interactive "P")
- (frob-display-table
- (lambda (x)
- (if (or (<= (prefix-numeric-value arg) 0)
- (and (null arg)
- (equal (aref x 160) (char-to-string 160))))
- (standard-display-default-1 x 160 255)
- (standard-display-8bit-1 x 160 255)))
- locale))
+ (if (<= (prefix-numeric-value arg) 0)
+ (frob-display-table
+ (lambda (x)
+ (standard-display-default-1 x 160 255))
+ locale)
+ (frob-display-table
+ (lambda (x)
+ (standard-display-8bit-1 x 160 255))
+ locale)))
(provide 'disp-table)
Index: lisp/font.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/font.el,v
retrieving revision 1.20
diff -u -u -r1.20 font.el
--- lisp/font.el 2006/04/25 14:01:53 1.20
+++ lisp/font.el 2007/07/21 13:21:20
@@ -250,17 +250,17 @@
(i 0))
;; Standard ASCII characters
(while (< i 26)
- (aset table (+ i ?a) (+ i ?A))
+ (put-char-table (+ i ?a) (+ i ?A) table)
(setq i (1+ i)))
;; Now ISO translations
;; #### FIXME what's this for??
(setq i 224)
(while (< i 247) ;; Agrave - Ouml
- (aset table i (- i 32))
+ (put-char-table i (- i 32) table)
(setq i (1+ i)))
(setq i 248)
(while (< i 255) ;; Oslash - Thorn
- (aset table i (- i 32))
+ (put-char-table i (- i 32) table)
(setq i (1+ i)))
table))
Index: lisp/x-init.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/x-init.el,v
retrieving revision 1.19
diff -u -u -r1.19 x-init.el
--- lisp/x-init.el 2007/07/16 12:26:03 1.19
+++ lisp/x-init.el 2007/07/21 13:21:55
@@ -336,11 +336,11 @@
;; due to a universally crocked font width specification. Display it
;; as a space since that's what seems to be expected.
;;
-;; (make-vector 256 nil) instead of (make-display-table) because
+;; (make-char-table 'generic) instead of (make-display-table) because
;; make-display-table doesn't exist when this file is loaded.
-(let ((tab (make-vector 256 nil)))
- (aset tab 160 " ")
+(let ((tab (make-char-table 'generic)))
+ (put-char-table 160 " " tab)
(set-specifier current-display-table tab 'global 'x))
;;; x-init.el ends here
Index: lisp/mule/cyril-util.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/mule/cyril-util.el,v
retrieving revision 1.6
diff -u -u -r1.6 cyril-util.el
--- lisp/mule/cyril-util.el 2003/02/06 06:35:52 1.6
+++ lisp/mule/cyril-util.el 2007/07/21 13:21:56
@@ -28,28 +28,16 @@
;;; Code:
-;;;###autoload
-(defun cyrillic-encode-koi8-r-char (char)
- "Return KOI8-R external character code of CHAR if appropriate."
- (get-char-table char cyrillic-koi8-r-to-external-code-table))
-
-;;;###autoload
-(defun cyrillic-encode-alternativnyj-char (char)
- "Return ALTERNATIVNYJ external character code of CHAR if appropriate."
- (get-char-table char cyrillic-alternativnyj-to-external-code-table))
-
;; Display
;; Written by Valery Alexeev <valery(a)math.uga.edu>.
(defvar cyrillic-language-alist
- (list '("Belorussian") '("Bulgarian") '("Macedonian")
- '("Russian") '("Serbian") '("Ukrainian"))
- "*List of known cyrillic languages")
+ '(("Belorussian") ("Bulgarian") ("Macedonian")
+ ("Russian") ("Serbian") ("Ukrainian"))
+ "*List of known Cyrillic languages")
-(defvar standard-display-table)
-
;;;###autoload
(defun standard-display-cyrillic-translit (&optional cyrillic-language)
"Display a cyrillic buffer using a transliteration.
@@ -67,129 +55,123 @@
(completing-read
"Cyrillic language (default nil): "
cyrillic-language-alist nil t nil nil nil))))
-
- (or standard-display-table
- (setq standard-display-table (make-display-table)))
-
+
(if (equal cyrillic-language "")
(setq cyrillic-language nil))
- (if (null cyrillic-language)
- (setq standard-display-table (make-display-table))
- (aset standard-display-table ?,LP(B [?a])
- (aset standard-display-table ?,LQ(B [?b])
- (aset standard-display-table ?,LR(B [?v])
- (aset standard-display-table ?,LS(B [?g])
- (aset standard-display-table ?,LT(B [?d])
- (aset standard-display-table ?,LU(B [?e])
- (aset standard-display-table ?,Lq(B [?y?o])
- (aset standard-display-table ?,LV(B [?z?h])
- (aset standard-display-table ?,LW(B [?z])
- (aset standard-display-table ?,LX(B [?i])
- (aset standard-display-table ?,LY(B [?j])
- (aset standard-display-table ?,LZ(B [?k])
- (aset standard-display-table ?,L[(B [?l])
- (aset standard-display-table ?,L\(B [?m])
- (aset standard-display-table ?,L](B [?n])
- (aset standard-display-table ?,L^(B [?o])
- (aset standard-display-table ?,L_(B [?p])
- (aset standard-display-table ?,L`(B [?r])
- (aset standard-display-table ?,La(B [?s])
- (aset standard-display-table ?,Lb(B [?t])
- (aset standard-display-table ?,Lc(B [?u])
- (aset standard-display-table ?,Ld(B [?f])
- (aset standard-display-table ?,Le(B [?k?h])
- (aset standard-display-table ?,Lf(B [?t?s])
- (aset standard-display-table ?,Lg(B [?c?h])
- (aset standard-display-table ?,Lh(B [?s?h])
- (aset standard-display-table ?,Li(B [?s?c?h])
- (aset standard-display-table ?,Lj(B [?~])
- (aset standard-display-table ?,Lk(B [?y])
- (aset standard-display-table ?,Ll(B [?'])
- (aset standard-display-table ?,Lm(B [?e?'])
- (aset standard-display-table ?,Ln(B [?y?u])
- (aset standard-display-table ?,Lo(B [?y?a])
+ (frob-display-table
+ (lambda (display-table)
+ (if (null cyrillic-language)
+ (remove-char-table 'cyrillic-iso8859-5 display-table)
+ (put-char-table ?,LP(B "a" display-table)
+ (put-char-table ?,LQ(B "b" display-table)
+ (put-char-table ?,LR(B "v" display-table)
+ (put-char-table ?,LS(B "g" display-table)
+ (put-char-table ?,LT(B "d" display-table)
+ (put-char-table ?,LU(B "e" display-table)
+ (put-char-table ?,Lq(B "yo" display-table)
+ (put-char-table ?,LV(B "zh" display-table)
+ (put-char-table ?,LW(B "z" display-table)
+ (put-char-table ?,LX(B "i" display-table)
+ (put-char-table ?,LY(B "j" display-table)
+ (put-char-table ?,LZ(B "k" display-table)
+ (put-char-table ?,L[(B "l" display-table)
+ (put-char-table ?,L\(B "m" display-table)
+ (put-char-table ?,L](B "n" display-table)
+ (put-char-table ?,L^(B "o" display-table)
+ (put-char-table ?,L_(B "p" display-table)
+ (put-char-table ?,L`(B "r" display-table)
+ (put-char-table ?,La(B "s" display-table)
+ (put-char-table ?,Lb(B "t" display-table)
+ (put-char-table ?,Lc(B "u" display-table)
+ (put-char-table ?,Ld(B "f" display-table)
+ (put-char-table ?,Le(B "kh" display-table)
+ (put-char-table ?,Lf(B "ts" display-table)
+ (put-char-table ?,Lg(B "ch" display-table)
+ (put-char-table ?,Lh(B "sh" display-table)
+ (put-char-table ?,Li(B "sch" display-table)
+ (put-char-table ?,Lj(B "~" display-table)
+ (put-char-table ?,Lk(B "y" display-table)
+ (put-char-table ?,Ll(B "'" display-table)
+ (put-char-table ?,Lm(B "e'" display-table)
+ (put-char-table ?,Ln(B "yu" display-table)
+ (put-char-table ?,Lo(B "ya" display-table)
+ (put-char-table ?,L0(B "A" display-table)
+ (put-char-table ?,L1(B "B" display-table)
+ (put-char-table ?,L2(B "V" display-table)
+ (put-char-table ?,L3(B "G" display-table)
+ (put-char-table ?,L4(B "D" display-table)
+ (put-char-table ?,L5(B "E" display-table)
+ (put-char-table ?,L!(B "Yo" display-table)
+ (put-char-table ?,L6(B "Zh" display-table)
+ (put-char-table ?,L7(B "Z" display-table)
+ (put-char-table ?,L8(B "I" display-table)
+ (put-char-table ?,L9(B "J" display-table)
+ (put-char-table ?,L:(B "K" display-table)
+ (put-char-table ?,L;(B "L" display-table)
+ (put-char-table ?,L<(B "M" display-table)
+ (put-char-table ?,L=(B "N" display-table)
+ (put-char-table ?,L>(B "O" display-table)
+ (put-char-table ?,L?(B "P" display-table)
+ (put-char-table ?,L@(B "R" display-table)
+ (put-char-table ?,LA(B "S" display-table)
+ (put-char-table ?,LB(B "T" display-table)
+ (put-char-table ?,LC(B "U" display-table)
+ (put-char-table ?,LD(B "F" display-table)
+ (put-char-table ?,LE(B "Kh" display-table)
+ (put-char-table ?,LF(B "Ts" display-table)
+ (put-char-table ?,LG(B "Ch" display-table)
+ (put-char-table ?,LH(B "Sh" display-table)
+ (put-char-table ?,LI(B "Sch" display-table)
+ (put-char-table ?,LJ(B "~" display-table)
+ (put-char-table ?,LK(B "Y" display-table)
+ (put-char-table ?,LL(B "'" display-table)
+ (put-char-table ?,LM(B "E'" display-table)
+ (put-char-table ?,LN(B "Yu" display-table)
+ (put-char-table ?,LO(B "Ya" display-table)
+ (put-char-table ?,Lt(B "ie" display-table)
+ (put-char-table ?,Lw(B "i" display-table)
+ (put-char-table ?,L~(B "u" display-table)
+ (put-char-table ?,Lr(B "dj" display-table)
+ (put-char-table ?,L{(B "chj" display-table)
+ (put-char-table ?,Ls(B "gj" display-table)
+ (put-char-table ?,Lu(B "s" display-table)
+ (put-char-table ?,L|(B "k" display-table)
+ (put-char-table ?,Lv(B "i" display-table)
+ (put-char-table ?,Lx(B "j" display-table)
+ (put-char-table ?,Ly(B "lj" display-table)
+ (put-char-table ?,Lz(B "nj" display-table)
+ (put-char-table ?,L(B "dz" display-table)
+ (put-char-table ?,L$(B "Ye" display-table)
+ (put-char-table ?,L'(B "Yi" display-table)
+ (put-char-table ?,L.(B "U" display-table)
+ (put-char-table ?,L"(B "Dj" display-table)
+ (put-char-table ?,L+(B "Chj" display-table)
+ (put-char-table ?,L#(B "Gj" display-table)
+ (put-char-table ?,L%(B "S" display-table)
+ (put-char-table ?,L,(B "K" display-table)
+ (put-char-table ?,L&(B "I" display-table)
+ (put-char-table ?,L((B "J" display-table)
+ (put-char-table ?,L)(B "Lj" display-table)
+ (put-char-table ?,L*(B "Nj" display-table)
+ (put-char-table ?,L/(B "Dj" display-table)
- (aset standard-display-table ?,L0(B [?A])
- (aset standard-display-table ?,L1(B [?B])
- (aset standard-display-table ?,L2(B [?V])
- (aset standard-display-table ?,L3(B [?G])
- (aset standard-display-table ?,L4(B [?D])
- (aset standard-display-table ?,L5(B [?E])
- (aset standard-display-table ?,L!(B [?Y?o])
- (aset standard-display-table ?,L6(B [?Z?h])
- (aset standard-display-table ?,L7(B [?Z])
- (aset standard-display-table ?,L8(B [?I])
- (aset standard-display-table ?,L9(B [?J])
- (aset standard-display-table ?,L:(B [?K])
- (aset standard-display-table ?,L;(B [?L])
- (aset standard-display-table ?,L<(B [?M])
- (aset standard-display-table ?,L=(B [?N])
- (aset standard-display-table ?,L>(B [?O])
- (aset standard-display-table ?,L?(B [?P])
- (aset standard-display-table ?,L@(B [?R])
- (aset standard-display-table ?,LA(B [?S])
- (aset standard-display-table ?,LB(B [?T])
- (aset standard-display-table ?,LC(B [?U])
- (aset standard-display-table ?,LD(B [?F])
- (aset standard-display-table ?,LE(B [?K?h])
- (aset standard-display-table ?,LF(B [?T?s])
- (aset standard-display-table ?,LG(B [?C?h])
- (aset standard-display-table ?,LH(B [?S?h])
- (aset standard-display-table ?,LI(B [?S?c?h])
- (aset standard-display-table ?,LJ(B [?~])
- (aset standard-display-table ?,LK(B [?Y])
- (aset standard-display-table ?,LL(B [?'])
- (aset standard-display-table ?,LM(B [?E?'])
- (aset standard-display-table ?,LN(B [?Y?u])
- (aset standard-display-table ?,LO(B [?Y?a])
-
- (aset standard-display-table ?,Lt(B [?i?e])
- (aset standard-display-table ?,Lw(B [?i])
- (aset standard-display-table ?,L~(B [?u])
- (aset standard-display-table ?,Lr(B [?d?j])
- (aset standard-display-table ?,L{(B [?c?h?j])
- (aset standard-display-table ?,Ls(B [?g?j])
- (aset standard-display-table ?,Lu(B [?s])
- (aset standard-display-table ?,L|(B [?k])
- (aset standard-display-table ?,Lv(B [?i])
- (aset standard-display-table ?,Lx(B [?j])
- (aset standard-display-table ?,Ly(B [?l?j])
- (aset standard-display-table ?,Lz(B [?n?j])
- (aset standard-display-table ?,L(B [?d?z])
-
- (aset standard-display-table ?,L$(B [?Y?e])
- (aset standard-display-table ?,L'(B [?Y?i])
- (aset standard-display-table ?,L.(B [?U])
- (aset standard-display-table ?,L"(B [?D?j])
- (aset standard-display-table ?,L+(B [?C?h?j])
- (aset standard-display-table ?,L#(B [?G?j])
- (aset standard-display-table ?,L%(B [?S])
- (aset standard-display-table ?,L,(B [?K])
- (aset standard-display-table ?,L&(B [?I])
- (aset standard-display-table ?,L((B [?J])
- (aset standard-display-table ?,L)(B [?L?j])
- (aset standard-display-table ?,L*(B [?N?j])
- (aset standard-display-table ?,L/(B [?D?j])
-
- (when (equal cyrillic-language "Bulgarian")
- (aset standard-display-table ?,Li(B [?s?h?t])
- (aset standard-display-table ?,LI(B [?S?h?t])
- (aset standard-display-table ?,Ln(B [?i?u])
- (aset standard-display-table ?,LN(B [?I?u])
- (aset standard-display-table ?,Lo(B [?i?a])
- (aset standard-display-table ?,LO(B [?I?a]))
-
- (when (equal cyrillic-language "Ukrainian") ; based on the official
- ; transliteration table
- (aset standard-display-table ?,LX(B [?y])
- (aset standard-display-table ?,L8(B [?Y])
- (aset standard-display-table ?,LY(B [?i])
- (aset standard-display-table ?,L9(B [?Y])
- (aset standard-display-table ?,Ln(B [?i?u])
- (aset standard-display-table ?,Lo(B [?i?a]))))
-
-
+ (when (equal cyrillic-language "Bulgarian")
+ (put-char-table ?,Li(B "sht" display-table)
+ (put-char-table ?,LI(B "Sht" display-table)
+ (put-char-table ?,Ln(B "iu" display-table)
+ (put-char-table ?,LN(B "Iu" display-table)
+ (put-char-table ?,Lo(B "ia" display-table)
+ (put-char-table ?,LO(B "Ia" display-table))
+
+ (when (equal cyrillic-language "Ukrainian") ; based on the official
+ ; transliteration table
+ (put-char-table ?,LX(B "y" display-table)
+ (put-char-table ?,L8(B "Y" display-table)
+ (put-char-table ?,LY(B "i" display-table)
+ (put-char-table ?,L9(B "Y" display-table)
+ (put-char-table ?,Ln(B "iu" display-table)
+ (put-char-table ?,Lo(B "ia" display-table)))) nil))
;;
(provide 'cyril-util)
--
On the quay of the little Black Sea port, where the rescued pair came once
more into contact with civilization, Dobrinton was bitten by a dog which was
assumed to be mad, though it may only have been indiscriminating. (Saki)
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://calypso.tux.org/cgi-bin/mailman/listinfo/xemacs-patches
Re: [COMMIT] Two small changes, isearch-mode.el, x-init.el
17 years, 5 months
Aidan Kehoe
Ar an séú lá déag de mí Iúil, scríobh Vin Shelton:
> On 7/16/07, Aidan Kehoe <kehoea(a)parhasard.net> wrote:
>
> > I just committed a couple of changes without generating a patcher mail
> > (lost interest in it after it tied up an XEmacs for two hours.)
>
> Can't you simply supply the names of the files you modified? For me,
> that reduces the patcher time to nearly instantaneous.
How? The docstring says editing the diff command is not kosher:
When called interactively, use a prefix (ARG) to override the value of
the diff command to use for this project. Note that this is *not* the way
to restrict the diff to certain files. If you want to work on a subset of
the project (e.g. some files, subdirectories etc), you have two
alternatives:
- for temporary subprojects, you can use the function
`patcher-mail-subproject', which lets you specify the list of modified
files / directories.
- otherwise, you can also define the subprojects in the variable
`patcher-subprojects' and continue using this function.
--
On the quay of the little Black Sea port, where the rescued pair came once
more into contact with civilization, Dobrinton was bitten by a dog which was
assumed to be mad, though it may only have been indiscriminating. (Saki)
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://calypso.tux.org/cgi-bin/mailman/listinfo/xemacs-patches
[PATCH] Calendar fixes
17 years, 6 months
Jeff Miller
I'll be committing this shortly.
Jeff
ChangeLog addition:
2007-07-10 Jeff Miller <jeff.miller(a)xemacs.org>
* cal-coptic.el (coptic-prompt-for-date): fix typo
(cal-assoc-ignore -> cal-assoc-string)
* cal-dst.el, cal-html.el,
cal-tex.el, icalendar.el: indentation fixes.
* calendar.el: added autoload for calendar-goto-bahai-date
* diary-lib.el (diary-mode): use make-hook-local to really make
after-save-hook buffer local
calendar source patch:
Diff command: cvs -q diff -uN
Files affected: icalendar.el diary-lib.el calendar.el cal-tex.el cal-html.el cal-dst.el cal-coptic.el
Index: cal-coptic.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/packages/xemacs-packages/calendar/cal-coptic.el,v
retrieving revision 1.6
diff -u -u -r1.6 cal-coptic.el
--- cal-coptic.el 2007/04/16 02:12:24 1.6
+++ cal-coptic.el 2007/07/10 22:41:23
@@ -158,7 +158,7 @@
(calendar-absolute-from-gregorian today))))))
(completion-ignore-case t)
;; XEmacs change, we don't have assoc-string
- (month (cdr (cal-assoc-ignore
+ (month (cdr (cal-assoc-string
(completing-read
(format "%s calendar month name: " coptic-name)
(mapcar 'list
Index: cal-dst.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/packages/xemacs-packages/calendar/cal-dst.el,v
retrieving revision 1.9
diff -u -u -r1.9 cal-dst.el
--- cal-dst.el 2007/04/16 02:12:24 1.9
+++ cal-dst.el 2007/07/10 22:41:23
@@ -50,9 +50,9 @@
current date apply to all years. This is faster, but not always
correct, since the dates of daylight saving transitions sometimes
change."
-:type 'boolean
-:version "22.1"
-:group 'calendar)
+:type 'boolean
+:version "22.1"
+:group 'calendar)
(defvar calendar-current-time-zone-cache nil
"Cache for result of `calendar-current-time-zone'.")
Index: cal-html.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/packages/xemacs-packages/calendar/cal-html.el,v
retrieving revision 1.2
diff -u -u -r1.2 cal-html.el
--- cal-html.el 2007/04/16 02:12:25 1.2
+++ cal-html.el 2007/07/10 22:41:23
@@ -37,30 +37,30 @@
(defgroup calendar-html nil
"Options for HTML calendars."
-:prefix "cal-html-"
-:group 'calendar)
+:prefix "cal-html-"
+:group 'calendar)
(defcustom cal-html-directory "~/public_html"
"Directory for HTML pages generated by cal-html."
-:type 'string
-:group 'calendar-html)
+:type 'string
+:group 'calendar-html)
(defcustom cal-html-print-day-number-flag nil
"Non-nil means print the day-of-the-year number in the monthly cal-html page."
-:type 'boolean
-:group 'calendar-html)
+:type 'boolean
+:group 'calendar-html)
(defcustom cal-html-year-index-cols 3
"Number of columns in the cal-html yearly index page."
-:type 'integer
-:group 'calendar-html)
+:type 'integer
+:group 'calendar-html)
(defcustom cal-html-day-abbrev-array
(calendar-abbrev-construct calendar-day-abbrev-array
calendar-day-name-array)
"Array of seven strings for abbreviated day names (starting with Sunday)."
-:type '(vector string string string string string string string)
-:group 'calendar-html)
+:type '(vector string string string string string string string)
+:group 'calendar-html)
(defcustom cal-html-css-default
(concat
@@ -80,8 +80,8 @@
" SPAN.BLOCK { color: #048; font-style: italic; }\n"
"</STYLE>\n\n")
"Default cal-html css style. You can override this with a \"cal.css\" file."
-:type 'string
-:group 'calendar-html)
+:type 'string
+:group 'calendar-html)
;;; End customizable variables.
Index: cal-tex.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/packages/xemacs-packages/calendar/cal-tex.el,v
retrieving revision 1.7
diff -u -u -r1.7 cal-tex.el
--- cal-tex.el 2007/04/16 02:12:26 1.7
+++ cal-tex.el 2007/07/10 22:41:24
@@ -79,8 +79,8 @@
(defcustom cal-tex-rules nil
"*If t, pages will be ruled in some styles."
-:type 'boolean
-:group 'calendar-tex)
+:type 'boolean
+:group 'calendar-tex)
(defcustom cal-tex-daily-string
'(let* ((year (extract-calendar-year date))
@@ -127,9 +127,9 @@
"A string giving extra LaTeX commands to insert in the calendar preamble.
For example, to include extra packages:
\"\\\\usepackage{foo}\\n\\\\usepackage{bar}\\n\"."
-:type 'string
-:group 'calendar-tex
-:version "22.1")
+:type 'string
+:group 'calendar-tex
+:version "22.1")
(defcustom cal-tex-hook nil
"*List of functions called after any LaTeX calendar buffer is generated.
@@ -137,28 +137,28 @@
characters with diacritical marks to their LaTeX equivalents, use
(add-hook 'cal-tex-hook
'(lambda () (iso-iso2tex (point-min) (point-max))))"
-:type 'hook
-:group 'calendar-tex)
+:type 'hook
+:group 'calendar-tex)
(defcustom cal-tex-year-hook nil
"*List of functions called after a LaTeX year calendar buffer is generated."
-:type 'hook
-:group 'calendar-tex)
+:type 'hook
+:group 'calendar-tex)
(defcustom cal-tex-month-hook nil
"*List of functions called after a LaTeX month calendar buffer is generated."
-:type 'hook
-:group 'calendar-tex)
+:type 'hook
+:group 'calendar-tex)
(defcustom cal-tex-week-hook nil
"*List of functions called after a LaTeX week calendar buffer is generated."
-:type 'hook
-:group 'calendar-tex)
+:type 'hook
+:group 'calendar-tex)
(defcustom cal-tex-daily-hook nil
"*List of functions called after a LaTeX daily calendar buffer is generated."
-:type 'hook
-:group 'calendar-tex)
+:type 'hook
+:group 'calendar-tex)
;;;
;;; Definitions for LaTeX code
@@ -905,6 +905,7 @@
(insert ", ")
(cal-tex-large-bf (cal-tex-month-name month))
(insert " ")
+
(cal-tex-large-bf (number-to-string day))
(if (not (string= "" (cal-tex-latexify-list holidays date)))
(progn
Index: calendar.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/packages/xemacs-packages/calendar/calendar.el,v
retrieving revision 1.15
diff -u -u -r1.15 calendar.el
--- calendar.el 2007/04/16 02:12:26 1.15
+++ calendar.el 2007/07/10 22:41:24
@@ -1862,6 +1862,10 @@
"Move cursor to Persian date."
t)
+(autoload 'calendar-goto-bahai-date "cal-bahai"
+ "Move cursor to Baha'i date."
+ t)
+
(autoload 'calendar-print-persian-date "cal-persia"
"Show the Persian date equivalents of date."
t)
Index: diary-lib.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/packages/xemacs-packages/calendar/diary-lib.el,v
retrieving revision 1.9
diff -u -u -r1.9 diary-lib.el
--- diary-lib.el 2007/04/16 02:12:27 1.9
+++ diary-lib.el 2007/07/10 22:41:24
@@ -1922,6 +1922,10 @@
(set (make-local-variable 'font-lock-defaults)
'(diary-font-lock-keywords t))
(add-to-invisibility-spec '(diary . nil))
+ ;; XEmacs change - the "local" option for add-hook does not work the
+ ;; same as Emacs, we need to use make-local-hook to make the hook
+ ;; local.
+ (make-local-hook 'after-save-hook)
(add-hook 'after-save-hook 'diary-redraw-calendar nil t)
(if diary-header-line-flag
(setq header-line-format diary-header-line-format)))
Index: icalendar.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/packages/xemacs-packages/calendar/icalendar.el,v
retrieving revision 1.3
diff -u -u -r1.3 icalendar.el
--- icalendar.el 2007/04/16 02:12:27 1.3
+++ icalendar.el 2007/07/10 22:41:25
@@ -111,8 +111,8 @@
;; ======================================================================
(defgroup icalendar nil
"Icalendar support."
-:prefix "icalendar-"
-:group 'calendar)
+:prefix "icalendar-"
+:group 'calendar)
(defcustom icalendar-import-format
"%s%d%l%o"
@@ -126,64 +126,64 @@
%s Summary, see `icalendar-import-format-summary'
%t Status, see `icalendar-import-format-status'
%u URL, see `icalendar-import-format-url'"
-:type 'string
-:group 'icalendar)
+:type 'string
+:group 'icalendar)
(defcustom icalendar-import-format-summary
"%s"
"Format string defining how the summary element is formatted.
This applies only if the summary is not empty! `%s' is replaced
by the summary."
-:type 'string
-:group 'icalendar)
+:type 'string
+:group 'icalendar)
(defcustom icalendar-import-format-description
"\n Desc: %s"
"Format string defining how the description element is formatted.
This applies only if the description is not empty! `%s' is
replaced by the description."
-:type 'string
-:group 'icalendar)
+:type 'string
+:group 'icalendar)
(defcustom icalendar-import-format-location
"\n Location: %s"
"Format string defining how the location element is formatted.
This applies only if the location is not empty! `%s' is replaced
by the location."
-:type 'string
-:group 'icalendar)
+:type 'string
+:group 'icalendar)
(defcustom icalendar-import-format-organizer
"\n Organizer: %s"
"Format string defining how the organizer element is formatted.
This applies only if the organizer is not empty! `%s' is
replaced by the organizer."
-:type 'string
-:group 'icalendar)
+:type 'string
+:group 'icalendar)
(defcustom icalendar-import-format-url
"\n URL: %s"
"Format string defining how the URL element is formatted.
This applies only if the URL is not empty! `%s' is replaced by
the URL."
-:type 'string
-:group 'icalendar)
+:type 'string
+:group 'icalendar)
(defcustom icalendar-import-format-status
"\n Status: %s"
"Format string defining how the status element is formatted.
This applies only if the status is not empty! `%s' is replaced by
the status."
-:type 'string
-:group 'icalendar)
+:type 'string
+:group 'icalendar)
(defcustom icalendar-import-format-class
"\n Class: %s"
"Format string defining how the class element is formatted.
This applies only if the class is not empty! `%s' is replaced by
the class."
-:type 'string
-:group 'icalendar)
+:type 'string
+:group 'icalendar)
(defvar icalendar-debug nil
"Enable icalendar debug messages.")
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://calypso.tux.org/cgi-bin/mailman/listinfo/xemacs-patches