unicode-internal-commit: redo char tables to handle ranges efficiently, further reduce memory use of category tables
Ben Wing
ben at xemacs.org
Wed Mar 31 00:59:59 EDT 2010
changeset: 5342:93f4f44ba25f
branch: ben-unicode-internal
user: Ben Wing <ben at xemacs.org>
date: Mon Mar 29 15:46:27 2010 -0500
files: README.unicode-internal lisp/ChangeLog lisp/derived.el lisp/mule/mule-category.el src/ChangeLog src/abbrev.c src/alloc.c src/callint.c src/casetab.c src/casetab.h src/chartab.c src/chartab.h src/cmds.c src/console.c src/data.c src/editfns.c src/event-stream.c src/fileio.c src/fns.c src/glyphs.c src/keymap.c src/lisp-disunion.h src/lisp-union.h src/lisp.h src/lrecord.h src/macros.c src/marker.c src/minibuf.c src/number.c src/print.c src/process-unix.c src/scrollbar.c src/symbols.c src/syntax.c src/syntax.h src/sysdep.c src/tests.c src/text.c src/text.h src/unicode.c src/window.c
description:
redo char tables to handle ranges efficiently, further reduce memory use of category tables
-------------------- ChangeLog entries follow: --------------------
lisp/ChangeLog addition:
2010-03-29 Ben Wing <ben at xemacs.org>
* mule/mule-category.el (describe-category-table):
map-category-table and map-char-table can pass in a range, not
just a char; fix calling code accordingly.
2010-03-29 Ben Wing <ben at xemacs.org>
* derived.el:
* derived.el (derived-mode-merge-syntax-tables):
map-char-table can pass in a range, not just a char; fix calling
code accordingly.
src/ChangeLog addition:
2010-03-29 Ben Wing <ben at xemacs.org>
* abbrev.c:
* abbrev.c (describe_abbrev):
* alloc.c:
* alloc.c (common_init_alloc_early):
* callint.c (Fprefix_numeric_value):
* callint.c (syms_of_callint):
* console.c (common_init_complex_vars_of_console):
* data.c (Fdiv):
* data.c (Fquo):
* event-stream.c (vars_of_event_stream):
* fileio.c (auto_save_error):
* fileio.c (Fdo_auto_save):
* fns.c:
* fns.c (bump_string_modiff):
* keymap.c (where_is_recursive_mapper):
* lisp.h (ONEP):
* lisp-disunion.h:
* lisp-disunion.h (INT_PLUS1):
* lisp-disunion.h (Qone):
* lisp-union.h:
* lisp-union.h (INT_MINUS):
* macros.c (Fstart_kbd_macro):
* marker.c (init_buffer_markers):
* marker.c (uninit_buffer_markers):
* minibuf.c (read_minibuffer_internal_unwind):
* number.c (Fdenominator):
* number.c (bigfloat_print):
* print.c (print_symbol):
* process-unix.c (get_internet_address):
* scrollbar.c (Fscrollbar_line_up):
* scrollbar.c (Fscrollbar_line_down):
* symbols.c:
* symbols.c (Fuser_variable_p):
* syntax.c (reset_buffer_syntax_cache_range):
* sysdep.c (init_system_name):
* tests.c (Ftest_hash_tables):
* unicode.c (allocate_jit_ucs_charset):
* window.c (temp_output_buffer_show):
* window.c (specifier_vars_of_window):
Create Qone, similar to Qzero, along with ONEP. Use it.
* alloc.c (clone_bit_vector):
* lisp.h:
New function.
* casetab.c (case_table_char):
* casetab.c (compute_canon_mapper):
* casetab.c (initialize_identity_mapper):
* casetab.c (compute_up_or_eqv_mapper):
* casetab.h:
* chartab.c:
* chartab.c (check_chartab_invariants):
* chartab.c (clone_chartab_table):
* chartab.c (create_new_chartab_table):
* chartab.c (free_chartab_table):
* chartab.c (compute_chartab_table_size_1):
* chartab.c (compute_chartab_table_size):
* chartab.c (compute_char_table_usage):
* chartab.c (char_table_memory_usage):
* chartab.c (put_chartab_table):
* chartab.c (put_char_table):
* chartab.c (map_chartab_table):
* chartab.c (check_if_blank):
* chartab.c (chartab_tables_equal):
* chartab.c (char_table_equal):
* chartab.c (hash_raw_chartab_val):
* chartab.c (char_table_hash):
* chartab.c (FROB1):
* chartab.c (init_chartab_tables):
* chartab.c (free_chartab_tables):
* chartab.c (print_table_entry):
* chartab.c (print_char_table):
* chartab.c (Fmake_char_table):
* chartab.c (Fcopy_char_table):
* chartab.c (Fget_char_table):
* chartab.c (put_char_table_range):
* chartab.c (Fput_char_table):
* chartab.c (map_char_table):
* chartab.c (slow_map_char_table_fun):
* chartab.c (chartab_data_validate):
* chartab.c (chartab_instantiate):
* chartab.c (category_table_equal):
* chartab.c (check_char_in_category):
* chartab.c (slow_map_category_table_fun):
* chartab.c (syms_of_chartab):
* chartab.c (vars_of_chartab):
* chartab.c (structure_type_create_chartab):
* chartab.h:
* chartab.h (SUBTAB_ARRAY_FROM_SUBTAB):
* chartab.h (ALLOCATE_LEVEL_1_CATEGORY_SUBTAB):
* chartab.h (GET_CHAR_BYTES):
* chartab.h (struct Lisp_Char_Table):
* chartab.h (enum chartab_range_type):
* chartab.h (CHAR_TABLES_PER_CATEGORY_TABLE):
* chartab.h (DESIGNATOR_TO_CHAR_TABLE):
* cmds.c (internal_self_insert):
* cmds.c (vars_of_cmds):
* editfns.c (Ftranslate_region):
* glyphs.c (display_table_entry):
* lrecord.h (enum lrecord_type):
* syntax.c (syntax_match):
* syntax.c (copy_to_mirrortab):
* syntax.c (copy_if_not_already_present):
* syntax.c (update_just_this_syntax_table):
* syntax.h:
* text.c:
* text.c (old_mule_round_up_to_valid_ichar):
* text.c (old_mule_round_down_to_valid_ichar):
* text.c (Fchar_octet):
Redo char tables and category tables again:
(1) In a category table, store one char table per bit. These char
tables are category char tables, and at level 1 instead of storing
a category subtable they store a size-256 bit vector.
Accordingly, the `category-subtable' object has been removed. All
the junk that involved multiplexing integral and Lisp_Object
values into a void * pointer has been removed. Now we're back to
always just using Lisp_Object pointers.
Accordingly, we rename/merge functions:
get_char_table_lisp merged into get_char_table
get_char_table_lisp_raw merged into get_char_table_raw
get_char_table_lisp_1 -> get_char_table_1
(2) We now efficiently store ranges of characters, not just single
characters. Essentially, at any level of the char tables, we can
store a non-table Lisp object, indicating that all the characters
spanned by that entry have the value of the given object. Accordingly,
put_char_table() now takes a range, and the function passed to
map_char_table() is given a range. Various places where
map_char_table() is called have been changed.
This also means that there is no more need for blank tables of the
sort that still exist in the Unicode tables.
IMPORTANT: The ranges passed in to put_char_table() may encompass
non-characters, especially in old-Mule where there are lots of gaps
in the representation of characters. put_char_table() simply stores
the ranges as-is, and does not try to avoid storing values for
non-characters. This doesn't affect get_char_table(), but it
definitely does affect map_char_table(). To deal with this, we
create new functions round_up_to_valid_ichar() and
round_down_to_valid_ichar() that are used to take raw ranges in
map_char_table() and coerce them into possibly smaller ranges where
the characters on both ends are valid characters. It may still be
the case that characters within the range are invalid.
This also means that if you copy a table in Lisp using
`map-char-table', the new table may not be `equal' to the first table
-- essentially, the "junk" areas where non-characters lie may be
set in the original table but unset in the new table, and
char_table_equal() isn't smart enough to ignore those "junk" areas.
Possibly this can be fixed by making map_char_table() expand its
ranges to the maximum extent.
* text.h:
* unicode.c (decode_utf_8):
* unicode.c (unicode_decode):
Rename the following functions and make them work correctly when
passed any Unicode codepoint:
valid_utf_16_first_surrogate() -> valid_unicode_leading_surrogate()
valid_utf_16_last_surrogate() -> valid_unicode_trailing_surrogate()
valid_utf_16_surrogate() -> valid_unicode_surrogate()
Change the definition of valid_unicode_codepoint_p() to reject
codepoints in the surrogate area. This also affects valid_ichar_p().
Also make valid_ichar_p() reject characters that don't fit into
30 bits, which is as much as we can store. Create ICHAR_MAX to
represent the maximum Ichar that fits in 30 bits.
Use the newer definition of valid_unicode_codepoint_p() in
decode_utf_8() rather than manually duplicating it.
diff -r ba0773838e57 -r 93f4f44ba25f README.unicode-internal
--- a/README.unicode-internal Thu Mar 25 19:16:54 2010 -0500
+++ b/README.unicode-internal Mon Mar 29 15:46:27 2010 -0500
@@ -1,3 +1,15 @@
+3-28-10
+
+TODO:
+
+(1) Get crash resizing jit charset to-unicode tables because they are dumped
+and you can't xrealloc() dumped data. Fix it.
+
+(2) Test char/category tables. Copy a table, see if it is `equal'.
+Copy a table using `map-*'; loop over chars, see if all chars hae same
+value in both tables. (Might not be `equal' under old-Mule due to gaps
+between valid characters.
+
3-13-10
1. New query method written but doesn't handle
diff -r ba0773838e57 -r 93f4f44ba25f lisp/ChangeLog
--- a/lisp/ChangeLog Thu Mar 25 19:16:54 2010 -0500
+++ b/lisp/ChangeLog Mon Mar 29 15:46:27 2010 -0500
@@ -1,3 +1,16 @@
+2010-03-29 Ben Wing <ben at xemacs.org>
+
+ * mule/mule-category.el (describe-category-table):
+ map-category-table and map-char-table can pass in a range, not
+ just a char; fix calling code accordingly.
+
+2010-03-29 Ben Wing <ben at xemacs.org>
+
+ * derived.el:
+ * derived.el (derived-mode-merge-syntax-tables):
+ map-char-table can pass in a range, not just a char; fix calling
+ code accordingly.
+
2010-03-23 Ben Wing <ben at xemacs.org>
* mule/mule-category.el (describe-category-table):
diff -r ba0773838e57 -r 93f4f44ba25f lisp/derived.el
--- a/lisp/derived.el Thu Mar 25 19:16:54 2010 -0500
+++ b/lisp/derived.el Mon Mar 29 15:46:27 2010 -0500
@@ -2,6 +2,7 @@
;;; (formerly mode-clone.el)
;; Copyright (C) 1993, 1994, 1999, 2003 Free Software Foundation, Inc.
+;; Copyright (C) 2010 Ben Wing.
;; Author: David Megginson (dmeggins at aix1.uottawa.ca)
;; Maintainer: XEmacs Development Team
@@ -421,9 +422,24 @@
;; check for inheritance.
(map-char-table
#'(lambda (key value)
- (let ((newval (get-char-table key new)))
- (cond ((eq ?@ (char-syntax-from-code newval)) ;; class at once
- (put-char-table key value new))))
+ ;; KEY may be a character or range. To deal with a range, map over
+ ;; KEY in the new table and remember each (KEY2, VALUE2) pair seen;
+ ;; then set (KEY, VALUE) in the new table, and then set each (KEY2,
+ ;; VALUE2) pair. That way, the existing values override the new
+ ;; values for places where an existing value has been set.
+ (if (characterp key)
+ (let ((newval (get-char-table key new)))
+ (cond ((eq ?@ (char-syntax-from-code newval)) ;; class at once
+ (put-char-table key value new))))
+ (let (list)
+ (map-char-table
+ #'(lambda (key2 value2)
+ (push (cons key2 value2) list)
+ nil)
+ new key)
+ (put-char-table key value new)
+ (loop for (k . v) in (nreverse list) do
+ (put-char-table k v new))))
nil)
old))
diff -r ba0773838e57 -r 93f4f44ba25f lisp/mule/mule-category.el
--- a/lisp/mule/mule-category.el Thu Mar 25 19:16:54 2010 -0500
+++ b/lisp/mule/mule-category.el Mon Mar 29 15:46:27 2010 -0500
@@ -162,8 +162,18 @@
(message "Mapping over category table ...")
(map-category-table
#'(lambda (char desig)
- (let ((list (get-char-table char chartab)))
- (put-char-table char (cons desig list) chartab))
+ (if (characterp char)
+ (let ((list (get-char-table char chartab)))
+ (put-char-table char (cons desig list) chartab))
+ (let (list)
+ (map-char-table
+ #'(lambda (char2 desig2)
+ (push (cons char2 desig2) list)
+ nil)
+ chartab char)
+ (put-char-table char (list desig) chartab)
+ (loop for (char2 . desig2) in (nreverse list) do
+ (put-char-table char2 (cons desig desig2) chartab))))
nil)
table)
(message "Mapping over char table ...")
diff -r ba0773838e57 -r 93f4f44ba25f src/ChangeLog
--- a/src/ChangeLog Thu Mar 25 19:16:54 2010 -0500
+++ b/src/ChangeLog Mon Mar 29 15:46:27 2010 -0500
@@ -1,3 +1,180 @@
+2010-03-29 Ben Wing <ben at xemacs.org>
+
+ * abbrev.c:
+ * abbrev.c (describe_abbrev):
+ * alloc.c:
+ * alloc.c (common_init_alloc_early):
+ * callint.c (Fprefix_numeric_value):
+ * callint.c (syms_of_callint):
+ * console.c (common_init_complex_vars_of_console):
+ * data.c (Fdiv):
+ * data.c (Fquo):
+ * event-stream.c (vars_of_event_stream):
+ * fileio.c (auto_save_error):
+ * fileio.c (Fdo_auto_save):
+ * fns.c:
+ * fns.c (bump_string_modiff):
+ * keymap.c (where_is_recursive_mapper):
+ * lisp.h (ONEP):
+ * lisp-disunion.h:
+ * lisp-disunion.h (INT_PLUS1):
+ * lisp-disunion.h (Qone):
+ * lisp-union.h:
+ * lisp-union.h (INT_MINUS):
+ * macros.c (Fstart_kbd_macro):
+ * marker.c (init_buffer_markers):
+ * marker.c (uninit_buffer_markers):
+ * minibuf.c (read_minibuffer_internal_unwind):
+ * number.c (Fdenominator):
+ * number.c (bigfloat_print):
+ * print.c (print_symbol):
+ * process-unix.c (get_internet_address):
+ * scrollbar.c (Fscrollbar_line_up):
+ * scrollbar.c (Fscrollbar_line_down):
+ * symbols.c:
+ * symbols.c (Fuser_variable_p):
+ * syntax.c (reset_buffer_syntax_cache_range):
+ * sysdep.c (init_system_name):
+ * tests.c (Ftest_hash_tables):
+ * unicode.c (allocate_jit_ucs_charset):
+ * window.c (temp_output_buffer_show):
+ * window.c (specifier_vars_of_window):
+ Create Qone, similar to Qzero, along with ONEP. Use it.
+
+ * alloc.c (clone_bit_vector):
+ * lisp.h:
+ New function.
+
+ * casetab.c (case_table_char):
+ * casetab.c (compute_canon_mapper):
+ * casetab.c (initialize_identity_mapper):
+ * casetab.c (compute_up_or_eqv_mapper):
+ * casetab.h:
+ * chartab.c:
+ * chartab.c (check_chartab_invariants):
+ * chartab.c (clone_chartab_table):
+ * chartab.c (create_new_chartab_table):
+ * chartab.c (free_chartab_table):
+ * chartab.c (compute_chartab_table_size_1):
+ * chartab.c (compute_chartab_table_size):
+ * chartab.c (compute_char_table_usage):
+ * chartab.c (char_table_memory_usage):
+ * chartab.c (put_chartab_table):
+ * chartab.c (put_char_table):
+ * chartab.c (map_chartab_table):
+ * chartab.c (check_if_blank):
+ * chartab.c (chartab_tables_equal):
+ * chartab.c (char_table_equal):
+ * chartab.c (hash_raw_chartab_val):
+ * chartab.c (char_table_hash):
+ * chartab.c (FROB1):
+ * chartab.c (init_chartab_tables):
+ * chartab.c (free_chartab_tables):
+ * chartab.c (print_table_entry):
+ * chartab.c (print_char_table):
+ * chartab.c (Fmake_char_table):
+ * chartab.c (Fcopy_char_table):
+ * chartab.c (Fget_char_table):
+ * chartab.c (put_char_table_range):
+ * chartab.c (Fput_char_table):
+ * chartab.c (map_char_table):
+ * chartab.c (slow_map_char_table_fun):
+ * chartab.c (chartab_data_validate):
+ * chartab.c (chartab_instantiate):
+ * chartab.c (category_table_equal):
+ * chartab.c (check_char_in_category):
+ * chartab.c (slow_map_category_table_fun):
+ * chartab.c (syms_of_chartab):
+ * chartab.c (vars_of_chartab):
+ * chartab.c (structure_type_create_chartab):
+ * chartab.h:
+ * chartab.h (SUBTAB_ARRAY_FROM_SUBTAB):
+ * chartab.h (ALLOCATE_LEVEL_1_CATEGORY_SUBTAB):
+ * chartab.h (GET_CHAR_BYTES):
+ * chartab.h (struct Lisp_Char_Table):
+ * chartab.h (enum chartab_range_type):
+ * chartab.h (CHAR_TABLES_PER_CATEGORY_TABLE):
+ * chartab.h (DESIGNATOR_TO_CHAR_TABLE):
+ * cmds.c (internal_self_insert):
+ * cmds.c (vars_of_cmds):
+ * editfns.c (Ftranslate_region):
+ * glyphs.c (display_table_entry):
+ * lrecord.h (enum lrecord_type):
+ * syntax.c (syntax_match):
+ * syntax.c (copy_to_mirrortab):
+ * syntax.c (copy_if_not_already_present):
+ * syntax.c (update_just_this_syntax_table):
+ * syntax.h:
+ * text.c:
+ * text.c (old_mule_round_up_to_valid_ichar):
+ * text.c (old_mule_round_down_to_valid_ichar):
+ * text.c (Fchar_octet):
+ Redo char tables and category tables again:
+
+ (1) In a category table, store one char table per bit. These char
+ tables are category char tables, and at level 1 instead of storing
+ a category subtable they store a size-256 bit vector.
+ Accordingly, the `category-subtable' object has been removed. All
+ the junk that involved multiplexing integral and Lisp_Object
+ values into a void * pointer has been removed. Now we're back to
+ always just using Lisp_Object pointers.
+
+ Accordingly, we rename/merge functions:
+
+ get_char_table_lisp merged into get_char_table
+ get_char_table_lisp_raw merged into get_char_table_raw
+ get_char_table_lisp_1 -> get_char_table_1
+
+ (2) We now efficiently store ranges of characters, not just single
+ characters. Essentially, at any level of the char tables, we can
+ store a non-table Lisp object, indicating that all the characters
+ spanned by that entry have the value of the given object. Accordingly,
+ put_char_table() now takes a range, and the function passed to
+ map_char_table() is given a range. Various places where
+ map_char_table() is called have been changed.
+
+ This also means that there is no more need for blank tables of the
+ sort that still exist in the Unicode tables.
+
+ IMPORTANT: The ranges passed in to put_char_table() may encompass
+ non-characters, especially in old-Mule where there are lots of gaps
+ in the representation of characters. put_char_table() simply stores
+ the ranges as-is, and does not try to avoid storing values for
+ non-characters. This doesn't affect get_char_table(), but it
+ definitely does affect map_char_table(). To deal with this, we
+ create new functions round_up_to_valid_ichar() and
+ round_down_to_valid_ichar() that are used to take raw ranges in
+ map_char_table() and coerce them into possibly smaller ranges where
+ the characters on both ends are valid characters. It may still be
+ the case that characters within the range are invalid.
+
+ This also means that if you copy a table in Lisp using
+ `map-char-table', the new table may not be `equal' to the first table
+ -- essentially, the "junk" areas where non-characters lie may be
+ set in the original table but unset in the new table, and
+ char_table_equal() isn't smart enough to ignore those "junk" areas.
+ Possibly this can be fixed by making map_char_table() expand its
+ ranges to the maximum extent.
+
+ * text.h:
+ * unicode.c (decode_utf_8):
+ * unicode.c (unicode_decode):
+
+ Rename the following functions and make them work correctly when
+ passed any Unicode codepoint:
+
+ valid_utf_16_first_surrogate() -> valid_unicode_leading_surrogate()
+ valid_utf_16_last_surrogate() -> valid_unicode_trailing_surrogate()
+ valid_utf_16_surrogate() -> valid_unicode_surrogate()
+
+ Change the definition of valid_unicode_codepoint_p() to reject
+ codepoints in the surrogate area. This also affects valid_ichar_p().
+ Also make valid_ichar_p() reject characters that don't fit into
+ 30 bits, which is as much as we can store. Create ICHAR_MAX to
+ represent the maximum Ichar that fits in 30 bits.
+ Use the newer definition of valid_unicode_codepoint_p() in
+ decode_utf_8() rather than manually duplicating it.
+
2010-03-25 Ben Wing <ben at xemacs.org>
* chartab.c: Expand comment at top about recent change involving
diff -r ba0773838e57 -r 93f4f44ba25f src/abbrev.c
--- a/src/abbrev.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/abbrev.c Mon Mar 29 15:46:27 2010 -0500
@@ -1,6 +1,6 @@
/* Primitives for word-abbrev mode.
Copyright (C) 1985, 1986, 1992, 1993 Free Software Foundation, Inc.
- Copyright (C) 2002 Ben Wing.
+ Copyright (C) 2002, 2010 Ben Wing.
This file is part of XEmacs.
@@ -440,7 +440,7 @@
static void
describe_abbrev (Lisp_Object sym, Lisp_Object stream)
{
- Lisp_Object one, count, system_flag;
+ Lisp_Object count, system_flag;
/* This function can GC */
struct buffer *buf = current_buffer;
@@ -458,23 +458,22 @@
if (NILP (XSYMBOL_VALUE (sym)))
return;
- one = make_int (1);
Fprin1 (Fsymbol_name (sym), stream);
if (!NILP (system_flag))
{
buffer_insert_ascstring (buf, " (sys)");
- Findent_to (make_int (20), one, Qnil);
+ Findent_to (make_int (20), Qone, Qnil);
}
else
- Findent_to (make_int (15), one, Qnil);
+ Findent_to (make_int (15), Qone, Qnil);
Fprin1 (count, stream);
- Findent_to (make_int (20), one, Qnil);
+ Findent_to (make_int (20), Qone, Qnil);
Fprin1 (XSYMBOL_VALUE (sym), stream);
if (!NILP (XSYMBOL (sym)->function))
{
- Findent_to (make_int (45), one, Qnil);
+ Findent_to (make_int (45), Qone, Qnil);
Fprin1 (XSYMBOL (sym)->function, stream);
}
buffer_insert_ascstring (buf, "\n");
diff -r ba0773838e57 -r 93f4f44ba25f src/alloc.c
--- a/src/alloc.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/alloc.c Mon Mar 29 15:46:27 2010 -0500
@@ -1905,6 +1905,17 @@
set_bit_vector_bit (p, i, bytevec[i]);
return wrap_bit_vector (p);
+}
+
+Lisp_Object
+clone_bit_vector (Lisp_Object bitvec)
+{
+ Elemcount len = bit_vector_length (XBIT_VECTOR (bitvec));
+ Elemcount num_longs = BIT_VECTOR_LONG_STORAGE (len);
+ Lisp_Object newvec = make_bit_vector (len, Qzero);
+ memcpy (XBIT_VECTOR (newvec)->bits, XBIT_VECTOR (bitvec)->bits,
+ num_longs * sizeof (long));
+ return newvec;
}
DEFUN ("make-bit-vector", Fmake_bit_vector, 2, 2, 0, /*
@@ -5440,6 +5451,9 @@
#ifndef Qzero
Qzero = make_int (0); /* Only used if Lisp_Object is a union type */
#endif
+#ifndef Qone
+ Qone = make_int (1); /* Only used if Lisp_Object is a union type */
+#endif
#ifndef Qnull_pointer
/* C guarantees that Qnull_pointer will be initialized to all 0 bits,
diff -r ba0773838e57 -r 93f4f44ba25f src/callint.c
--- a/src/callint.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/callint.c Mon Mar 29 15:46:27 2010 -0500
@@ -988,7 +988,7 @@
(raw))
{
if (NILP (raw))
- return make_int (1);
+ return Qone;
if (EQ (raw, Qminus))
return make_int (-1);
if (INTP (raw))
@@ -996,7 +996,7 @@
if (CONSP (raw) && INTP (XCAR (raw)))
return XCAR (raw);
- return make_int (1);
+ return Qone;
}
void
diff -r ba0773838e57 -r 93f4f44ba25f src/casetab.c
--- a/src/casetab.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/casetab.c Mon Mar 29 15:46:27 2010 -0500
@@ -204,7 +204,7 @@
case_table_char (Lisp_Object ch, Lisp_Object table)
{
Lisp_Object ct_char;
- ct_char = get_char_table_lisp (XCHAR (ch), table);
+ ct_char = get_char_table (XCHAR (ch), table);
if (NILP (ct_char))
return ch;
else
@@ -300,41 +300,53 @@
}
static int
-compute_canon_mapper (Lisp_Object UNUSED (table), Ichar code, void *val,
- void *arg)
+compute_canon_mapper (Lisp_Object UNUSED (table), Ichar from,
+ Ichar to, Lisp_Object val, void *arg)
{
Lisp_Object casetab = GET_LISP_FROM_VOID (arg);
- SET_TRT_TABLE_OF (XCASE_TABLE_CANON (casetab), code,
- TRT_TABLE_OF (XCASE_TABLE_DOWNCASE (casetab),
- TRT_TABLE_OF (XCASE_TABLE_UPCASE (casetab),
- XCHAR (GET_LISP_FROM_VOID
- (val)))));
+ Ichar code;
+
+ for (code = from; code <= to; code++)
+ if (valid_ichar_p (code))
+ SET_TRT_TABLE_OF (XCASE_TABLE_CANON (casetab), code,
+ TRT_TABLE_OF (XCASE_TABLE_DOWNCASE (casetab),
+ TRT_TABLE_OF (XCASE_TABLE_UPCASE
+ (casetab),
+ XCHAR (val))));
return 0;
}
static int
-initialize_identity_mapper (Lisp_Object UNUSED (table), Ichar code,
- void * UNUSED (val), void *arg)
+initialize_identity_mapper (Lisp_Object UNUSED (table), Ichar from,
+ Ichar to, Lisp_Object UNUSED (val), void *arg)
{
Lisp_Object trt = GET_LISP_FROM_VOID (arg);
- SET_TRT_TABLE_OF (trt, code, code);
+ Ichar code;
+
+ for (code = from; code <= to; code++)
+ if (valid_ichar_p (code))
+ SET_TRT_TABLE_OF (trt, code, code);
return 0;
}
static int
-compute_up_or_eqv_mapper (Lisp_Object UNUSED (table), Ichar code,
- void *val, void *arg)
+compute_up_or_eqv_mapper (Lisp_Object UNUSED (table), Ichar from,
+ Ichar to, Lisp_Object val, void *arg)
{
Lisp_Object inverse = GET_LISP_FROM_VOID (arg);
- Ichar toch = XCHAR (GET_LISP_FROM_VOID (val));
+ Ichar code;
+ Ichar toch = XCHAR (val);
- if (code != toch)
+ for (code = from; code <= to; code++)
{
- Ichar c = TRT_TABLE_OF (inverse, toch);
- SET_TRT_TABLE_OF (inverse, toch, code);
- SET_TRT_TABLE_OF (inverse, code, c);
+ if (valid_ichar_p (code) && code != toch)
+ {
+ Ichar c = TRT_TABLE_OF (inverse, toch);
+ SET_TRT_TABLE_OF (inverse, toch, code);
+ SET_TRT_TABLE_OF (inverse, code, c);
+ }
}
return 0;
diff -r ba0773838e57 -r 93f4f44ba25f src/casetab.h
--- a/src/casetab.h Thu Mar 25 19:16:54 2010 -0500
+++ b/src/casetab.h Mon Mar 29 15:46:27 2010 -0500
@@ -109,7 +109,7 @@
)
{
Lisp_Object TRT_char;
- TRT_char = get_char_table_lisp (ch, table);
+ TRT_char = get_char_table (ch, table);
if (NILP (TRT_char))
return ch;
else
diff -r ba0773838e57 -r 93f4f44ba25f src/chartab.c
--- a/src/chartab.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/chartab.c Mon Mar 29 15:46:27 2010 -0500
@@ -44,6 +44,11 @@
char table, to a special `category-table' object with 12
subtables, each directly storing an 8-bit bit array in place
of the Lisp_Object pointer.
+ Ben Wing: Redo char tables and category tables again, March 2010.
+ Now category tables have one char table per bit, so we have
+ 95 of them per category table. At level 1, instead of a
+ category-subtable, we have a 256-element bit vector. Char tables
+ also now can efficiently handle ranges.
*/
#include <config.h>
@@ -76,6 +81,11 @@
enum char_table_type type,
Error_Behavior errb);
+/* For a single entry at the specified level, how many characters does it
+ span? Or more correctly, if the span is described as a closed interval
+ [0, X], what is X? */
+static Ichar chartab_span_top[5];
+
/* A char table maps from characters to values.
@@ -87,29 +97,53 @@
charsets, and so the implementation was changed to use page tables,
similarly to how Unicode conversion maps are handled.
- Another possibility would be to use range tables. I think GNU Emacs
- allows char tables of both kinds, or something similar, although I don't
- know how it chooses one or the other.
+ Implementation is as follows:
- The type of value stored is normally a Lisp_Object. However, for
- category tables we instead store bit arrays. The bit arrays may also be
- smaller than the 32 (or 64) bits used to store a Lisp object pointer.
- Generally, this is more efficient than using 32 bits, as it's often only
- a single category that touches a whole lot of characters.
+ There are up to four levels, corresponding to the four bytes in a 32-bit
+ character. Each level is implemented by a char-subtable, which wraps a
+ size-256 array of Lisp_Object pointers. The value of each object in the
+ array can either be another char-subtable, corresponding to the next
+ level down, or some other value, indicating that all characters spanned
+ by this entry have the same value. A char-table contains a pointer to
+ the level-4 char-subtable, corresponding to the most-significant byte
+ (the highest 8 bits in the character).
- Possible uses:
+ The actual number of levels used by the char table may be less than 4.
+ For example, if no characters above 0xFFFFFF are given values, there
+ will be at most 3 levels. If no characters above 0xFFFF are given
+ values, there will be at most 2 levels, etc.
+
+ If MAXIMIZE_CHAR_TABLE_DEPTH is set, char tables always use the maximum
+ number of levels. This is currently the case in non-Mule, where there
+ is only one level anyway.
+
+ Category tables use a special type of char table, called a "category
+ char table". A category table is its own type of object, containing up
+ to 95 category char tables, one per category. At level 1 of a category
+ char table, instead of there being a char-subtable corresponding to the
+ lowest 8 bits of a character, there is a 256-element bit vector, with
+ the bits specifying whether the 256 characters in the range spanned by
+ the bit vector belong or don't belong to the category.
+
+ The possible values assignable to a character vary depending on the type
+ of table. At one extreme `generic' char tables allow any type of object
+ to be assigned (except Qunbound and `char-subtable' objects, which are
+ internal objects that shouldn't escape to the user level anyway). At
+ the other extreme, category tables only allow two possible values to be
+ assigned, indicating membership or nonmembership in the category.
+
+ Another implementation possibility would be to use range tables. I
+ think GNU Emacs allows char tables of both kinds (page tables and range
+ tables), or something similar, although I don't know how it chooses one
+ or the other.
+
+ Char tables are used to implement the following types of tables, among
+ others:
1) category tables
2) syntax tables
3) display tables
4) case tables
-
-
- 5) keyboard-translate-table?
-
- We provide an
- abstract type to generalize the Emacs vectors and Mule
- vectors-of-vectors goo.
*/
/************************************************************************/
@@ -122,8 +156,6 @@
code and there are a lot of differences. I originally tried abstracting
using preprocessing, but it got real ugly real fast. This is even more
the case now that char tables can use Lisp objects for their subtables. */
-
-static SUBTAB_TYPE chartab_blank[5];
static const struct memory_description char_subtable_description[] = {
{ XD_LISP_OBJECT_ARRAY, offsetof (Lisp_Char_Subtable, ptr), 256 },
@@ -146,115 +178,101 @@
char_subtable_description,
Lisp_Char_Subtable);
-#ifdef MULE
-
-/************************************************************************/
-/* Category Table subtables */
-/************************************************************************/
-
-static SUBTAB_TYPE category_chartab_blank[5];
-
-static const struct memory_description category_subtable_description[] = {
- { XD_END }
-};
-
-DEFINE_DUMPABLE_INTERNAL_LISP_OBJECT ("category-subtable", category_subtable,
- 0, category_subtable_description,
- Lisp_Category_Subtable);
-
-#endif /* MULE */
-
/************************************************************************/
/* Char table implementation */
/************************************************************************/
+#ifdef ERROR_CHECK_STRUCTURES
+
static void
-init_blank_chartab_tables_1 (Lisp_Object *blank, int catp)
+check_chartab_invariants (Lisp_Object table, int level, int catp)
{
- int i;
-
- blank[1] = ALLOCATE_LEVEL_1_SUBTAB (catp);
- blank[2] = ALLOCATE_LEVEL_N_SUBTAB ();
- blank[3] = ALLOCATE_LEVEL_N_SUBTAB ();
- blank[4] = ALLOCATE_LEVEL_N_SUBTAB ();
- for (i = 0; i < 256; i++)
+ assert (level >= 0 && level <= 4);
+ /* No subtables at the lowest level */
+ if (level == 0)
+ assert (!CHAR_SUBTABLEP (table));
+ if (catp)
{
- if (!catp)
- BASE_TYPE_ARRAY_FROM_CHAR_SUBTAB (blank[1])[i] = Qunbound;
- SUBTAB_ARRAY_FROM_SUBTAB (blank[2])[i] = blank[1];
- SUBTAB_ARRAY_FROM_SUBTAB (blank[3])[i] = blank[2];
- SUBTAB_ARRAY_FROM_SUBTAB (blank[4])[i] = blank[3];
+ /* Category tables have a bit vector instead of a char subtable
+ at level 1, and the only possible non-table values are Qone
+ or Qunbound */
+ if (level == 1 && BIT_VECTORP (table))
+ return;
+ if (level > 1 && CHAR_SUBTABLEP (table))
+ return;
+ assert (ONEP (table) || UNBOUNDP (table));
}
}
-static void
-init_blank_chartab_tables (void)
+#else
+
+#define check_chartab_invariants(table, level, catp) DO_NOTHING
+
+#endif
+
+static Lisp_Object
+clone_chartab_table (Lisp_Object table, int level, int catp)
{
- init_blank_chartab_tables_1 (chartab_blank, 0);
-}
-
-static SUBTAB_TYPE
-copy_chartab_table (SUBTAB_TYPE table, int level, int catp)
-{
- SUBTAB_TYPE newtab;
+ Lisp_Object newtab;
Bytecount size;
- text_checking_assert (level >= 1 && level <= 4);
- /* WARNING: sizeof (Lisp_Object) maybe != sizeof (SUBTAB_TYPE). */
- if (level == 1)
+ check_chartab_invariants (table, level, catp);
+
+ if (!CHAR_SUBTABLEP (table))
{
- size = (catp
- ? sizeof (CATEGORY_TAB_BASE_TYPE)
- : sizeof (CHARTAB_BASE_TYPE));
- newtab = ALLOCATE_LEVEL_1_SUBTAB (catp);
- memcpy (BASE_TYPE_ARRAY_FROM_SUBTAB (newtab, catp),
- BASE_TYPE_ARRAY_FROM_SUBTAB (table, catp),
- 256 * size);
- }
- else
- {
- size = sizeof (SUBTAB_TYPE);
- newtab = ALLOCATE_LEVEL_N_SUBTAB ();
- memcpy (SUBTAB_ARRAY_FROM_SUBTAB (newtab),
- SUBTAB_ARRAY_FROM_SUBTAB (table),
- 256 * size);
+ if (catp && BIT_VECTORP (table))
+ return clone_bit_vector (table);
+ return table;
}
- if (level >= 2)
- {
- int i;
- SUBTAB_ARRAY_TYPE tab = SUBTAB_ARRAY_FROM_SUBTAB (newtab);
- for (i = 0; i < 256; i++)
- {
- if (!SUBTAB_EQ (tab[i], SUBTAB_BLANK (catp)[level - 1]))
- tab[i] = copy_chartab_table (tab[i], level - 1, catp);
- }
- }
+ size = sizeof (Lisp_Object);
+ newtab = ALLOCATE_LEVEL_N_SUBTAB ();
+ memcpy (SUBTAB_ARRAY_FROM_SUBTAB (newtab), SUBTAB_ARRAY_FROM_SUBTAB (table),
+ 256 * size);
+
+ {
+ int i;
+ Lisp_Object *tab = SUBTAB_ARRAY_FROM_SUBTAB (newtab);
+ for (i = 0; i < 256; i++)
+ tab[i] = clone_chartab_table (tab[i], level - 1, catp);
+ }
return newtab;
}
-static SUBTAB_TYPE
+static Lisp_Object
create_new_chartab_table (int level, int catp)
{
- return copy_chartab_table (SUBTAB_BLANK (catp)[level], level, catp);
+ Lisp_Object newtab;
+
+ if (catp && level == 1)
+ return ALLOCATE_LEVEL_1_CATEGORY_SUBTAB ();
+
+ newtab = ALLOCATE_LEVEL_N_SUBTAB ();
+ {
+ int i;
+ Lisp_Object *tab = SUBTAB_ARRAY_FROM_SUBTAB (newtab);
+ for (i = 0; i < 256; i++)
+ tab[i] = Qunbound;
+ }
+
+ return newtab;
}
static void
-free_chartab_table (SUBTAB_TYPE table, int level, int catp)
+free_chartab_table (Lisp_Object table)
{
- if (level >= 2)
- {
- int i;
- SUBTAB_ARRAY_TYPE tab = SUBTAB_ARRAY_FROM_SUBTAB (table);
+ if (!CHAR_SUBTABLEP (table))
+ return;
- for (i = 0; i < 256; i++)
- {
- if (!SUBTAB_EQ (tab[i], SUBTAB_BLANK (catp)[level - 1]))
- free_chartab_table (tab[i], level - 1, catp);
- }
- }
+ {
+ int i;
+ Lisp_Object *tab = SUBTAB_ARRAY_FROM_SUBTAB (table);
+
+ for (i = 0; i < 256; i++)
+ free_chartab_table (tab[i]);
+ }
FREE_ONE_SUBTAB (table);
}
@@ -270,44 +288,37 @@
};
static Bytecount
-compute_chartab_table_size_1 (SUBTAB_TYPE table, int level, int catp,
- struct usage_stats *stats)
+compute_chartab_table_size_1 (Lisp_Object table, int catp)
{
Bytecount size = 0;
- if (level >= 2)
- {
- int i;
- SUBTAB_ARRAY_TYPE tab = SUBTAB_ARRAY_FROM_SUBTAB (table);
- for (i = 0; i < 256; i++)
- {
- if (!SUBTAB_EQ (tab[i], SUBTAB_BLANK (catp)[level - 1]))
- size += compute_chartab_table_size_1 (tab[i], level - 1,
- catp, stats);
- }
- }
+ if (catp && BIT_VECTORP (table))
+ return lisp_object_memory_usage (table);
+ if (!CHAR_SUBTABLEP (table))
+ return 0;
+
+ {
+ int i;
+ Lisp_Object *tab = SUBTAB_ARRAY_FROM_SUBTAB (table);
+ for (i = 0; i < 256; i++)
+ size += compute_chartab_table_size_1 (tab[i], catp);
+ }
- size += SUBTAB_STORAGE_SIZE (table, level, stats);
+ size += lisp_object_memory_usage (table);
return size;
}
static Bytecount
-compute_chartab_table_size (Lisp_Object chartab, int catp,
- struct usage_stats *stats)
+compute_chartab_table_size (Lisp_Object chartab, int catp)
{
- return (compute_chartab_table_size_1
- (XCHAR_TABLE_TABLE (chartab),
- XCHAR_TABLE_LEVELS (chartab),
- catp, stats));
+ return (compute_chartab_table_size_1 (XCHAR_TABLE_TABLE (chartab), catp));
}
static void
-compute_char_table_usage (Lisp_Object chartab, struct char_table_stats *stats,
- struct usage_stats *ovstats)
+compute_char_table_usage (Lisp_Object chartab, struct char_table_stats *stats)
{
stats->page_tables +=
- compute_chartab_table_size (chartab, XCHAR_TABLE_CATEGORY_P (chartab),
- ovstats);
+ compute_chartab_table_size (chartab, XCHAR_TABLE_CATEGORY_P (chartab));
}
@@ -317,28 +328,94 @@
{
struct char_table_stats *stats = (struct char_table_stats *) gustats;
- compute_char_table_usage (char_table, stats, &stats->u);
+ compute_char_table_usage (char_table, stats);
}
#endif /* MEMORY_USAGE_STATS */
-/* Note: for category tables, VAL must be an integer. The lower 8 bits
- specify a bit to set or reset. The upper bits specify an operation
- to perform and are an `enum put_category_operation'. */
+
+/* Set all characters in the range [START, END] to VAL. *TABLE is the
+ table value at this level -- either an array of 256 elements, a bit
+ vector of 256 elements (for category char tables, at level 1), or some
+ other value, specifying that all characters spanned by this entry have
+ that value. Qunbound as a value means that the characters spanned by
+ this entry all have no defined value. LEVEL is the depth (1 - 4).
+ OFFSET is the character offset corresponding to this table (essentially,
+ the bits for all levels above this one -- e.g. if level == 3, offset
+ will have bits 0-23 cleared and bits 24-31 set to the index of this
+ table in the level-4 table, indicating the span of this entry). */
+
+static void
+put_chartab_table (Lisp_Object *table, int level, int catp,
+ int offset, Ichar start, Ichar end, Lisp_Object val)
+{
+ int i;
+ int startind, endind;
+ Lisp_Object *tab;
+
+ check_chartab_invariants (*table, level, catp);
+
+ /* Are we setting the entire spanned range? If so, we don't need a
+ table at this level. If there was one previously, free it. */
+ if (start <= offset && offset + chartab_span_top[level] <= end)
+ {
+ if (CHAR_SUBTABLEP (*table))
+ free_chartab_table (*table);
+ *table = val;
+ return;
+ }
+
+ /* Else, we need a table. */
+
+ /* Compute the start and end indices into the table */
+ startind = max (0, (start - offset) >> ((level - 1) * 8));
+ endind = min (255, (end - offset) >> ((level - 1) * 8));
+ structure_checking_assert (startind <= 255);
+ structure_checking_assert (endind >= 0);
+ structure_checking_assert (startind <= endind);
+
+ if (!SUBTAB_TABLE_P (*table, catp))
+ *table = create_new_chartab_table (level, catp);
+
+ /* As usual, have to special-case for category tables. If we see
+ a bit vector, then necessarily we're at level 1. */
+ if (BIT_VECTORP (*table))
+ {
+ int bitval = ONEP (val);
+ structure_checking_assert (level == 1);
+ for (i = startind; i <= endind; i++)
+ set_bit_vector_bit (XBIT_VECTOR (*table), i, bitval);
+ return;
+ }
+
+ tab = SUBTAB_ARRAY_FROM_SUBTAB (*table);
+
+ /* Optimize the level 1 case. We could leave this out and go through
+ put_chartab_table() as normal, but we may as well write the simpler
+ and faster code. */
+ if (level == 1)
+ {
+ for (i = startind; i <= endind; i++)
+ tab[i] = val;
+ return;
+ }
+
+ for (i = startind; i <= endind; i++)
+ {
+ put_chartab_table (&tab[i], level - 1, catp,
+ offset + (i << ((level - 1) * 8)), start, end, val);
+ }
+}
void
-put_char_table (Lisp_Object chartab, Ichar ch, Lisp_Object val)
+put_char_table (Lisp_Object chartab, Ichar start, Ichar end, Lisp_Object val)
{
- /* #### NOTE NOTE NOTE!
+ /*
- If it turns out that people are often setting large ranges to a
- particular value (and particularly so if we have to implement the FSF
- characteristic of allowing `t' to signify *all* characters), then we
- should consider either (a) modifying things so that the subtables are
- actual Lisp objects and at any level there can either be a subtable or
- some other Lisp object, which signifies the value everywhere at and below
- that level (then we also don't need blank tables; instead we just use
- Qunbound); (b) modifying the code that loops over a range to create
+ [[
+
+ Another possibility for handling the setting of large ranges to a
+ particular value: modifying the code that loops over a range to create
shared subtables, similar to the current blank tables. (Then, we would
need to implement reference-counting over the tables, to know when to
free them, and copy-on-write semantics if the reference count is greater
@@ -353,48 +430,62 @@
checks for each lookup. The Lisp-object scheme also suffers from the
same slightly-over-a-power-of-2 problem.)
- shared tables we check for could potentially be specific to the particular
+ Shared tables we check for could potentially be specific to the particular
char table; we'd keep track of the shared tables in the char-table object,
and check to see if they are shared with the generic blank tables.)
- If we don't do this, we should make sure to put in a call to QUIT
- periodically when setting a range so if someone does something stupid
- like set a range of (0,2000000000), they can break out. We also need a
- big warning about this in the docs to `put-char-table' and such.
Maybe we should also allow for two different types of char tables, one
that allows for semi-efficient handling of large ranges and one that doesn't
(but is faster). In such a case it might make sense for there to be a
get_char_table() method pointer to avoid an if-check every time for the
type. Similarly if we allow the `always-maximize-table-size' option to
- be given. */
+ be given.
+
+ ]]
+
+ -- This comment is out-of-date now. It was written when the char table
+ implementation was more similar to the Unicode page-table implementation.
+ Currently we don't have blank tables any more and we do handle ranges
+ efficiently. But I'll leave the comment for the moment since it may have
+ some useful stuff in it. --ben */
int levels;
- int u4, u3, u2, u1;
#ifndef MAXIMIZE_CHAR_TABLE_DEPTH
int code_levels;
#endif
int catp = XCHAR_TABLE_CATEGORY_P (chartab);
- text_checking_assert (valid_ichar_p (ch));
- CHARTAB_BREAKUP_CHAR_CODE ((int) ch, u4, u3, u2, u1, code_levels);
+ /* DO NOT check to see whether START and END are valid Ichars. They
+ might not be (e.g. if we pass `t' to `put-char-table' so as to set
+ all characters, this function gets called with ICHAR_MAX as the value
+ of END. Under old-Mule, that isn't valid. We go ahead and ignore the
+ fact that we may be setting values for invalid characters. It won't
+ be a problem when retrieving values either, since when retrieving we
+ will always be passed valid Ichars. Only when mapping do we have
+ to worry, and then we find the nearest valid Ichar up or down. */
+
+ GET_CHAR_LEVELS (end, code_levels);
levels = CHARTAB_LEVELS (XCHAR_TABLE_LEVELS (chartab));
text_checking_assert (levels >= 1 && levels <= 4);
#ifndef MAXIMIZE_CHAR_TABLE_DEPTH
/* Make sure the chartab's tables have at least as many levels as
- the code point has: Note that the table is guaranteed to have
- at least one level, because it was created that way */
+ the code point has. */
if (levels < code_levels)
{
- int i;
+ /* If nothing is currently set at all, or we're going to
+ replace everything that's been set, no point in expanding
+ out the tables because they'll just get replaced */
+ if (!UNBOUNDP (XCHAR_TABLE_TABLE (chartab)) &&
+ !(start == 0 && end >= chartab_span_top[levels]))
+ {
+ int i;
- for (i = 2; i <= code_levels; i++)
- {
- if (levels < i)
+ for (i = levels + 1; i <= code_levels; i++)
{
- SUBTAB_TYPE old_table = XCHAR_TABLE_TABLE (chartab);
- SUBTAB_TYPE table = create_new_chartab_table (i, catp);
+ Lisp_Object old_table = XCHAR_TABLE_TABLE (chartab);
+ Lisp_Object table = create_new_chartab_table (i, catp);
XCHAR_TABLE_TABLE (chartab) = table;
SUBTAB_ARRAY_FROM_SUBTAB (table)[0] = old_table;
}
@@ -405,175 +496,95 @@
}
#endif /* not MAXIMIZE_CHAR_TABLE_DEPTH */
- /* Now, make sure there is a non-default table at each level */
- {
- int i;
- SUBTAB_TYPE table = XCHAR_TABLE_TABLE (chartab);
-
- for (i = levels; i >= 2; i--)
- {
- int ind;
-
- switch (i)
- {
- case 4: ind = u4; break;
- case 3: ind = u3; break;
- case 2: ind = u2; break;
- default: ABORT (); ind = 0;
- }
-
- if (SUBTAB_EQ (SUBTAB_ARRAY_FROM_SUBTAB (table)[ind],
- SUBTAB_BLANK (catp)[i - 1]))
- SUBTAB_ARRAY_FROM_SUBTAB (table)[ind] =
- create_new_chartab_table (i - 1, catp);
- table = SUBTAB_ARRAY_FROM_SUBTAB (table)[ind];
- }
- }
-
- /* Finally, set the character */
-
- {
- register SUBTAB_TYPE table = XCHAR_TABLE_TABLE (chartab);
- /* We are really helping the compiler here. CHARTAB_LEVELS() will
- evaluate to a constant when MAXIMIZE_CHAR_TABLE_DEPTH is true,
- so any reasonable optimizing compiler should eliminate the
- switch entirely. */
- switch (CHARTAB_LEVELS (levels))
- {
-#if 1 /* The new way */
- /* fall through */
- case 4: table = SUBTAB_ARRAY_FROM_SUBTAB (table)[u4];
- case 3: table = SUBTAB_ARRAY_FROM_SUBTAB (table)[u3];
- case 2: table = SUBTAB_ARRAY_FROM_SUBTAB (table)[u2];
- case 1:
- if (catp)
- {
-#ifdef MULE
- int ind = XINT (val);
- enum put_category_operation op =
- (enum put_category_operation) (ind >> 8);
- ind &= 0xFF;
- switch (op)
- {
- case PUT_CATEGORY_SET:
- BASE_TYPE_ARRAY_FROM_CATEGORY_SUBTAB (table)[u1] |=
- BIT_INDEX_TO_SET_MASK (ind);
- break;
-
- case PUT_CATEGORY_UNSET:
- BASE_TYPE_ARRAY_FROM_CATEGORY_SUBTAB (table)[u1] &=
- BIT_INDEX_TO_CLEAR_MASK (ind);
- break;
-
- case PUT_CATEGORY_RESET:
- BASE_TYPE_ARRAY_FROM_CATEGORY_SUBTAB (table)[u1] = 0;
- break;
-
- case PUT_CATEGORY_RESET_AND_SET:
- BASE_TYPE_ARRAY_FROM_CATEGORY_SUBTAB (table)[u1] =
- BIT_INDEX_TO_SET_MASK (ind);
- break;
- default:
- ABORT ();
- }
-#else
- ABORT ();
-#endif /* (not) MULE */
- }
- else
- BASE_TYPE_ARRAY_FROM_CHAR_SUBTAB (table)[u1] = val;
-#else /* The old way */
- /* #### Won't work with category char tables */
- case 1: ((Lisp_Object *) table)[u1] = val; break;
- case 2: ((Lisp_Object **) table)[u2][u1] = val; break;
- case 3: ((Lisp_Object ***) table)[u3][u2][u1] = val; break;
- case 4: ((Lisp_Object ****) table)[u4][u3][u2][u1] = val; break;
-#endif
- }
- }
+ put_chartab_table (&XCHAR_TABLE_TABLE (chartab), levels,
+ XCHAR_TABLE_CATEGORY_P (chartab), 0, start, end, val);
}
-/* Map over all characters in the range [START, END]. TABLE is an array
- of 256 elements, LEVEL is the depth (1 - 4). OFFSET is the character
- offset corresponding to this table. CHARTAB is the char-table object
- being mapped over. The FN will be called with CHARTAB, the code of the
- character in question, its value, and the value of ARG. Stops mapping
- the first time that FN returns non-zero, and returns that value.
- Returns zero if mapping got all the way to the end. */
+/* Map over all characters in the range [START, END]. TABLE is the table
+ value at this level -- either an array of 256 elements, a bit vector of
+ 256 elements (for category char tables, at level 1), or some other
+ value, specifying that all characters spanned by this entry have that
+ value. Qunbound as a value means that the characters spanned by this
+ entry all have no defined value. LEVEL is the depth (1 - 4). OFFSET is
+ the character offset corresponding to this table. CHARTAB is the
+ char-table object being mapped over. The FN will be called with
+ CHARTAB, the code of the character in question, its value, and the value
+ of ARG. Stops mapping the first time that FN returns non-zero, and
+ returns that value. Returns zero if mapping got all the way to the
+ end. */
static int
-map_chartab_table (SUBTAB_TYPE table, int level, int offset, int start,
- int end, Lisp_Object chartab,
- int (*fn) (Lisp_Object chartab, Ichar code, void *val,
- void *arg),
+map_chartab_table (Lisp_Object table, int level, int offset, Ichar start,
+ Ichar end, Lisp_Object chartab,
+ int (*fn) (Lisp_Object chartab, Ichar from, Ichar to,
+ Lisp_Object val, void *arg),
void *arg)
{
int i;
- int startind = max (0, (start - offset) >> ((level - 1) * 8));
- int endind = min (255, (end - offset) >> ((level - 1) * 8));
+ int startind, endind;
int catp = XCHAR_TABLE_CATEGORY_P (chartab);
+ check_chartab_invariants (table, level, catp);
+
+ if (UNBOUNDP (table))
+ return 0;
+
+ if (!SUBTAB_TABLE_P (table, catp))
+ {
+ Ichar from, to;
+ int retval;
+
+ if (level == 0)
+ {
+ structure_checking_assert (start <= offset);
+ structure_checking_assert (offset <= end);
+ }
+ from = max (start, offset);
+ to = min (end, offset + chartab_span_top[level]);
+ from = round_up_to_valid_ichar (from);
+ to = round_down_to_valid_ichar (to);
+ if (from <= to && from >= 0)
+ {
+ retval = (fn) (chartab, from, to, table, arg);
+ return retval;
+ }
+ return 0;
+ }
+
+
+ startind = max (0, (start - offset) >> ((level - 1) * 8));
+ endind = min (255, (end - offset) >> ((level - 1) * 8));
structure_checking_assert (startind <= 255);
structure_checking_assert (endind >= 0);
structure_checking_assert (startind <= endind);
+
+ if (BIT_VECTORP (table))
+ {
+ for (i = startind; i <= endind; i++)
+ {
+ if (bit_vector_bit (XBIT_VECTOR (table), i) &&
+ valid_ichar_p (offset + i))
+ {
+ int retval = (fn) (chartab, offset + i, offset + i, Qone, arg);
+ if (retval)
+ return retval;
+ }
+ }
+ return 0;
+ }
- switch (level)
- {
- case 1:
+ {
+ Lisp_Object *tab = SUBTAB_ARRAY_FROM_SUBTAB (table);
+ for (i = startind; i <= endind; i++)
{
- if (catp)
- {
- CATEGORY_TAB_BASE_TYPE *tab =
- BASE_TYPE_ARRAY_FROM_CATEGORY_SUBTAB (table);
- for (i = startind; i <= endind; i++)
- {
- if (tab[i])
- {
- int retval = (fn) (chartab, offset + i,
- (void *) (EMACS_INT) tab[i], arg);
- if (retval)
- return retval;
- }
- }
- }
- else
- {
- Lisp_Object *tab = BASE_TYPE_ARRAY_FROM_CHAR_SUBTAB (table);
- for (i = startind; i <= endind; i++)
- {
- if (!UNBOUNDP (tab[i]))
- {
- int retval = (fn) (chartab, offset + i,
- STORE_LISP_IN_VOID (tab[i]), arg);
- if (retval)
- return retval;
- }
- }
- }
- break;
+ int retval =
+ map_chartab_table (tab[i], level - 1,
+ offset + (i << ((level - 1) * 8)),
+ start, end, chartab, fn, arg);
+ if (retval)
+ return retval;
}
- case 2:
- case 3:
- case 4:
- {
- SUBTAB_ARRAY_TYPE tab = SUBTAB_ARRAY_FROM_SUBTAB (table);
- for (i = startind; i <= endind; i++)
- {
- if (!SUBTAB_EQ (tab[i], SUBTAB_BLANK (catp)[level - 1]))
- {
- int retval =
- map_chartab_table (tab[i], level - 1,
- offset + (i << ((level - 1) * 8)),
- start, end, chartab, fn, arg);
- if (retval)
- return retval;
- }
- }
- break;
- }
- default:
- ABORT ();
- }
+ }
return 0;
}
@@ -585,123 +596,107 @@
*/
static int
-check_if_blank (SUBTAB_TYPE table, int level, int start, int depth, int catp)
+check_if_blank (Lisp_Object table, int level, int start, int depth, int catp)
{
int i;
- switch (level)
+ check_chartab_invariants (table, level, catp);
+ if (UNBOUNDP (table))
+ return 1;
+
+ if (catp && BIT_VECTORP (table))
{
- case 1:
- {
- if (catp)
- {
- CATEGORY_TAB_BASE_TYPE *tab =
- BASE_TYPE_ARRAY_FROM_CATEGORY_SUBTAB (table);
- for (i = start; i < 256; i++)
- if (tab[i])
- return 0;
- break;
- }
- else
- {
- CHARTAB_BASE_TYPE *tab = BASE_TYPE_ARRAY_FROM_CHAR_SUBTAB (table);
- for (i = start; i < 256; i++)
- {
- if (!UNBOUNDP (tab[i]))
- return 0;
- }
- break;
- }
- }
- case 2:
- case 3:
- case 4:
- {
- SUBTAB_ARRAY_TYPE tab = SUBTAB_ARRAY_FROM_SUBTAB (table);
- for (i = start; i < 256; i++)
- {
- if (!SUBTAB_EQ (tab[i], SUBTAB_BLANK (catp)[level - 1]) &&
- !check_if_blank (tab[i], level - 1, 0, depth, catp))
- return 0;
- }
- break;
- }
- default:
- ABORT ();
+ for (i = start; i < 256; i++)
+ if (bit_vector_bit (XBIT_VECTOR (table), i))
+ return 0;
+ return 1;
}
- return 1;
+ if (!CHAR_SUBTABLEP (table))
+ return 0;
+
+ {
+ Lisp_Object *tab = SUBTAB_ARRAY_FROM_SUBTAB (table);
+ for (i = start; i < 256; i++)
+ if (!check_if_blank (tab[i], level - 1, 0, depth, catp))
+ return 0;
+ return 1;
+ }
}
static int
-chartab_tables_equal (SUBTAB_TYPE table1, SUBTAB_TYPE table2, int level,
+chartab_tables_equal (Lisp_Object table1, Lisp_Object table2, int level,
int depth, int foldcase, int catp)
{
int i;
- switch (level)
+ check_chartab_invariants (table1, level, catp);
+ check_chartab_invariants (table2, level, catp);
+
+ /* Things are made tricky by the fact that one of the values may be a
+ non-table value that applies to all characters spanned, and the other
+ may be a table all of whose members are EQ to that same value (or some
+ of whose members are sub-tables whose members in turn are EQ to that
+ same value, etc.), in which case the tables are equal. */
+
+ if (catp && level == 1)
{
- case 1:
- {
- if (catp)
- {
- CATEGORY_TAB_BASE_TYPE *tab1 =
- BASE_TYPE_ARRAY_FROM_CATEGORY_SUBTAB (table1);
- CATEGORY_TAB_BASE_TYPE *tab2 =
- BASE_TYPE_ARRAY_FROM_CATEGORY_SUBTAB (table2);
- if (memcmp (tab1, tab2, 256 * sizeof (CATEGORY_TAB_BASE_TYPE)))
- return 0;
- break;
- }
- else
- {
- CHARTAB_BASE_TYPE *tab1 =
- BASE_TYPE_ARRAY_FROM_CHAR_SUBTAB (table1);
- CHARTAB_BASE_TYPE *tab2 =
- BASE_TYPE_ARRAY_FROM_CHAR_SUBTAB (table2);
- for (i = 0; i < 256; i++)
- {
- if (!internal_equal_0 (tab1[i], tab2[i], depth + 1, foldcase))
- return 0;
- }
- break;
- }
- }
- case 2:
- case 3:
- case 4:
- {
- SUBTAB_ARRAY_TYPE tab1 = SUBTAB_ARRAY_FROM_SUBTAB (table1);
- SUBTAB_ARRAY_TYPE tab2 = SUBTAB_ARRAY_FROM_SUBTAB (table2);
- for (i = 0; i < 256; i++)
- {
- if (SUBTAB_EQ (tab1[i], SUBTAB_BLANK (catp)[level - 1]) &&
- SUBTAB_EQ (tab2[i], SUBTAB_BLANK (catp)[level - 1]))
- ;
- else if (SUBTAB_EQ (tab1[i], SUBTAB_BLANK (catp)[level - 1]))
- {
- if (!check_if_blank (tab2[i], level - 1, 0, depth, catp))
- return 0;
- }
- else if (SUBTAB_EQ (tab2[i], SUBTAB_BLANK (catp)[level - 1]))
- {
- if (!check_if_blank (tab1[i], level - 1, 0, depth, catp))
- return 0;
- }
- else
- {
- if (!chartab_tables_equal (tab1[1], tab2[1], level - 1,
- depth, foldcase, catp))
- return 0;
- }
- }
- break;
- }
- default:
- ABORT ();
+ int table2_val;
+
+ /* The level-1 values will be either integers or bit vectors. We
+ have to handle each possible combination in turn. */
+ if (!BIT_VECTORP (table1) && !BIT_VECTORP (table2))
+ return EQ (table1, table2);
+ if (BIT_VECTORP (table1) && BIT_VECTORP (table2))
+ return internal_equal (table1, table2, depth + 1);
+ /* If one is a bit vector and the other one isn't, make sure
+ the bit vector comes first. */
+ if (!BIT_VECTORP (table1))
+ {
+ Lisp_Object tmp = table1;
+ table1 = table2;
+ table2 = tmp;
+ }
+ table2_val = ONEP (table2);
+ for (i = 0; i < 256; i++)
+ if (bit_vector_bit (XBIT_VECTOR (table1), i) != table2_val)
+ return 0;
+ return 1;
}
- return 1;
+ /* In other circumstances we are dealing with char subtables or
+ non-subtable values (which can potentially be anything). */
+
+ if (!CHAR_SUBTABLEP (table1) && !CHAR_SUBTABLEP (table2))
+ return internal_equal_0 (table1, table2, depth + 1, foldcase);
+ if (CHAR_SUBTABLEP (table1) && CHAR_SUBTABLEP (table2))
+ {
+ Lisp_Object *tab1 = SUBTAB_ARRAY_FROM_SUBTAB (table1);
+ Lisp_Object *tab2 = SUBTAB_ARRAY_FROM_SUBTAB (table2);
+ for (i = 0; i < 256; i++)
+ if (!chartab_tables_equal (tab1[i], tab2[i], level - 1, depth,
+ foldcase, catp))
+ return 0;
+ return 1;
+ }
+
+ /* If one is a table and the other one isn't, make sure the table comes
+ first. */
+
+ if (!CHAR_SUBTABLEP (table1))
+ {
+ Lisp_Object tmp = table1;
+ table1 = table2;
+ table2 = tmp;
+ }
+ {
+ Lisp_Object *tab1 = SUBTAB_ARRAY_FROM_SUBTAB (table1);
+ for (i = 0; i < 256; i++)
+ if (!chartab_tables_equal (tab1[i], table2, level - 1, depth,
+ foldcase, catp))
+ return 0;
+ return 1;
+ }
}
static int
@@ -723,7 +718,7 @@
To speed things up, we should also keep track of the # of items
currently set. */
- SUBTAB_TYPE table;
+ Lisp_Object table;
int catp = XCHAR_TABLE_CATEGORY_P (obj1);
if (XCHAR_TABLE_TYPE (obj1) != XCHAR_TABLE_TYPE (obj2))
@@ -757,6 +752,11 @@
{
if (!check_if_blank (table, i, 1, depth, catp))
return 0;
+ if (!CHAR_SUBTABLEP (table))
+ {
+ assert (UNBOUNDP (table));
+ break;
+ }
table = SUBTAB_ARRAY_FROM_SUBTAB (table)[0];
}
}
@@ -778,13 +778,10 @@
static const Ascbyte *likely_test = "\t\n\r\f\016\025\0330128!@#$%^&*`'_+=-,.<>?;:/~()[]{}\\\"acehijlnortuxyzADEGIKMOQSVY";
static inline Hashcode
-hash_raw_chartab_val (Ichar ch, Lisp_Object chartab, int depth, int catp)
+hash_raw_chartab_val (Ichar ch, Lisp_Object chartab, int depth)
{
- void *val = get_char_table_raw (ch, chartab);
- if (catp)
- return (Hashcode) val;
- else
- return internal_hash (GET_LISP_FROM_VOID (val), depth + 1);
+ Lisp_Object val = get_char_table_raw (ch, chartab);
+ return internal_hash (val, depth + 1);
}
static Hashcode
@@ -795,15 +792,13 @@
depth + 1));
const Ascbyte *p;
Ichar ch;
- int catp = XCHAR_TABLE_CATEGORY_P (obj);
/* Hash those most likely to have values */
for (p = likely_test; *p; p++)
- hashval = HASH2 (hashval,
- hash_raw_chartab_val ((Ichar) *p, obj, depth, catp));
+ hashval = HASH2 (hashval, hash_raw_chartab_val ((Ichar) *p, obj, depth));
/* Hash some random Latin characters */
for (ch = 130; ch <= 255; ch += 5)
- hashval = HASH2 (hashval, hash_raw_chartab_val (ch, obj, depth, catp));
+ hashval = HASH2 (hashval, hash_raw_chartab_val (ch, obj, depth));
/* Don't bother trying to hash higher stuff if there is none. */
if (XCHAR_TABLE_LEVELS (obj) > 1)
{
@@ -816,16 +811,16 @@
change if the Unicode-to-charset tables are changed. */
/* Hash some random extended Latin characters */
for (ch = 260; ch <= 500; ch += 10)
- hashval = HASH2 (hashval, hash_raw_chartab_val (ch, obj, depth, catp));
+ hashval = HASH2 (hashval, hash_raw_chartab_val (ch, obj, depth));
/* Hash some higher characters */
for (ch = 500; ch <= 4000; ch += 50)
- hashval = HASH2 (hashval, hash_raw_chartab_val (ch, obj, depth, catp));
+ hashval = HASH2 (hashval, hash_raw_chartab_val (ch, obj, depth));
/* Hash some random CJK characters */
for (ch = 0x4E00; ch <= 0x9FFF; ch += 791)
- hashval = HASH2 (hashval, hash_raw_chartab_val (ch, obj, depth, catp));
+ hashval = HASH2 (hashval, hash_raw_chartab_val (ch, obj, depth));
/* Hash some random Hangul characters */
for (ch = 0xAC00; ch <= 0xD7AF; ch += 791)
- hashval = HASH2 (hashval, hash_raw_chartab_val (ch, obj, depth, catp));
+ hashval = HASH2 (hashval, hash_raw_chartab_val (ch, obj, depth));
#elif defined (MULE)
/* 0xA1 is usually the first alphabetic character and differs across
charsets, whereas 0xA0 is no-break-space across many of them.
@@ -834,15 +829,13 @@
#define FROB1(cs) \
hashval = HASH2 (hashval, \
hash_raw_chartab_val (charset_codepoint_to_ichar_raw \
- (cs, 0, 0xA1), \
- obj, depth, catp))
+ (cs, 0, 0xA1), obj, depth))
/* 0x3021 is the first CJK character in a number of different CJK charsets
and differs across them. */
#define FROB2(cs) \
hashval = HASH2 (hashval, \
hash_raw_chartab_val (charset_codepoint_to_ichar_raw \
- (cs, 0x30, 0x21), \
- obj, depth, catp))
+ (cs, 0x30, 0x21), obj, depth))
FROB1 (Vcharset_latin_iso8859_2);
FROB1 (Vcharset_latin_iso8859_3);
FROB1 (Vcharset_latin_iso8859_4);
@@ -883,7 +876,6 @@
return XCHAR_TABLE_TABLE (obj);
}
-/* Allocate and blank the tables. */
static void
init_chartab_tables (Lisp_Object chartab)
{
@@ -891,9 +883,7 @@
and MULE, to 1 if MAXIMIZE_CHAR_TABLE_DEPTH and not MULE, and to
foo otherwise. */
XCHAR_TABLE_LEVELS (chartab) = CHARTAB_LEVELS (1);
- XCHAR_TABLE_TABLE (chartab) =
- create_new_chartab_table (XCHAR_TABLE_LEVELS (chartab),
- XCHAR_TABLE_CATEGORY_P (chartab));
+ XCHAR_TABLE_TABLE (chartab) = Qunbound;
}
static void
@@ -901,9 +891,7 @@
{
if (!UNBOUNDP (XCHAR_TABLE_TABLE (chartab)))
{
- free_chartab_table (XCHAR_TABLE_TABLE (chartab),
- XCHAR_TABLE_LEVELS (chartab),
- XCHAR_TABLE_CATEGORY_P (chartab));
+ free_chartab_table (XCHAR_TABLE_TABLE (chartab));
XCHAR_TABLE_TABLE (chartab) = Qunbound;
}
}
@@ -950,7 +938,8 @@
};
static int
-print_table_entry (Lisp_Object table, Ichar ch, void *val, void *arg)
+print_table_entry (Lisp_Object UNUSED (table), Ichar from, Ichar to,
+ Lisp_Object val, void *arg)
{
struct ptemap *a = (struct ptemap *) arg;
QUIT;
@@ -962,11 +951,14 @@
write_ascstring (a->printcharfun, "...");
return 1;
}
- write_fmt_string_lisp (a->printcharfun, "%s ", 1, make_char (ch));
- if (XCHAR_TABLE_CATEGORY_P (table))
- write_fmt_string (a->printcharfun, "#x%lx", (long) val);
+ if (from == to)
+ write_fmt_string_lisp (a->printcharfun, "%s %S", 2, make_char (from), val);
+ else if (print_readably)
+ write_fmt_string_lisp (a->printcharfun, "(%s %s) %S", 3,
+ make_char (from), make_char (to), val);
else
- write_fmt_string_lisp (a->printcharfun, "%S", 1, GET_LISP_FROM_VOID (val));
+ write_fmt_string_lisp (a->printcharfun, "%s-%s %S", 3,
+ make_char (from), make_char (to), val);
a->num_printed++;
return 0;
}
@@ -1003,8 +995,9 @@
else
write_ascstring (printcharfun, ")>");
- /* #### need to print and read the default; but that will allow the
- default to be modified, which we don't (yet) support -- but FSF does */
+ /* [[ #### need to print and read the default; but that will allow the
+ default to be modified, which we don't (yet) support -- but FSF does ]]
+ But we do support setting the default. --ben */
}
static const struct memory_description char_table_description[] = {
@@ -1302,13 +1295,6 @@
Each char table type is used for a different purpose and allows different
sorts of values. The different char table types are
-`category'
- Used internally for category tables. These are a special type of
- char tables , which specify the regexp categories that a
- character is in. The valid values are nil or a bit vector of 95
- elements, and values default to nil. Higher-level Lisp functions
- are provided for working with category tables. Currently categories
- and category tables only exist when Mule support is present.
`char'
A generalized char table, for mapping from one character to another.
Used for case tables, syntax matching tables,
@@ -1319,11 +1305,6 @@
`generic'
An even more generalized char table, for mapping from a character to
anything. The default result given by `get-char-table' is nil.
-`display'
- Used for display tables, which specify how a particular character is
- to appear when displayed. #### Not yet implemented; currently, the
- display table code uses generic char tables, and it's not clear that
- implementing this char table type would be useful.
`syntax'
Used for syntax tables, which specify the syntax of a particular
character. Higher-level Lisp functions are provided for
@@ -1331,6 +1312,18 @@
to be syntax codes as generated by `syntax-string-to-code'), and the
default result given by `get-char-table' is the syntax code for
`word'. (Note: In 21.4 and prior, it was the code for `inherit'.)
+`category'
+ Used internally for category tables. These are a special type of
+ char tables, which specify the regexp categories that a
+ character is in. It is not possible to create such a table from
+ Lisp. Instead, use the functions that are provided for working
+ with category tables (see `make-category-table'). Currently categories
+ and category tables only exist when Mule support is present.
+`display'
+ Used for display tables, which specify how a particular character is
+ to appear when displayed. #### Not yet implemented; currently, the
+ display table code uses generic char tables, and it's not clear that
+ implementing this char table type would be useful.
*/
(type))
{
@@ -1358,8 +1351,8 @@
ctnew->parent = ct->parent;
ctnew->default_ = ct->default_;
ctnew->levels = ct->levels;
- ctnew->table = copy_chartab_table (ct->table, ct->levels,
- CHAR_TABLE_CATEGORY_P (ct));
+ ctnew->table = clone_chartab_table (ct->table, ct->levels,
+ CHAR_TABLE_CATEGORY_P (ct));
obj = wrap_char_table (ctnew);
#ifdef MIRROR_TABLE
@@ -1419,7 +1412,7 @@
check_non_category_char_table (char_table);
CHECK_CHAR_COERCE_INT (character);
- return get_char_table_lisp (XCHAR (character), char_table);
+ return get_char_table (XCHAR (character), char_table);
}
static int
@@ -1519,7 +1512,7 @@
/* Assign VAL to all characters in RANGE in char table TABLE. */
-void
+static void
put_char_table_range (Lisp_Object table, struct chartab_range *range,
Lisp_Object val)
{
@@ -1530,6 +1523,19 @@
switch (range->type)
{
+ case CHARTAB_RANGE_ALL:
+ put_char_table (table, 0, ICHAR_MAX, val);
+ break;
+
+ case CHARTAB_RANGE_RANGE:
+ put_char_table (table, range->ch, range->chtop, val);
+ break;
+
+ case CHARTAB_RANGE_CHAR:
+ put_char_table (table, range->ch, range->ch, val);
+ break;
+
+
#ifdef MULE
case CHARTAB_RANGE_ROW:
{
@@ -1539,42 +1545,36 @@
}
case CHARTAB_RANGE_CHARSET:
+ get_charset_limits (range->charset, &l1, &l2, &h1, &h2);
+ iterate_charset:
+#ifdef UNICODE_INTERNAL
{
int i, j;
- get_charset_limits (range->charset, &l1, &l2, &h1, &h2);
- iterate_charset:
+
+ /* Under Unicode-internal, the characters in a charset row or in a
+ charset are non-contiguous and may not even exist */
for (i = l1; i <= h1; i++)
for (j = l2; j <= h2; j++)
{
Ichar ch = charset_codepoint_to_ichar_raw (range->charset, i, j);
if (ch >= 0)
- put_char_table (table, ch, val);
+ put_char_table (table, ch, ch, val);
}
+ break;
}
- break;
+#else /* not UNICODE_INTERNAL */
+ {
+ /* Under old-Mule, the characters in a charset row all exist and
+ are contiguous. The characters in a charset are contiguous
+ but with gaps in them; however, that's not an issue here. */
+ Ichar from = charset_codepoint_to_ichar_raw (range->charset, l1, l2);
+ Ichar to = charset_codepoint_to_ichar_raw (range->charset, h1, h2);
+ text_checking_assert (from >= 0);
+ text_checking_assert (to >= 0);
+ put_char_table (table, from, to, val);
+ }
+#endif /* (not) UNICODE_INTERNAL */
#endif /* MULE */
-
-#define CHAR_INTERVAL_FOR_QUIT 1000
- case CHARTAB_RANGE_RANGE:
- {
- Ichar i;
- for (i = range->ch; i <= range->chtop; i += CHAR_INTERVAL_FOR_QUIT)
- {
- Ichar stop = min (i + CHAR_INTERVAL_FOR_QUIT - 1, range->chtop);
- Ichar j;
-
- /* QUIT every CHAR_INTERVAL_FOR_QUIT characters */
- for (j = i; j <= stop; j++)
- put_char_table (table, j, val);
- QUIT;
- }
- }
-
- break;
-
- case CHARTAB_RANGE_CHAR:
- put_char_table (table, range->ch, val);
- break;
}
if (ct->type == CHAR_TABLE_TYPE_SYNTAX)
@@ -1607,9 +1607,6 @@
ct = XCHAR_TABLE (char_table);
check_valid_char_table_value (value, ct->type, ERROR_ME);
decode_char_table_range (range, &rainj);
- if (rainj.type == CHARTAB_RANGE_ALL)
- invalid_operation ("Can't currently set all characters in a char table",
- range);
value = canonicalize_char_table_value (value, ct->type);
put_char_table_range (char_table, &rainj, value);
return Qnil;
@@ -1648,27 +1645,25 @@
}
/* Map FN (with client data ARG) over range RANGE in char table CT.
- Mapping stops the first time FN returns non-zero, and that value
- becomes the return value of map_char_table().
+ Function is called on a range [START, END] (specifying a range over
+ which teh ta, poss Mapping stops the first time FN returns non-zero, and
+ that value becomes the return value of map_char_table().
*/
int
map_char_table (Lisp_Object table,
struct chartab_range *range,
- int (*fn) (Lisp_Object table, Ichar code, void *val,
- void *arg),
+ int (*fn) (Lisp_Object table, Ichar from, Ichar to,
+ Lisp_Object val, void *arg),
void *arg)
{
#ifdef MULE
int l1, h1, l2, h2;
#endif
- int catp = XCHAR_TABLE_CATEGORY_P (table);
int levels = XCHAR_TABLE_LEVELS (table);
/* Compute maximum allowed value for this table, which may be less than
the range we have been requested to map over. */
- int maxval = /* Value is 2^31-1 for 4, but 2^24-1 for 3,
- 2^16-1 for 2, 2^8-1 for 1. */
- levels == 4 ? INT_32_BIT_MAX : (1 << (levels * 8)) - 1;
+ int maxval = chartab_span_top[levels];
switch (range->type)
{
case CHARTAB_RANGE_ALL:
@@ -1684,6 +1679,16 @@
min (range->chtop, maxval),
table, fn, arg);
+
+ case CHARTAB_RANGE_CHAR:
+ {
+ Lisp_Object val = get_char_table_raw (range->ch, table);
+
+ if (!UNBOUNDP (val))
+ return (fn) (table, range->ch, range->ch, val, arg);
+ else
+ return 0;
+ }
#ifdef MULE
case CHARTAB_RANGE_ROW:
{
@@ -1693,39 +1698,47 @@
}
case CHARTAB_RANGE_CHARSET:
+ get_charset_limits (range->charset, &l1, &l2, &h1, &h2);
+ iterate_charset:
+#ifdef UNICODE_INTERNAL
{
int i, j;
- get_charset_limits (range->charset, &l1, &l2, &h1, &h2);
- iterate_charset:
+
+ /* Under Unicode-internal, the characters in a charset row or in a
+ charset are non-contiguous and may not even exist */
for (i = l1; i <= h1; i++)
for (j = l2; j <= h2; j++)
{
Ichar ch = charset_codepoint_to_ichar_raw (range->charset, i, j);
if (ch >= 0)
{
- void *val = get_char_table (ch, table);
- if (catp ? !!val : !UNBOUNDP (GET_LISP_FROM_VOID (val)))
+ Lisp_Object val = get_char_table_raw (ch, table);
+ if (!UNBOUNDP (val))
{
- int retval = (fn) (table, ch, val, arg);
+ int retval = (fn) (table, ch, ch, val, arg);
if (retval)
return retval;
}
}
}
+ break;
}
- break;
-
+#else /* not UNICODE_INTERNAL */
+ {
+ /* Under old-Mule, the characters in a charset row all exist and
+ are contiguous. The characters in a charset are contiguous
+ but with gaps in them; however, that's not an issue here. */
+ Ichar from = charset_codepoint_to_ichar_raw (range->charset, l1, l2);
+ Ichar to = charset_codepoint_to_ichar_raw (range->charset, h1, h2);
+ text_checking_assert (from >= 0);
+ text_checking_assert (to >= 0);
+ return map_chartab_table (XCHAR_TABLE_TABLE (table),
+ XCHAR_TABLE_LEVELS (table),
+ 0, min (from, maxval), min (to, maxval),
+ table, fn, arg);
+ }
+#endif /* (not) UNICODE_INTERNAL */
#endif /* MULE */
-
- case CHARTAB_RANGE_CHAR:
- {
- void *val = get_char_table (range->ch, table);
-
- if (catp ? !!val : !UNBOUNDP (GET_LISP_FROM_VOID (val)))
- return (fn) (table, range->ch, val, arg);
- else
- return 0;
- }
default:
ABORT ();
@@ -1742,21 +1755,24 @@
static int
slow_map_char_table_fun (Lisp_Object UNUSED (table),
- Ichar ch, void *val, void *arg)
+ Ichar from, Ichar to, Lisp_Object val, void *arg)
{
struct slow_map_char_table_arg *closure =
(struct slow_map_char_table_arg *) arg;
- closure->retval = call2 (closure->function, make_char (ch),
- GET_LISP_FROM_VOID (val));
+ closure->retval =
+ call2 (closure->function,
+ from == to ? make_char (from) :
+ Fcons (make_char (from), make_char (to)),
+ val);
return !NILP (closure->retval);
}
DEFUN ("map-char-table", Fmap_char_table, 2, 3, 0, /*
Map FUNCTION over CHAR-TABLE until it returns non-nil; return that value.
-FUNCTION is called with two arguments, a character and the value for that
-character in the table. FUNCTION will only be called for characters whose
-value has been set.
+FUNCTION is called with two arguments, a character or range (FROM . TO) and
+the associated value in the table. FUNCTION will only be called for
+characters whose value has been set.
RANGE specifies a subrange to map over. If omitted or t, it defaults to
the entire table. Other possible values are the same as can be passed to
@@ -1806,18 +1822,17 @@
/* #### should deal with ERRB */
EXTERNAL_PROPERTY_LIST_LOOP_3 (range, data, value)
{
- struct chartab_range dummy;
-
if (CONSP (range))
{
- if (!CONSP (XCDR (range))
- || !NILP (XCDR (XCDR (range))))
- sferror ("Invalid range format", range);
- decode_char_table_range (XCAR (range), &dummy);
- decode_char_table_range (XCAR (XCDR (range)), &dummy);
+ if (CHARP (XCAR (range)) &&
+ CONSP (XCDR (range)) &&
+ CHARP (XCAR (XCDR (range))) &&
+ NILP (XCDR (XCDR (range))))
+ continue;
}
- else
- decode_char_table_range (range, &dummy);
+ else if (CHARP (range))
+ continue;
+ sferror ("Invalid range format", range);
}
return 1;
@@ -1846,30 +1861,20 @@
chartab = Fmake_char_table (type);
- data = dataval;
- while (!NILP (data))
- {
- Lisp_Object range = Fcar (data);
- Lisp_Object val = Fcar (Fcdr (data));
-
- data = Fcdr (Fcdr (data));
- if (CONSP (range))
- {
- if (CHAR_OR_CHAR_INTP (XCAR (range)))
- {
- Ichar first = XCHAR_OR_CHAR_INT (Fcar (range));
- Ichar last = XCHAR_OR_CHAR_INT (Fcar (Fcdr (range)));
- Ichar i;
-
- for (i = first; i <= last; i++)
- Fput_char_table (make_char (i), val, chartab);
- }
- else
- ABORT ();
- }
- else
- Fput_char_table (range, val, chartab);
- }
+ {
+ PROPERTY_LIST_LOOP_3 (range, val, dataval)
+ {
+ if (CONSP (range))
+ {
+ Ichar first = XCHAR_OR_CHAR_INT (Fcar (range));
+ Ichar last = XCHAR_OR_CHAR_INT (Fcar (Fcdr (range)));
+
+ put_char_table (chartab, first, last, val);
+ }
+ else
+ Fput_char_table (range, val, chartab);
+ }
+ }
return chartab;
}
@@ -1880,12 +1885,6 @@
/************************************************************************/
/* Category Tables */
/************************************************************************/
-
-static void
-init_blank_category_chartab_tables (void)
-{
- init_blank_chartab_tables_1 (category_chartab_blank, 1);
-}
static int
category_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth,
@@ -2003,14 +2002,12 @@
int
check_char_in_category (Ichar ch, Lisp_Object table, int designator, int not_p)
{
- CATEGORY_TAB_BASE_TYPE val;
- Lisp_Object chartab;
+ Lisp_Object chartab, val;
chartab =
XCATEGORY_TABLE_TABLES (table)[DESIGNATOR_TO_CHAR_TABLE (designator)];
- val = (CATEGORY_TAB_BASE_TYPE) (EMACS_INT) get_char_table_raw (ch, chartab);
- return BIT_IS_SET_IN_ARRAY (val, DESIGNATOR_TO_BIT_INDEX (designator)) ?
- !not_p : not_p;
+ val = get_char_table_raw (ch, chartab);
+ return ONEP (val) ? !not_p : not_p;
}
DEFUN ("check-category-at", Fcheck_category_at, 2, 4, 0, /*
@@ -2118,25 +2115,22 @@
(char_range, designator, category_table, reset))
{
struct chartab_range rainj;
- int ind;
decode_char_table_range (char_range, &rainj);
category_table = check_category_table (category_table,
Fcategory_table (Qnil));
CHECK_CATEGORY_DESIGNATOR (designator);
- ind = DESIGNATOR_TO_BIT_INDEX (XCHAR (designator));
if (!NILP (reset))
{
int i;
for (i = 0; i < CHAR_TABLES_PER_CATEGORY_TABLE; i++)
put_char_table_range (XCATEGORY_TABLE_TABLES (category_table)[i],
- &rainj, make_int (PUT_CATEGORY_RESET << 8));
+ &rainj, Qunbound);
}
put_char_table_range
(XCATEGORY_TABLE_TABLES (category_table)
- [DESIGNATOR_TO_CHAR_TABLE (XCHAR (designator))],
- &rainj, make_int ((PUT_CATEGORY_SET << 8) + ind));
+ [DESIGNATOR_TO_CHAR_TABLE (XCHAR (designator))], &rainj, Qone);
return Qnil;
}
@@ -2157,23 +2151,19 @@
static int
slow_map_category_table_fun (Lisp_Object UNUSED (table),
- Ichar ch, void *val, void *arg)
+ Ichar from, Ichar to, Lisp_Object val, void *arg)
{
struct slow_map_category_table_arg *closure =
(struct slow_map_category_table_arg *) arg;
- CATEGORY_TAB_BASE_TYPE baseval = (CATEGORY_TAB_BASE_TYPE) (EMACS_INT) val;
- int ind;
+ structure_checking_assert (ONEP (val));
- for (ind = 0; ind < BITS_PER_CATEGORY_SUBTABLE; ind++)
- if (BIT_IS_SET_IN_ARRAY (baseval, ind))
- {
- closure->retval = call2 (closure->function, make_char (ch),
- make_char
- (BIT_INDEX_TO_DESIGNATOR
- (closure->tablenum, ind)));
- if (!NILP (closure->retval))
- return 1;
- }
+ closure->retval = call2 (closure->function,
+ from == to ? make_char (from) :
+ Fcons (make_char (from), make_char (to)),
+ make_char
+ (CHAR_TABLE_TO_DESIGNATOR (closure->tablenum)));
+ if (!NILP (closure->retval))
+ return 1;
return 0;
}
@@ -2279,7 +2269,6 @@
#ifdef MULE
INIT_LISP_OBJECT (category_table);
- INIT_LISP_OBJECT (category_subtable);
DEFSYMBOL_MULTIWORD_PREDICATE (Qcategory_tablep);
DEFSYMBOL (Qcategory_designator_p);
#endif /* MULE */
@@ -2335,19 +2324,14 @@
Vall_syntax_tables = Qnil;
dump_add_weak_object_chain (&Vall_syntax_tables);
- init_blank_chartab_tables ();
- staticpro (&chartab_blank[1]);
- staticpro (&chartab_blank[2]);
- staticpro (&chartab_blank[3]);
- staticpro (&chartab_blank[4]);
-
-#ifdef MULE
- init_blank_category_chartab_tables ();
- staticpro (&category_chartab_blank[1]);
- staticpro (&category_chartab_blank[2]);
- staticpro (&category_chartab_blank[3]);
- staticpro (&category_chartab_blank[4]);
-#endif
+ /* The value at level 4 is not 2^32 - 1. With 32-bit EMACS_INTs, it's
+ 2^30 - 1 because characters are only 30 bits wide. */
+ chartab_span_top[0] = 0;
+ chartab_span_top[1] = (1 << 8) - 1;
+ chartab_span_top[2] = (1 << 16) - 1;
+ chartab_span_top[3] = (1 << 24) - 1;
+ chartab_span_top[4] = ICHAR_MAX;
+ dump_add_opaque (chartab_span_top, sizeof (chartab_span_top));
}
void
diff -r ba0773838e57 -r 93f4f44ba25f src/chartab.h
--- a/src/chartab.h Thu Mar 25 19:16:54 2010 -0500
+++ b/src/chartab.h Mon Mar 29 15:46:27 2010 -0500
@@ -34,16 +34,30 @@
/* Basic Char Table Format */
/************************************************************************/
-/* Things are written this way because at one point I designed the
- subtables so they could either be stored as "plain tables" (as direct
- 256-element arrays), as unified Lisp objects (where the header and
- following array is a single unit) or as split Lisp object (with a
- wrapper Lisp object around a separately allocated table). The plain
- tables are the fastest and most memory efficient of the three, but
- can't be used with KKCC. (KKCC doesn't keep track of
+/* See the comment at the top of chartab.c for a description of the format
+ of char tables.
+
+ At one point I designed the subtables so they could either be stored as
+ "plain tables" (as direct 256-element arrays), as unified Lisp objects
+ (where the header and following array is a single unit) or as split Lisp
+ objects (with a wrapper Lisp object around a separately allocated
+ table). The plain tables are the fastest and most memory efficient of
+ the three, but can't be used with KKCC. (KKCC doesn't keep track of
whether it has already traversed non-Lisp-object arrays, and thus
traverses the shared "blank" subtables numerous times when marking,
- making it become *extremely* slow.) */
+ making it become *extremely* slow.) Furthermore, they preclude the
+ possibility of storing a non-table Lisp object at some level to indicate
+ that all characters spanned by that entry have the same value. At this
+ point, the assumption that subtables are Lisp objects is completely
+ built in to the code. However, it would still be possible to implement
+ split Lisp objects without too much difficulty, due to the macros that
+ wrap the operations of fetching the actual array of a subtable, creating
+ a subtable, and freeing a subtable. The potential advantage of split
+ Lisp tables compared with plain tables is that the plain tables take up
+ slightly more than a power of two in size, which makes them maximally
+ inefficient for certain implementations of malloc(), e.g. gmalloc.c,
+ which will round them up to the next power of two and hence use almost
+ twice the space necessary to store the object. */
struct Lisp_Char_Subtable
{
@@ -52,7 +66,7 @@
};
/* Definition of the non-level-1 subtables, which are always `char subtables'
- whether or not we have a category table or other char table. */
+ whether or not we have a category char table or other char table. */
#define ALLOCATE_LEVEL_N_SUBTAB() ALLOC_NORMAL_LISP_OBJECT (char_subtable)
#define SUBTAB_STORAGE_SIZE(table, level, stats) \
@@ -61,37 +75,18 @@
/* If we use split Lisp char subtables, we'd modify the above struct and
three defines (ALLOCATE_LEVEL_N_SUBTAB, SUBTAB_STORAGE_SIZE,
- FREE_ONE_SUBTAB). If we use "plain" non-Lisp char subtables, we'd
- modify the three macros above and the various macros below as well, and
- omit the definition of a Lisp subtable object. */
+ FREE_ONE_SUBTAB). */
-#define SUBTAB_EQ(a, b) EQ (a, b)
-#define SUBTAB_TYPE Lisp_Object
-#define SUBTAB_ARRAY_TYPE SUBTAB_TYPE *
#define SUBTAB_ARRAY_FROM_SUBTAB(tab) (XCHAR_SUBTABLE (tab)->ptr)
+/* WARNING: Evaluates arguments more than once. */
+#define SUBTAB_TABLE_P(tab, catp) \
+ (CHAR_SUBTABLEP (tab) || (catp && BIT_VECTORP (tab)))
+
/* Definition of the level-1 subtables, which are either `char subtables'
- or `category subtables'. */
+ or bit vectors. */
-#ifdef MULE
-#define ALLOCATE_LEVEL_1_SUBTAB(catp) \
- ((catp) ? ALLOCATE_CATEGORY_SUBTABLE () : ALLOCATE_LEVEL_N_SUBTAB ())
-#define BASE_TYPE_ARRAY_FROM_SUBTAB(tab, catp) \
- (catp ? (void *) BASE_TYPE_ARRAY_FROM_CATEGORY_SUBTAB (tab) : \
- (void *) BASE_TYPE_ARRAY_FROM_CHAR_SUBTAB (tab))
-#define SUBTAB_BLANK(catp) (catp ? category_chartab_blank : chartab_blank)
-#else
-#define ALLOCATE_LEVEL_1_SUBTAB(catp) \
- (assert (!catp), ALLOCATE_LEVEL_N_SUBTAB ())
-#define BASE_TYPE_ARRAY_FROM_SUBTAB(tab, catp) \
- (assert (!catp), BASE_TYPE_ARRAY_FROM_CHAR_SUBTAB (tab))
-#define SUBTAB_BLANK(catp) (assert (!catp), chartab_blank)
-#endif /* (not) MULE */
-
-/* Specialization of above code to level-1 char subtables. */
-
-#define CHARTAB_BASE_TYPE Lisp_Object
-#define BASE_TYPE_ARRAY_FROM_CHAR_SUBTAB(tab) (XCHAR_SUBTABLE (tab)->ptr)
+#define ALLOCATE_LEVEL_1_CATEGORY_SUBTAB() make_bit_vector (256, Qzero)
typedef struct Lisp_Char_Subtable Lisp_Char_Subtable;
@@ -110,18 +105,34 @@
#define MAXIMIZE_CHAR_TABLE_DEPTH
#endif
+/* Return number of table levels required to store a character. */
+
+#define GET_CHAR_LEVELS(ch, levels) \
+do { \
+ int _cl_ch = (ch); \
+ (levels) = (_cl_ch <= 0xFF ? 1 : \
+ _cl_ch <= 0xFFFF ? 2 : \
+ _cl_ch <= 0xFFFFFF ? 3 : \
+ 4); \
+} while (0)
+
/* Break up a 32-bit character code into 8-bit parts. */
+#define GET_CHAR_BYTES(ch, u1, u2, u3, u4) \
+do { \
+ int _gcb_ch = (ch); \
+ \
+ (u1) = _gcb_ch >> 24; \
+ (u2) = (_gcb_ch >> 16) & 255; \
+ (u3) = (_gcb_ch >> 8) & 255; \
+ (u4) = _gcb_ch & 255; \
+} while (0)
+
+
#ifdef MAXIMIZE_CHAR_TABLE_DEPTH
-# define CHARTAB_BREAKUP_CHAR_CODE(val, u1, u2, u3, u4, levels) \
-do { \
- int buc_val = (val); \
- \
- (u1) = buc_val >> 24; \
- (u2) = (buc_val >> 16) & 255; \
- (u3) = (buc_val >> 8) & 255; \
- (u4) = buc_val & 255; \
-} while (0)
+
+# define CHARTAB_BREAKUP_CHAR_CODE(val, u1, u2, u3, u4, levels) \
+ GET_CHAR_BYTES (val, u1, u2, u3, u4)
/* Define the current chartab levels given an expr indicating the level value.
This is an optimization designed to cause compiler simplfication of code
due to constant expression in if, switch, etc. statements. */
@@ -130,21 +141,17 @@
# else
# define CHARTAB_LEVELS(expr) 1
# endif
+
#else /* not MAXIMIZE_CHAR_TABLE_DEPTH */
+
# define CHARTAB_BREAKUP_CHAR_CODE(val, u1, u2, u3, u4, levels) \
do { \
int buc_val = (val); \
- \
- (u1) = buc_val >> 24; \
- (u2) = (buc_val >> 16) & 255; \
- (u3) = (buc_val >> 8) & 255; \
- (u4) = buc_val & 255; \
- (levels) = (buc_val <= 0xFF ? 1 : \
- buc_val <= 0xFFFF ? 2 : \
- buc_val <= 0xFFFFFF ? 3 : \
- 4); \
+ GET_CHAR_BYTES (buc_val, u1, u2, u3, u4); \
+ GET_CHAR_LEVELS (buc_val, levels); \
} while (0)
# define CHARTAB_LEVELS(expr) (expr)
+
#endif /* not MAXIMIZE_CHAR_TABLE_DEPTH */
enum char_table_type
@@ -171,7 +178,7 @@
Possibly, we could/should allow the type to be chosen at creation
time as a parameter to `make-char-table'. */
- SUBTAB_TYPE table;
+ Lisp_Object table;
int levels;
Lisp_Object default_;
Lisp_Object parent; /* #### not yet implemented */
@@ -229,12 +236,12 @@
enum chartab_range_type
{
CHARTAB_RANGE_ALL,
+ CHARTAB_RANGE_RANGE,
+ CHARTAB_RANGE_CHAR,
#ifdef MULE
CHARTAB_RANGE_CHARSET,
CHARTAB_RANGE_ROW,
#endif
- CHARTAB_RANGE_RANGE,
- CHARTAB_RANGE_CHAR
};
struct chartab_range
@@ -246,22 +253,12 @@
};
void set_char_table_default (Lisp_Object table, Lisp_Object value);
-
-enum put_category_operation
-{
- PUT_CATEGORY_SET,
- PUT_CATEGORY_UNSET,
- PUT_CATEGORY_RESET,
- PUT_CATEGORY_RESET_AND_SET
-};
-
-void put_char_table (Lisp_Object chartab, Ichar ch, Lisp_Object val);
-void put_char_table_range (Lisp_Object table, struct chartab_range *range,
- Lisp_Object val);
+void put_char_table (Lisp_Object chartab, Ichar start, Ichar end,
+ Lisp_Object val);
int map_char_table (Lisp_Object table,
struct chartab_range *range,
- int (*fn) (Lisp_Object table, Ichar code, void *val,
- void *arg),
+ int (*fn) (Lisp_Object table, Ichar from, Ichar to,
+ Lisp_Object val, void *arg),
void *arg);
void prune_syntax_tables (void);
int word_boundary_p (struct buffer *buf, Ichar c1, Ichar c2);
@@ -320,48 +317,7 @@
*/
-/* Specialization of general level-1 subtable code to category subtables. */
-
-#define BITS_PER_CATEGORY_SUBTABLE 8
-#define BITS_PER_CATEGORY_TABLE 96
-
-#if BITS_PER_CATEGORY_SUBTABLE == 8
-#define CATEGORY_DIVIDE_SHIFT 3
-#define CATEGORY_MOD_AND 0x7
-#define CATEGORY_TAB_BASE_TYPE unsigned char
-#elif BITS_PER_CATEGORY_SUBTABLE == 16
-#define CATEGORY_DIVIDE_SHIFT 4
-#define CATEGORY_MOD_AND 0xF
-#define CATEGORY_TAB_BASE_TYPE UINT_16_BIT
-#elif BITS_PER_CATEGORY_SUBTABLE == 32
-#define CATEGORY_DIVIDE_SHIFT 5
-#define CATEGORY_MOD_AND 0x1F
-#define CATEGORY_TAB_BASE_TYPE UINT_32_BIT
-#else
-#error "Invalid value for BITS_PER_CATEGORY_SUBTABLE"
-#endif
-
-#define CHAR_TABLES_PER_CATEGORY_TABLE \
- (BITS_PER_CATEGORY_TABLE / BITS_PER_CATEGORY_SUBTABLE)
-
-struct Lisp_Category_Subtable
-{
- NORMAL_LISP_OBJECT_HEADER header;
- CATEGORY_TAB_BASE_TYPE ptr[256];
-};
-typedef struct Lisp_Category_Subtable Lisp_Category_Subtable;
-
-#define ALLOCATE_CATEGORY_SUBTABLE() \
- ALLOC_NORMAL_LISP_OBJECT (category_subtable)
-#define BASE_TYPE_ARRAY_FROM_CATEGORY_SUBTAB(tab) \
- (XCATEGORY_SUBTABLE (tab)->ptr)
-
-DECLARE_LISP_OBJECT (category_subtable, Lisp_Category_Subtable);
-#define XCATEGORY_SUBTABLE(x) XRECORD (x, category_subtable, Lisp_Category_Subtable)
-#define wrap_category_subtable(p) wrap_record (p, category_subtable)
-#define CATEGORY_SUBTABLEP(x) RECORDP (x, category_subtable)
-#define CHECK_CATEGORY_SUBTABLE(x) CHECK_RECORD (x, category_subtable)
-#define CONCHECK_CATEGORY_SUBTABLE(x) CONCHECK_RECORD (x, category_subtable)
+#define CHAR_TABLES_PER_CATEGORY_TABLE 95
struct Lisp_Category_Table
{
@@ -381,7 +337,7 @@
#define XCATEGORY_TABLE_TABLES(ct) CATEGORY_TABLE_TABLES (XCATEGORY_TABLE (ct))
int check_char_in_category (Ichar ch, Lisp_Object ctbl, int designator,
- int not_p);
+ int not_p);
extern Lisp_Object Vstandard_category_table;
@@ -401,32 +357,8 @@
/* Return the index of the char table storing the setting for this
designator */
-#define DESIGNATOR_TO_CHAR_TABLE(desig) \
- (((desig) - 0x20) >> CATEGORY_DIVIDE_SHIFT)
-/* Return the bit index into the value of type CATEGORY_TAB_BASE_TYPE
- corresponding to the given designator */
-#define DESIGNATOR_TO_BIT_INDEX(desig) \
- (((desig) - 0x20) & CATEGORY_MOD_AND)
-/* Given a char table number and an index, return the corresponding
- designator */
-#define BIT_INDEX_TO_DESIGNATOR(tablenum, ind) \
- ((tablenum) * BITS_PER_CATEGORY_SUBTABLE + (ind) + 0x20)
-
-
-/* Return the OR (set) mask for this designator in the integral value of
- type CATEGORY_TAB_BASE_TYPE */
-#define DESIGNATOR_TO_OR_MASK(desig) \
- BIT_INDEX_TO_SET_MASK (DESIGNATOR_TO_BIT_INDEX (desig))
-/* Return the AND (clear) mask for this designator in the integral value of
- type CATEGORY_TAB_BASE_TYPE */
-#define DESIGNATOR_TO_AND_MASK(desig) \
- BIT_INDEX_TO_CLEAR_MASK (DESIGNATOR_TO_BIT_INDEX (desig))
-
-#else /* not MULE */
-
-#define CATEGORY_TAB_BASE_TYPE unsigned char
-#define BASE_TYPE_ARRAY_FROM_CATEGORY_SUBTAB(tab) \
- ((CATEGORY_TAB_BASE_TYPE *) NULL)
+#define DESIGNATOR_TO_CHAR_TABLE(desig) ((desig) - 0x20)
+#define CHAR_TABLE_TO_DESIGNATOR(tabnum) ((tabnum) + 0x20)
#endif /* MULE */
@@ -436,11 +368,10 @@
/************************************************************************/
/* Get the raw value of CHARTAB for character CH. If the character's value
- has not been set, return NULL (for a category char table) or the void *
- equivalent of Qunbound (for other char tables). */
+ has not been set, return Qunbound. */
DECLARE_INLINE_HEADER (
-void *
+Lisp_Object
get_char_table_raw (Ichar ch, Lisp_Object chartab)
)
{
@@ -468,11 +399,11 @@
/* If not that many levels even in the table, then value definitely not
in the table */
if (levels < code_levels)
- return catp ? NULL : STORE_LISP_IN_VOID (Qunbound);
+ return Qunbound;
#endif /* not MAXIMIZE_CHAR_TABLE_DEPTH */
{
- register SUBTAB_TYPE table = XCHAR_TABLE_TABLE (chartab);
+ register Lisp_Object table = XCHAR_TABLE_TABLE (chartab);
/* We are really helping the compiler here. CHARTAB_LEVELS() will
evaluate to a constant when MAXIMIZE_CHAR_TABLE_DEPTH is true,
so any reasonable optimizing compiler should eliminate the
@@ -480,56 +411,37 @@
switch (CHARTAB_LEVELS (levels))
{
/* Fall through */
- case 4: table = SUBTAB_ARRAY_FROM_SUBTAB (table)[u4];
- case 3: table = SUBTAB_ARRAY_FROM_SUBTAB (table)[u3];
- case 2: table = SUBTAB_ARRAY_FROM_SUBTAB (table)[u2];
+ case 4:
+ if (!CHAR_SUBTABLEP (table))
+ return table;
+ table = SUBTAB_ARRAY_FROM_SUBTAB (table)[u4];
+ case 3:
+ if (!CHAR_SUBTABLEP (table))
+ return table;
+ table = SUBTAB_ARRAY_FROM_SUBTAB (table)[u3];
+ case 2:
+ if (!CHAR_SUBTABLEP (table))
+ return table;
+ table = SUBTAB_ARRAY_FROM_SUBTAB (table)[u2];
case 1:
if (catp)
- return (void *) (EMACS_INT) \
- BASE_TYPE_ARRAY_FROM_CATEGORY_SUBTAB (table)[u1];
- else
- return
- STORE_LISP_IN_VOID (BASE_TYPE_ARRAY_FROM_CHAR_SUBTAB (table)[u1]);
+ {
+ type_checking_assert (!CHAR_SUBTABLEP (table));
+ if (!BIT_VECTORP (table))
+ return table;
+ return make_int (bit_vector_bit (XBIT_VECTOR (table), u1));
+ }
+ if (!CHAR_SUBTABLEP (table))
+ return table;
+ return SUBTAB_ARRAY_FROM_SUBTAB (table)[u1];
}
}
ABORT (); /* Should never happen */
- return NULL;
+ return Qunbound;
}
-/* Same as get_char_table_raw() but return the default value for
- non-category char tables if the value is not set. */
-
-DECLARE_INLINE_HEADER (
-void *
-get_char_table (Ichar ch, Lisp_Object chartab)
-)
-{
- void *retval = get_char_table_raw (ch, chartab);
- if (XCHAR_TABLE_CATEGORY_P (chartab))
- return retval;
- else
- {
- if (!EQ (GET_LISP_FROM_VOID (retval), Qunbound))
- return retval;
- else
- return STORE_LISP_IN_VOID (XCHAR_TABLE_DEFAULT (chartab));
- }
-}
-
-/* Given a non-category char table, return the raw value of CHARTAB for
- character CH. This will be a Lisp object, since we don't have to worry
- about category char tables. */
-DECLARE_INLINE_HEADER (
-Lisp_Object
-get_char_table_lisp_raw (Ichar ch, Lisp_Object chartab)
-)
-{
- type_checking_assert (!XCHAR_TABLE_CATEGORY_P (chartab));
- return GET_LISP_FROM_VOID (get_char_table_raw (ch, chartab));
-}
-
-/* Same as get_char_table_lisp but don't trip an assert that we aren't
+/* Same as get_char_table() but don't trip an assert that we aren't
retrieving the value for a mirror table. (Normally we have this assert
in place to make sure that mirror tables don't escape to where they
shouldn't be. But some code really does need to access the mirror value
@@ -537,30 +449,28 @@
tables. */
DECLARE_INLINE_HEADER (
Lisp_Object
-get_char_table_lisp_1 (Ichar ch, Lisp_Object chartab)
+get_char_table_1 (Ichar ch, Lisp_Object chartab)
)
{
- Lisp_Object retval;
- retval = get_char_table_lisp_raw (ch, chartab);
+ Lisp_Object retval = get_char_table_raw (ch, chartab);
if (!EQ (retval, Qunbound))
return retval;
else
return XCHAR_TABLE_DEFAULT (chartab);
}
-/* Get the value of CHARTAB for character CH. TABLE must not be a
- category-table char table. If the character's value has not been set,
- this returns the default value for the char table. */
+/* Get the value of CHARTAB for character CH. If the character's value has
+ not been set, this returns the default value for the char table. */
DECLARE_INLINE_HEADER (
Lisp_Object
-get_char_table_lisp (Ichar ch, Lisp_Object table)
+get_char_table (Ichar ch, Lisp_Object table)
)
{
#ifdef MIRROR_TABLE
type_checking_assert (!XCHAR_TABLE (table)->mirror_table_p);
#endif /* MIRROR_TABLE */
- return get_char_table_lisp_1 (ch, table);
+ return get_char_table_1 (ch, table);
}
#endif /* INCLUDED_chartab_h_ */
diff -r ba0773838e57 -r 93f4f44ba25f src/cmds.c
--- a/src/cmds.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/cmds.c Mon Mar 29 15:46:27 2010 -0500
@@ -444,7 +444,7 @@
}
}
if ((CHAR_TABLEP (Vauto_fill_chars)
- ? !NILP (get_char_table_lisp (c1, Vauto_fill_chars))
+ ? !NILP (get_char_table (c1, Vauto_fill_chars))
: (c1 == ' ' || c1 == '\n'))
&& !noautofill
&& !NILP (buf->auto_fill_function))
@@ -553,6 +553,6 @@
Such characters have value t in this table.
*/);
Vauto_fill_chars = Fmake_char_table (Qgeneric);
- put_char_table (Vauto_fill_chars, ' ', Qt);
- put_char_table (Vauto_fill_chars, '\n', Qt);
+ put_char_table (Vauto_fill_chars, ' ', ' ', Qt);
+ put_char_table (Vauto_fill_chars, '\n', '\n', Qt);
}
diff -r ba0773838e57 -r 93f4f44ba25f src/console.c
--- a/src/console.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/console.c Mon Mar 29 15:46:27 2010 -0500
@@ -1452,7 +1452,7 @@
console_local_flags.tty_erase_char = always_local_resettable;
#endif
- console_local_flags.function_key_map = make_int (1);
+ console_local_flags.function_key_map = Qone;
/* #### Warning, 0x4000000 (that's six zeroes) is the largest number
currently allowable due to the XINT() handling of this value.
diff -r ba0773838e57 -r 93f4f44ba25f src/data.c
--- a/src/data.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/data.c Mon Mar 29 15:46:27 2010 -0500
@@ -1737,7 +1737,7 @@
if (nargs == 1)
{
i = 0;
- accum = make_int (1);
+ accum = Qone;
}
else
{
@@ -1807,7 +1807,7 @@
if (nargs == 1)
{
i = 0;
- accum = make_int (1);
+ accum = Qone;
}
else
{
diff -r ba0773838e57 -r 93f4f44ba25f src/editfns.c
--- a/src/editfns.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/editfns.c Mon Mar 29 15:46:27 2010 -0500
@@ -1935,7 +1935,7 @@
for (; pos < stop && (oc = BUF_FETCH_CHAR (buf, pos), 1); pos++)
{
- Lisp_Object replacement = get_char_table_lisp (oc, table);
+ Lisp_Object replacement = get_char_table (oc, table);
retry2:
if (CHAR_OR_CHAR_INTP (replacement))
{
diff -r ba0773838e57 -r 93f4f44ba25f src/event-stream.c
--- a/src/event-stream.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/event-stream.c Mon Mar 29 15:46:27 2010 -0500
@@ -4946,7 +4946,7 @@
DEFVAR_LISP ("echo-keystrokes", &Vecho_keystrokes /*
*Nonzero means echo unfinished commands after this many seconds of pause.
*/ );
- Vecho_keystrokes = make_int (1);
+ Vecho_keystrokes = Qone;
DEFVAR_INT ("auto-save-interval", &auto_save_interval /*
*Number of keyboard input characters between auto-saves.
diff -r ba0773838e57 -r 93f4f44ba25f src/fileio.c
--- a/src/fileio.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/fileio.c Mon Mar 29 15:46:27 2010 -0500
@@ -3990,11 +3990,11 @@
clear_echo_area (selected_frame (), Qauto_saving, 1);
Fding (Qt, Qauto_save_error, Qnil);
message ("Auto-saving...error for %s", XSTRING_DATA (current_buffer->name));
- Fsleep_for (make_int (1));
+ Fsleep_for (Qone);
message ("Auto-saving...error!for %s", XSTRING_DATA (current_buffer->name));
- Fsleep_for (make_int (1));
+ Fsleep_for (Qone);
message ("Auto-saving...error for %s", XSTRING_DATA (current_buffer->name));
- Fsleep_for (make_int (1));
+ Fsleep_for (Qone);
return Qnil;
}
@@ -4189,7 +4189,7 @@
and prevent any more warnings. */
b->saved_size = make_int (-1);
if (!gc_in_progress)
- Fsleep_for (make_int (1));
+ Fsleep_for (Qone);
continue;
}
set_buffer_internal (b);
diff -r ba0773838e57 -r 93f4f44ba25f src/fns.c
--- a/src/fns.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/fns.c Mon Mar 29 15:46:27 2010 -0500
@@ -433,7 +433,7 @@
if (CONSP (*ptr) && INTP (XCAR (*ptr)))
XCAR (*ptr) = make_int (1+XINT (XCAR (*ptr)));
else
- *ptr = Fcons (make_int (1), *ptr);
+ *ptr = Fcons (Qone, *ptr);
}
diff -r ba0773838e57 -r 93f4f44ba25f src/glyphs.c
--- a/src/glyphs.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/glyphs.c Mon Mar 29 15:46:27 2010 -0500
@@ -5057,12 +5057,12 @@
else if (CHAR_TABLEP (table)
&& XCHAR_TABLE_TYPE (table) == CHAR_TABLE_TYPE_CHAR)
{
- return get_char_table_lisp (ch, table);
+ return get_char_table (ch, table);
}
else if (CHAR_TABLEP (table)
&& XCHAR_TABLE_TYPE (table) == CHAR_TABLE_TYPE_GENERIC)
{
- Lisp_Object gotit = get_char_table_lisp (ch, table);
+ Lisp_Object gotit = get_char_table (ch, table);
if (!NILP (gotit))
return gotit;
else
diff -r ba0773838e57 -r 93f4f44ba25f src/keymap.c
--- a/src/keymap.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/keymap.c Mon Mar 29 15:46:27 2010 -0500
@@ -3691,7 +3691,7 @@
{
assert (firstonly);
format_raw_keys (so_far, keys_count + 1, target_buffer);
- return make_int (1);
+ return Qone;
}
else if (firstonly)
return raw_keys_to_keys (so_far, keys_count + 1);
diff -r ba0773838e57 -r 93f4f44ba25f src/lisp-disunion.h
--- a/src/lisp-disunion.h Thu Mar 25 19:16:54 2010 -0500
+++ b/src/lisp-disunion.h Mon Mar 29 15:46:27 2010 -0500
@@ -1,6 +1,6 @@
/* Fundamental definitions for XEmacs Lisp interpreter -- non-union objects.
Copyright (C) 1985, 1986, 1987, 1992, 1993 Free Software Foundation, Inc.
- Copyright (C) 2001, 2002, 2005 Ben Wing.
+ Copyright (C) 2001, 2002, 2005, 2010 Ben Wing.
This file is part of XEmacs.
@@ -73,6 +73,7 @@
XUINT The value bits of a Lisp_Object storing an integer, unsigned
INTP Non-zero if this Lisp_Object is an integer
Qzero Lisp Integer 0
+ Qone Lisp Integer 1
EQ Non-zero if two Lisp_Objects are identical, not merely equal. */
@@ -107,10 +108,11 @@
#define INTP(x) ((EMACS_UINT)(x) & Lisp_Type_Int_Bit)
#define INT_PLUS(x,y) ((x)+(y)-Lisp_Type_Int_Bit)
#define INT_MINUS(x,y) ((x)-(y)+Lisp_Type_Int_Bit)
-#define INT_PLUS1(x) INT_PLUS (x, make_int (1))
-#define INT_MINUS1(x) INT_MINUS (x, make_int (1))
+#define INT_PLUS1(x) INT_PLUS (x, Qone)
+#define INT_MINUS1(x) INT_MINUS (x, Qone)
#define Qzero make_int (0)
+#define Qone make_int (1)
#define Qnull_pointer ((Lisp_Object) 0)
#define EQ(x,y) ((x) == (y))
diff -r ba0773838e57 -r 93f4f44ba25f src/lisp-union.h
--- a/src/lisp-union.h Thu Mar 25 19:16:54 2010 -0500
+++ b/src/lisp-union.h Mon Mar 29 15:46:27 2010 -0500
@@ -132,7 +132,7 @@
return obj;
}
-extern MODULE_API Lisp_Object Qnull_pointer, Qzero;
+extern MODULE_API Lisp_Object Qnull_pointer, Qzero, Qone;
#define INTP(x) ((x).s.bits)
#define INT_PLUS(x,y) make_int (XINT (x) + XINT (y))
diff -r ba0773838e57 -r 93f4f44ba25f src/lisp.h
--- a/src/lisp.h Thu Mar 25 19:16:54 2010 -0500
+++ b/src/lisp.h Mon Mar 29 15:46:27 2010 -0500
@@ -3012,6 +3012,7 @@
/*-------------------basic int (no connection to char)------------------*/
#define ZEROP(x) EQ (x, Qzero)
+#define ONEP(x) EQ (x, Qone)
#ifdef ERROR_CHECK_TYPES
@@ -4358,6 +4359,7 @@
MODULE_API Lisp_Object vector3 (Lisp_Object, Lisp_Object, Lisp_Object);
Lisp_Object make_bit_vector (Elemcount, Lisp_Object);
Lisp_Object make_bit_vector_from_byte_vector (unsigned char *, Elemcount);
+Lisp_Object clone_bit_vector (Lisp_Object bitvec);
Lisp_Object noseeum_make_marker (void);
#ifndef NEW_GC
void garbage_collect_1 (void);
diff -r ba0773838e57 -r 93f4f44ba25f src/lrecord.h
--- a/src/lrecord.h Thu Mar 25 19:16:54 2010 -0500
+++ b/src/lrecord.h Mon Mar 29 15:46:27 2010 -0500
@@ -335,7 +335,6 @@
lrecord_type_bit_vector, /* Lisp_Bit_Vector */
lrecord_type_buffer, /* struct buffer */
lrecord_type_case_table, /* Lisp_Case_Table */
- lrecord_type_category_subtable, /* Lisp_Category_Subtable */
lrecord_type_category_table, /* Lisp_Category_Table */
lrecord_type_char_subtable, /* Lisp_Char_Subtable */
lrecord_type_char_table, /* Lisp_Char_Table */
diff -r ba0773838e57 -r 93f4f44ba25f src/macros.c
--- a/src/macros.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/macros.c Mon Mar 29 15:46:27 2010 -0500
@@ -83,7 +83,7 @@
{
message ("Appending to kbd macro...");
con->kbd_macro_ptr = con->kbd_macro_end;
- Fexecute_kbd_macro (con->last_kbd_macro, make_int (1));
+ Fexecute_kbd_macro (con->last_kbd_macro, Qone);
}
con->defining_kbd_macro = Qt;
diff -r ba0773838e57 -r 93f4f44ba25f src/marker.c
--- a/src/marker.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/marker.c Mon Mar 29 15:46:27 2010 -0500
@@ -532,7 +532,7 @@
b->point_marker = Fmake_marker ();
Fset_marker (b->point_marker,
/* For indirect buffers, point is already set. */
- b->base_buffer ? make_int (BUF_PT (b)) : make_int (1),
+ b->base_buffer ? make_int (BUF_PT (b)) : Qone,
buf);
}
diff -r ba0773838e57 -r 93f4f44ba25f src/minibuf.c
--- a/src/minibuf.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/minibuf.c Mon Mar 29 15:46:27 2010 -0500
@@ -93,7 +93,7 @@
XWINDOW (minibuf_window)->last_facechange[DESIRED_DISP] = Qzero;
XWINDOW (minibuf_window)->last_facechange[CMOTION_DISP] = Qzero;
Vminibuf_prompt = Felt (unwind_data, Qzero);
- minibuf_level = XINT (Felt (unwind_data, make_int (1)));
+ minibuf_level = XINT (Felt (unwind_data, Qone));
while (CONSP (unwind_data))
{
Lisp_Object victim = unwind_data;
diff -r ba0773838e57 -r 93f4f44ba25f src/number.c
--- a/src/number.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/number.c Mon Mar 29 15:46:27 2010 -0500
@@ -238,7 +238,7 @@
(XRATIO_DENOMINATOR (rational)));
}
#endif
- return make_int (1);
+ return Qone;
}
diff -r ba0773838e57 -r 93f4f44ba25f src/print.c
--- a/src/print.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/print.c Mon Mar 29 15:46:27 2010 -0500
@@ -1986,7 +1986,7 @@
tem = make_int (XINT (XCDR (XCAR (Vprint_gensym_alist))) + 1);
}
else
- tem = make_int (1);
+ tem = Qone;
Vprint_gensym_alist = Fcons (Fcons (obj, tem), Vprint_gensym_alist);
write_ascstring (printcharfun, "#");
diff -r ba0773838e57 -r 93f4f44ba25f src/process-unix.c
--- a/src/process-unix.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/process-unix.c Mon Mar 29 15:46:27 2010 -0500
@@ -466,7 +466,7 @@
if (! (host_info_ptr == 0 && h_errno == TRY_AGAIN))
#endif
break;
- Fsleep_for (make_int (1));
+ Fsleep_for (Qone);
}
if (host_info_ptr)
{
diff -r ba0773838e57 -r 93f4f44ba25f src/scrollbar.c
--- a/src/scrollbar.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/scrollbar.c Mon Mar 29 15:46:27 2010 -0500
@@ -748,7 +748,7 @@
(window))
{
CHECK_LIVE_WINDOW (window);
- window_scroll (window, make_int (1), -1, ERROR_ME_NOT);
+ window_scroll (window, Qone, -1, ERROR_ME_NOT);
zmacs_region_stays = 1;
return Qnil;
}
@@ -762,7 +762,7 @@
(window))
{
CHECK_LIVE_WINDOW (window);
- window_scroll (window, make_int (1), 1, ERROR_ME_NOT);
+ window_scroll (window, Qone, 1, ERROR_ME_NOT);
zmacs_region_stays = 1;
return Qnil;
}
diff -r ba0773838e57 -r 93f4f44ba25f src/symbols.c
--- a/src/symbols.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/symbols.c Mon Mar 29 15:46:27 2010 -0500
@@ -2889,7 +2889,7 @@
return Qnil;
}
- assert (EQ (make_int (1), mapped));
+ assert (EQ (Qone, mapped));
return Qt;
}
@@ -3503,6 +3503,9 @@
#ifndef Qzero
Lisp_Object Qzero;
+#endif
+#ifndef Qone
+Lisp_Object Qone;
#endif
#ifndef Qnull_pointer
Lisp_Object Qnull_pointer;
diff -r ba0773838e57 -r 93f4f44ba25f src/syntax.c
--- a/src/syntax.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/syntax.c Mon Mar 29 15:46:27 2010 -0500
@@ -295,8 +295,8 @@
reset_buffer_syntax_cache_range (struct syntax_cache *cache,
Lisp_Object buffer, int infinite)
{
- Fset_marker (cache->start, make_int (1), buffer);
- Fset_marker (cache->end, make_int (1), buffer);
+ Fset_marker (cache->start, Qone, buffer);
+ Fset_marker (cache->end, Qone, buffer);
Fset_marker_insertion_type (cache->start, Qt);
Fset_marker_insertion_type (cache->end, Qnil);
/* #### Should we "cache->no_syntax_table_prop = 1;" here? */
@@ -683,13 +683,13 @@
Lisp_Object
syntax_match (Lisp_Object syntax_table, Ichar ch)
{
- Lisp_Object code = get_char_table_lisp (ch, syntax_table);
+ Lisp_Object code = get_char_table (ch, syntax_table);
Lisp_Object code2 = code;
if (CONSP (code))
code2 = XCAR (code);
if (SYNTAX_FROM_CODE (XINT (code2)) == Sinherit)
- code = get_char_table_lisp (ch, Vstandard_syntax_table);
+ code = get_char_table (ch, Vstandard_syntax_table);
return CONSP (code) ? XCDR (code) : Qnil;
}
@@ -2288,31 +2288,39 @@
*/
static int
-copy_to_mirrortab (Lisp_Object UNUSED (table), Ichar ch,
- Lisp_Object val, void *arg)
+copy_to_mirrortab (Lisp_Object UNUSED (table), Ichar from,
+ Ichar to, Lisp_Object val, void *arg)
{
Lisp_Object mirrortab = GET_LISP_FROM_VOID (arg);
if (CONSP (val))
val = XCAR (val);
if (SYNTAX_FROM_CODE (XINT (val)) != Sinherit)
- put_char_table (mirrortab, ch, val);
+ put_char_table (mirrortab, from, to, val);
return 0;
}
static int
-copy_if_not_already_present (Lisp_Object UNUSED (table), Ichar ch,
- Lisp_Object val, void *arg)
+copy_if_not_already_present (Lisp_Object UNUSED (table), Ichar from,
+ Ichar to, Lisp_Object val, void *arg)
{
Lisp_Object mirrortab = GET_LISP_FROM_VOID (arg);
+
if (CONSP (val))
val = XCAR (val);
if (SYNTAX_FROM_CODE (XINT (val)) != Sinherit)
{
- Lisp_Object existing = get_char_table_lisp_raw (ch, mirrortab);
- if (UNBOUNDP (existing))
- /* nothing at all */
- put_char_table (mirrortab, ch, val);
+ Ichar ch;
+ for (ch = from; ch <= to; ch++)
+ {
+ if (valid_ichar_p (ch))
+ {
+ Lisp_Object existing = get_char_table_raw (ch, mirrortab);
+ if (UNBOUNDP (existing))
+ /* nothing at all */
+ put_char_table (mirrortab, ch, ch, val);
+ }
+ }
}
return 0;
@@ -2334,12 +2342,14 @@
entries don't already exist in that table. (The copying step requires
another mapping.)
*/
- map_char_table (table, &range, copy_to_mirrortab, STORE_LISP_IN_VOID (mirrortab));
+ map_char_table (table, &range, copy_to_mirrortab,
+ STORE_LISP_IN_VOID (mirrortab));
/* second clause catches bootstrapping problems when initializing the
standard syntax table */
if (!EQ (table, Vstandard_syntax_table) && !NILP (Vstandard_syntax_table))
map_char_table (Vstandard_syntax_table, &range,
- copy_if_not_already_present, STORE_LISP_IN_VOID (mirrortab));
+ copy_if_not_already_present,
+ STORE_LISP_IN_VOID (mirrortab));
/* The resetting made the default be Qnil. Put it back to Sword. */
set_char_table_default (mirrortab, make_int (Sword));
XCHAR_TABLE (mirrortab)->dirty = 0;
diff -r ba0773838e57 -r 93f4f44ba25f src/syntax.h
--- a/src/syntax.h Thu Mar 25 19:16:54 2010 -0500
+++ b/src/syntax.h Mon Mar 29 15:46:27 2010 -0500
@@ -154,9 +154,9 @@
#ifdef MIRROR_TABLE
type_checking_assert (XCHAR_TABLE (table)->mirror_table_p);
update_mirror_syntax_if_dirty (table);
- return XINT (get_char_table_lisp_1 (c, table));
+ return XINT (get_char_table_1 (c, table));
#else
- code = get_char_table_lisp (c, table);
+ code = get_char_table (c, table);
/* #### It's possible this code will be time consuming because of getting
run in an inner-loop. But it's all inlined. */
@@ -165,7 +165,7 @@
code = XCAR (code);
if (SYNTAX_FROM_CODE (XINT (code)) == Sinherit)
{
- code = get_char_table_lisp (c, Vstandard_syntax_table);
+ code = get_char_table (c, Vstandard_syntax_table);
if (CONSP (code))
code = XCAR (code);
}
@@ -184,7 +184,7 @@
)
{
type_checking_assert (XCHAR_TABLE (table)->mirror_table_p);
- return (enum syntaxcode) XINT (get_char_table_lisp_1 (c, table));
+ return (enum syntaxcode) XINT (get_char_table_1 (c, table));
}
#endif /* NOT_WORTH_THE_EFFORT */
diff -r ba0773838e57 -r 93f4f44ba25f src/sysdep.c
--- a/src/sysdep.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/sysdep.c Mon Mar 29 15:46:27 2010 -0500
@@ -2075,7 +2075,7 @@
# ifdef TRY_AGAIN
if (! (hp == 0 && h_errno == TRY_AGAIN))
break;
- Fsleep_for (make_int (1));
+ Fsleep_for (Qone);
}
# endif
if (hp)
diff -r ba0773838e57 -r 93f4f44ba25f src/tests.c
--- a/src/tests.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/tests.c Mon Mar 29 15:46:27 2010 -0500
@@ -646,7 +646,7 @@
data.hash_table = make_lisp_hash_table (50, HASH_TABLE_NON_WEAK,
HASH_TABLE_EQUAL);
- Fputhash (make_int (1), make_int (2), data.hash_table);
+ Fputhash (Qone, make_int (2), data.hash_table);
Fputhash (make_int (3), make_int (4), data.hash_table);
data.sum = 0;
diff -r ba0773838e57 -r 93f4f44ba25f src/text.c
--- a/src/text.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/text.c Mon Mar 29 15:46:27 2010 -0500
@@ -1762,8 +1762,128 @@
return charset_codepoint_to_ichar (charset, c1, c2, CONVERR_FAIL);
}
+static Ichar
+old_mule_round_up_to_valid_ichar (int charpos)
+{
+ int i, dim;
+
+ /* Within a particular dimension (1 or 2), charset ID order corresponds
+ to character order. Furthermore, all dimension-1 characters are less
+ than all dimension-2 characters. This is not the case with charset
+ ID's, however. So we have to loop twice over the range of encodable
+ ID's, once per dimension. We could shorten things somewhat by using
+ more specific knowledge about exactly which ID's are assigned to which
+ dimensions, but that would be more fragile, and it's unlikely that
+ this way takes a significant amount of time as there are only 225 or
+ so possible encodable ID's. */
+ for (dim = 1; dim <= 2; dim++)
+ {
+ for (i = MIN_ENCODABLE_CHARSET_ID; i <= MAX_ENCODABLE_CHARSET_ID; i++)
+ {
+ Lisp_Object charset = charset_by_encodable_id (i);
+ if (!NILP (charset) && XCHARSET_DIMENSION (charset) == dim)
+ {
+ int l1, l2, h1, h2;
+ Ichar minchar, maxchar;
+ get_charset_limits (charset, &l1, &l2, &h1, &h2);
+ minchar = charset_codepoint_to_ichar (charset, l1, l2,
+ CONVERR_ABORT);
+ maxchar = charset_codepoint_to_ichar (charset, h1, h2,
+ CONVERR_ABORT);
+ /* Either we are between charsets, or in a gap within a
+ charset. */
+ if (i < minchar)
+ /* We are between charsets */
+ return minchar;
+ if (i < maxchar)
+ {
+ /* We are in a gap. The gaps aren't more than 34 characters
+ wide, so just move up till we find the end of the gap. */
+ while (!valid_ichar_p (charpos))
+ charpos++;
+ return charpos;
+ }
+ }
+ }
+ }
+
+ return -1;
+}
+
+static Ichar
+old_mule_round_down_to_valid_ichar (int charpos)
+{
+ int i, dim;
+
+ for (dim = 2; dim >= 1; dim--)
+ {
+ for (i = MAX_ENCODABLE_CHARSET_ID; i >= MIN_ENCODABLE_CHARSET_ID; i--)
+ {
+ Lisp_Object charset = charset_by_encodable_id (i);
+ if (!NILP (charset) && XCHARSET_DIMENSION (charset) == dim)
+ {
+ int l1, l2, h1, h2;
+ Ichar minchar, maxchar;
+ get_charset_limits (charset, &l1, &l2, &h1, &h2);
+ minchar = charset_codepoint_to_ichar (charset, l1, l2,
+ CONVERR_ABORT);
+ maxchar = charset_codepoint_to_ichar (charset, h1, h2,
+ CONVERR_ABORT);
+ if (i > maxchar)
+ return maxchar;
+ if (i > minchar)
+ {
+ while (!valid_ichar_p (charpos))
+ charpos--;
+ return charpos;
+ }
+ }
+ }
+ }
+
+ return -1;
+}
+
#endif /* not UNICODE_INTERNAL */
+/* Take a possibly invalid Ichar value (must be >= 0) and move upwards as
+ necessary until we find the first valid Ichar. Return -1 if we're above
+ all valid Ichars. */
+
+Ichar
+round_up_to_valid_ichar (int charpos)
+{
+ text_checking_assert (charpos >= 0);
+ if (valid_ichar_p (charpos))
+ return (Ichar) charpos;
+#ifdef UNICODE_INTERNAL
+ if (valid_unicode_surrogate (charpos))
+ return (Ichar) (LAST_UTF_16_SURROGATE + 1);
+ text_checking_assert (charpos > ICHAR_MAX);
+ return -1;
+#else
+ return old_mule_round_up_to_valid_ichar (charpos);
+#endif
+}
+
+/* Take a possibly invalid Ichar value (must be >= 0) and move downwards as
+ necessary until we find the first valid Ichar. */
+
+Ichar
+round_down_to_valid_ichar (int charpos)
+{
+ text_checking_assert (charpos >= 0);
+ if (valid_ichar_p (charpos))
+ return (Ichar) charpos;
+#ifdef UNICODE_INTERNAL
+ if (charpos > ICHAR_MAX)
+ return ICHAR_MAX;
+ text_checking_assert (valid_unicode_surrogate (charpos));
+ return (Ichar) (FIRST_UTF_16_SURROGATE - 1);
+#else
+ return old_mule_round_down_to_valid_ichar (charpos);
+#endif
+}
/* Convert a charset codepoint (guaranteed not to be ASCII) into a
character in the internal string representation. Return number
@@ -5899,7 +6019,7 @@
if (NILP (n) || EQ (n, Qzero))
return make_int (c1);
- else if (EQ (n, make_int (1)))
+ else if (EQ (n, Qone))
return make_int (c2);
else
invalid_constant ("Octet number must be 0 or 1", n);
diff -r ba0773838e57 -r 93f4f44ba25f src/text.h
--- a/src/text.h Thu Mar 25 19:16:54 2010 -0500
+++ b/src/text.h Mon Mar 29 15:46:27 2010 -0500
@@ -188,6 +188,40 @@
*/
/* ---------------------------------------------------------------------- */
+/* UTF-16 properties */
+/* ---------------------------------------------------------------------- */
+
+/* Assuming a Unicode codepoint is in range i.e. [0, 7FFFFFFF], does it
+ correspond to a UTF-16 surrogate, a UTF-16 leading surrogate, or a
+ UTF-16 trailing surrogate? Note that these are written to work properly
+ on any Unicode codepoint, not just those in the UTF-16 range. */
+
+#define valid_unicode_leading_surrogate(ch) (((ch) & 0x7FFFFC00) == 0xD800)
+#define valid_unicode_trailing_surrogate(ch) (((ch) & 0x7FFFFC00) == 0xDC00)
+#define valid_unicode_surrogate(ch) (((ch) & 0x7FFFF800) == 0xD800)
+
+#define FIRST_UTF_16_SURROGATE 0xD800
+#define LAST_UTF_16_SURROGATE 0xDFFF
+
+/* See the Unicode FAQ, http://www.unicode.org/faq/utf_bom.html#35 for this
+ algorithm.
+
+ (They also give another, really verbose one, as part of their explanation
+ of the various planes of the encoding, but we won't use that.) */
+
+#define UTF_16_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
+#define UTF_16_SURROGATE_OFFSET (0x10000 - (0xD800 << 10) - 0xDC00)
+
+#define utf_16_surrogates_to_code(lead, trail) \
+ (((lead) << 10) + (trail) + UTF_16_SURROGATE_OFFSET)
+
+#define CODE_TO_UTF_16_SURROGATES(codepoint, lead, trail) do { \
+ int __ctu16s_code = (codepoint); \
+ lead = UTF_16_LEAD_OFFSET + (__ctu16s_code >> 10); \
+ trail = 0xDC00 + (__ctu16s_code & 0x3FF); \
+} while (0)
+
+/* ---------------------------------------------------------------------- */
/* Validating Unicode code points */
/* ---------------------------------------------------------------------- */
@@ -214,20 +248,25 @@
valid_unicode_codepoint_p (EMACS_INT ch, enum unicode_allow allow)
)
{
+ if (ch < 0)
+ return 0;
if (allow == UNICODE_ALLOW_PRIVATE)
{
#if SIZEOF_EMACS_INT > 4
/* On 64-bit machines, we could have a value too large */
- return ch >= 0 && ch <= UNICODE_PRIVATE_MAX;
+ if (ch > UNICODE_PRIVATE_MAX)
+ return 0;
#else
- return ch >= 0;
+ DO_NOTHING;
#endif
}
else
{
text_checking_assert (allow == UNICODE_OFFICIAL_ONLY);
- return ch <= UNICODE_OFFICIAL_MAX && ch >= 0;
+ if (ch > UNICODE_OFFICIAL_MAX)
+ return 0;
}
+ return !valid_unicode_surrogate (ch);
}
#define ASSERT_VALID_UNICODE_CODEPOINT(code) \
@@ -271,32 +310,6 @@
#define unicode_error_octet_code_to_octet(code) \
((unsigned char) ((code) & 0xFF))
-
-/* ---------------------------------------------------------------------- */
-/* UTF-16 properties */
-/* ---------------------------------------------------------------------- */
-
-#define valid_utf_16_first_surrogate(ch) (((ch) & 0xFC00) == 0xD800)
-#define valid_utf_16_last_surrogate(ch) (((ch) & 0xFC00) == 0xDC00)
-#define valid_utf_16_surrogate(ch) (((ch) & 0xF800) == 0xD800)
-
-/* See the Unicode FAQ, http://www.unicode.org/faq/utf_bom.html#35 for this
- algorithm.
-
- (They also give another, really verbose one, as part of their explanation
- of the various planes of the encoding, but we won't use that.) */
-
-#define UTF_16_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
-#define UTF_16_SURROGATE_OFFSET (0x10000 - (0xD800 << 10) - 0xDC00)
-
-#define utf_16_surrogates_to_code(lead, trail) \
- (((lead) << 10) + (trail) + UTF_16_SURROGATE_OFFSET)
-
-#define CODE_TO_UTF_16_SURROGATES(codepoint, lead, trail) do { \
- int __ctu16s_code = (codepoint); \
- lead = UTF_16_LEAD_OFFSET + (__ctu16s_code >> 10); \
- trail = 0xDC00 + (__ctu16s_code & 0x3FF); \
-} while (0)
/****************************************************************************/
@@ -508,6 +521,19 @@
MODULE_API int old_mule_non_ascii_valid_ichar_p (Ichar ch);
#endif
+/* Ichar is defined to be a 32-bit integer. However, non-negative Ichar
+ values need to be storable as a Lisp character, which is unsigned. If
+ we have 64-bit EMACS_INTs, then we have 62 bits available to hold a
+ character, more than enough to hold the 31 bits of nonnegativeness
+ available in a 32-bit integer. However, if we have 32-bit EMACS_INTs,
+ then we have only 30 bits available to hold a character. so Ichars have
+ to be restricted to 30 bits of nonnegativeness. */
+#if SIZEOF_EMACS_INT > 4
+#define ICHAR_MAX INT_32_BIT_MAX
+#else
+#define ICHAR_MAX 0x3FFFFFFF
+#endif
+
/* Return whether the given Ichar is valid.
*/
@@ -517,7 +543,8 @@
)
{
#ifdef UNICODE_INTERNAL
- return valid_unicode_codepoint_p ((EMACS_INT) ch, UNICODE_ALLOW_PRIVATE);
+ return ch <= ICHAR_MAX &&
+ valid_unicode_codepoint_p ((EMACS_INT) ch, UNICODE_ALLOW_PRIVATE);
#else
return (! (ch & ~0xFF)) || old_mule_non_ascii_valid_ichar_p (ch);
#endif /* UNICODE_INTERNAL */
@@ -788,6 +815,9 @@
}
#endif /* (not) defined (UNICODE_INTERNAL) */
}
+
+Ichar round_up_to_valid_ichar (int charpos);
+Ichar round_down_to_valid_ichar (int charpos);
/************************************************************************/
/* Unicode conversion */
diff -r ba0773838e57 -r 93f4f44ba25f src/unicode.c
--- a/src/unicode.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/unicode.c Mon Mar 29 15:46:27 2010 -0500
@@ -1298,7 +1298,7 @@
Vcurrent_jit_charset = Fmake_charset
(intern (setname), Vcharset_descr,
- nconc2 (list6 (Qcolumns, make_int (1), Qchars,
+ nconc2 (list6 (Qcolumns, Qone, Qchars,
make_int (96),
Qdimension, make_int (2)),
list4 (Qregistries, Qunicode_registries,
@@ -2875,10 +2875,13 @@
case 5: invalid = data->ch < 0x200000; break;
case 6: invalid = data->ch < 0x4000000; break;
}
- if (invalid || valid_utf_16_surrogate (data->ch) ||
- /* We accept values above #x10FFFF in
- escape-quoted, though not in UTF-8. */
- (!allow_private && data->ch > UNICODE_OFFICIAL_MAX))
+ /* We accept values above #x10FFFF in
+ escape-quoted, though not in UTF-8. */
+ if (invalid ||
+ !valid_unicode_codepoint_p (data->ch,
+ allow_private ?
+ UNICODE_ALLOW_PRIVATE :
+ UNICODE_OFFICIAL_ONLY))
{
indicate_invalid_utf_8 (data->indicated_length,
data->counter,
@@ -3115,7 +3118,7 @@
{
int tempch = ch;
- if (valid_utf_16_first_surrogate (ch))
+ if (valid_unicode_leading_surrogate (ch))
continue;
ch = 0;
counter = 0;
@@ -3127,7 +3130,7 @@
if (little_endian)
{
- if (!valid_utf_16_last_surrogate (ch >> 16))
+ if (!valid_unicode_trailing_surrogate (ch >> 16))
{
UNICODE_DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
ignore_bom);
@@ -3148,7 +3151,7 @@
}
else
{
- if (!valid_utf_16_last_surrogate (ch & 0xFFFF))
+ if (!valid_unicode_trailing_surrogate (ch & 0xFFFF))
{
UNICODE_DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst,
data, ignore_bom);
diff -r ba0773838e57 -r 93f4f44ba25f src/window.c
--- a/src/window.c Thu Mar 25 19:16:54 2010 -0500
+++ b/src/window.c Mon Mar 29 15:46:27 2010 -0500
@@ -3894,9 +3894,9 @@
w = XWINDOW (window);
w->hscroll = 0;
w->modeline_hscroll = 0;
- set_marker_restricted (w->start[CURRENT_DISP], make_int (1), buf);
- set_marker_restricted (w->pointm[CURRENT_DISP], make_int (1), buf);
- set_marker_restricted (w->sb_point, make_int (1), buf);
+ set_marker_restricted (w->start[CURRENT_DISP], Qone, buf);
+ set_marker_restricted (w->pointm[CURRENT_DISP], Qone, buf);
+ set_marker_restricted (w->sb_point, Qone, buf);
}
}
@@ -5769,7 +5769,7 @@
{
Lisp_Object fb = Qnil;
#ifdef HAVE_TTY
- fb = Fcons (Fcons (list1 (Qtty), make_int (1)), fb);
+ fb = Fcons (Fcons (list1 (Qtty), Qone), fb);
#endif
#ifdef HAVE_GTK
fb = Fcons (Fcons (list1 (Qgtk), make_int (3)), fb);
More information about the XEmacs-Patches
mailing list