APPROVE COMMIT 21.5
Revision 5543:fbe90e6f7a43 "Initialize start and end properly (to new
markers, not Qnil)" introduced a bad performance regression, resulting
in map_extents (a fairly expensive function) being called ~15,000,000
times instead of ~100,000 times while fontifying python-mode.el.
xemacs -vanilla -eval '(find-lib "python-mode")'
-eval "(profile-command 'font-lock-fontify-buffer)"
to see the evil for yourself. :-/
All versions up to 5551:40a52efbf3a3 "Reflect change of location of
packages to share/" are affected. I recommend you avoid using those
versions if possible. If you must use one and run into performance
problems, setting `lookup-syntax-properties' to nil will probably help
performance (untested) but will definitely break cc-mode and cperl.
The attached patch fixes the issue, and due to the performance
regression, I'm going to commit it now. There's a bunch of other
stuff in there, refactoring and docs etc. Sorry about that, but I
needed to do most of the refactoring to figure out what was going
wrong.
FWIW, I'm fairly sure that the hunk "@@ -344,22 +381,37 @@" is what
actually fixes the regression.
diff -r 40a52efbf3a3 src/ChangeLog
--- a/src/ChangeLog Sun Aug 14 13:51:14 2011 +0200
+++ b/src/ChangeLog Tue Aug 23 04:32:35 2011 +0900
@@ -1,3 +1,41 @@
+2011-08-23 Stephen Turnbull <stephen(a)xemacs.org>
+
+ Fix performance regression in refactored syntax cache setup.
+ More doc improvements.
+
+ * syntax.h (enum syntax_source):
+ New. Specify whether syntax is from property or buffer.
+ (struct syntax_cache):
+ Use enum syntax_source source, instead of no_syntax_table_prop
+ and use_code.
+ Improve comments.
+ (SOURCE_IS_TABLE):
+ New predicate.
+ (SYNTAX_CODE_FROM_CACHE):
+ Use it instead of use_code, and adjust logic.
+
+ * syntax.c (syntax_cache_table_was_changed):
+ Check cache->source (cache->no_syntax_table_prop is gone).
+ (reset_syntax_cache_range):
+ All information about OBJECT and BUFFER is in CACHE already.
+ Also reset markers in OBJECT if it is a buffer.
+ Rename INFINITE to VALID_EVERYWHERE.
+ (init_syntax_cache):
+ Initialize source (cache->no_syntax_table_prop is gone).
+ Maybe initialize start and end to null markers.
+ Initialize cache range with reset_syntax_cache_range.
+ (update_syntax_cache):
+ Use source instead of no_syntax_table_prop and use_code.
+ (setup_syntax_cache):
+ Add header comment. Improve other comments.
+ Make calls to reset_syntax_cache_range and init_syntax_cache match
+ their prototypes.
+ (init_buffer_syntax_cache):
+ Use init_syntax_cache to do the work.
+ (signal_syntax_cache_extent_changed):
+ Make call to reset_syntax_cache_range match its prototype.
+ Improve local variable naming.
+
2011-08-08 Stephen J. Turnbull <stephen(a)xemacs.org>
* syntax.c (update_syntax_cache):
diff -r 40a52efbf3a3 src/syntax.c
--- a/src/syntax.c Sun Aug 14 13:51:14 2011 +0200
+++ b/src/syntax.c Tue Aug 23 04:32:35 2011 +0900
@@ -273,7 +273,7 @@
syntax_cache_table_was_changed (struct buffer *buf)
{
struct syntax_cache *cache = buf->syntax_cache;
- if (cache->no_syntax_table_prop)
+ if (cache->source == syntax_source_buffer_table)
{
cache->syntax_table =
BUFFER_SYNTAX_TABLE (buf);
@@ -284,59 +284,96 @@
static void
reset_syntax_cache_range (struct syntax_cache *cache, /* initialized cache */
- Lisp_Object object) /* string or buffer */
+ int valid_everywhere) /* non-zero if we can assume
+ syntax-table properties
+ never need be respected
+ in the life of the cache */
{
- /* reinitialize cache parameters */
- if (BUFFERP (object))
+ if (BUFFERP (cache->object))
{
/* make known region zero-length and reset insertion behavior */
- Fset_marker (cache->start, make_int (1), object);
- Fset_marker (cache->end, make_int (1), object);
+ Fset_marker (cache->start, make_int (1), cache->object);
+ Fset_marker (cache->end, make_int (1), cache->object);
Fset_marker_insertion_type (cache->start, Qnil);
Fset_marker_insertion_type (cache->end, Qt);
}
- else
+ /* #### Should reset "cache->source" here?
+ If so, also reset tables. */
+ if (valid_everywhere)
{
- /* invalidate the known region markers */
- Fset_marker (cache->start, Qnil, Qnil);
- Fset_marker (cache->end, Qnil, Qnil);
+ cache->prev_change = EMACS_INT_MIN;
+ cache->next_change = EMACS_INT_MAX;
}
- cache->no_syntax_table_prop = 1;
- if (lookup_syntax_properties)
+ else /* valid nowhere */
{
cache->prev_change = -1;
cache->next_change = -1;
}
- else
- {
- cache->prev_change = EMACS_INT_MIN;
- cache->next_change = EMACS_INT_MAX;
- }
}
-/* init_syntax_cache
- Arguments:
- cache: pointer to a zero-ed struct syntax_cache
- object: a Lisp string or buffer
- buffer: NULL or the struct buffer of buffer */
static void
-init_syntax_cache (struct syntax_cache *cache, /* cache must be zero'ed */
+init_syntax_cache (struct syntax_cache *cache, /* xzero'ed memory */
Lisp_Object object, /* string or buffer */
- struct buffer *buffer) /* may not be NULL */
+ struct buffer *buffer) /* buffer; if OBJECT is a
+ buffer, this is the same */
{
- /* initialize cache resources */
cache->object = object;
cache->buffer = buffer;
+
+ cache->source = syntax_source_buffer_table;
cache->syntax_table =
BUFFER_SYNTAX_TABLE (cache->buffer);
cache->mirror_table =
BUFFER_MIRROR_SYNTAX_TABLE (cache->buffer);
- cache->start = Fmake_marker();
- cache->end = Fmake_marker();
+
+ /* Qnil avoids GC'ing markers, which are useless for strings. */
+ cache->start = BUFFERP (object) ? Fmake_marker () : Qnil;
+ cache->end = BUFFERP (object) ? Fmake_marker () : Qnil;
+
+ reset_syntax_cache_range (cache, 0);
}
/* external syntax cache API */
+/* At this time (hg rev 5551:dab422055bab) setup_syntax_cache() is called
+ directly once in setup_buffer_syntax_cache and twice in regex.c. The
+ calls in regex.c are obfuscated, so it's hard to tell, but it looks like
+ they can be called with OBJECT being a buffer.
+
+ "You are in a confusing maze of initializations, all alike."
+
+ reset_syntax_cache_range (3 uses in setup_syntax_cache,
+ signal_syntax_cache_extent_changed, and init_buffer_syntax_cache)
+ reinitializes:
+ 1. if BUFFERP(OBJECT), marker positions to 1 (giving a null range)
+ 2. if BUFFERP(OBJECT), marker movement type
+ 3. cache range per VALID_EVERYWHERE
+
+ init_syntax_cache (2 uses in init_buffer_syntax_cache and
+ setup_syntax_cache) initializes:
+ 1. source to syntax_source_buffer_table
+ 2. syntax_table and mirror_syntax table to BUFFER's tables
+ 3. marker members to BUFFERP(OBJECT) ? markers w/o position : Qnil
+ 4. cache range with VALID_EVERYWHERE==0
+ 5. object and buffer to corresponding arguments.
+
+ init_buffer_syntax_cache (1 use in buffer.c) initializes:
+ 0. allocation of buffer's cache memory (done by allocator)
+ 1. cache memory to zero (done by allocator)
+ 2. cache to buffer's cache
+ 3. cache members by init_syntax_cache with object==buffer==BUF.
+
+ setup_buffer_syntax_cache (1 call in font-lock.c, 1 use in search.c,
+ and 7 uses in this file) initializes:
+ 0. buffer's syntax cache by calling setup_syntax_cache.
+
+ setup_buffer_syntax_cache and setup_syntax_cache are called by functions
+ that analyze text using character syntax. They are called repeatedly on
+ the same cache. init_syntax_cache and init_buffer_syntax_cache are
+ conceptually called once for each cache. reset_syntax_cache_range may
+ be called repeatedly on the same cache. The last three are for internal
+ use by the syntax setup code and buffer initialization. */
+
struct syntax_cache * /* return CACHE or the cache of OBJECT */
setup_syntax_cache (struct syntax_cache *cache, /* may be NULL only if
OBJECT is a buffer */
@@ -344,22 +381,37 @@
is associated with */
struct buffer *buffer, /* the buffer to use as source
of the syntax table */
- Charxpos UNUSED (from), /* initial position of cache */
- int UNUSED (count)) /* direction? see code */
+ Charxpos from, /* initial position of cache */
+ int count) /* direction? see code */
{
- /* If OBJECT is a buffer, use its cache, otherwise use CACHE.
- Initialize CACHE. Invalidate the cache if the syntax-table property is
- being respected, otherwise make it valid for the whole object. */
+ /* If OBJECT is a buffer, use its cache; else use CACHE and initialize it.
+ Invalidate the cache if the syntax-table property is being respected;
+ else make it valid for the whole object. */
if (BUFFERP (object))
{
cache = XBUFFER (object)->syntax_cache;
+ if (!lookup_syntax_properties)
+ reset_syntax_cache_range (cache, 1);
}
else
{
xzero (*cache);
init_syntax_cache (cache, object, buffer);
}
- reset_syntax_cache_range (cache, object);
+
+ if (lookup_syntax_properties)
+ {
+ if (count <= 0)
+ {
+ --from;
+ from = buffer_or_string_clip_to_accessible_char (cache->object,
+ from);
+ }
+ /* If lookup_syntax_properties && BUFFERP (object), this
+ optimization may matter. */
+ if (!(from >= cache->prev_change && from <
cache->next_change))
+ update_syntax_cache (cache, from, count);
+ }
#ifdef NOT_WORTH_THE_EFFORT
update_mirror_syntax_if_dirty (cache->mirror_table);
@@ -454,24 +506,21 @@
if (!NILP (Fsyntax_table_p (tmp_table)))
{
- cache->use_code = 0;
+ cache->source = syntax_source_property_table;
cache->syntax_table = tmp_table;
cache->mirror_table = XCHAR_TABLE (tmp_table)->mirror_table;
- cache->no_syntax_table_prop = 0;
#ifdef NOT_WORTH_THE_EFFORT
update_mirror_syntax_if_dirty (cache->mirror_table);
#endif /* NOT_WORTH_THE_EFFORT */
}
else if (CONSP (tmp_table) && INTP (XCAR (tmp_table)))
{
- cache->use_code = 1;
+ cache->source = syntax_source_property_code;
cache->syntax_code = XINT (XCAR (tmp_table));
- cache->no_syntax_table_prop = 0;
}
else
{
- cache->use_code = 0;
- cache->no_syntax_table_prop = 1;
+ cache->source = syntax_source_buffer_table;
cache->syntax_table = BUFFER_SYNTAX_TABLE (cache->buffer);
cache->mirror_table = BUFFER_MIRROR_SYNTAX_TABLE (cache->buffer);
#ifdef NOT_WORTH_THE_EFFORT
@@ -501,14 +550,15 @@
void
init_buffer_syntax_cache (struct buffer *buf)
{
+ struct syntax_cache *cache;
#ifdef NEW_GC
- buf->syntax_cache = XSYNTAX_CACHE (ALLOC_NORMAL_LISP_OBJECT (syntax_cache));
+ cache = XSYNTAX_CACHE (ALLOC_NORMAL_LISP_OBJECT (syntax_cache));
#else /* not NEW_GC */
- buf->syntax_cache = xnew_and_zero (struct syntax_cache);
+ cache = xnew_and_zero (struct syntax_cache);
#endif /* not NEW_GC */
- init_syntax_cache (buf->syntax_cache, wrap_buffer(buf), buf);
- reset_syntax_cache_range (buf->syntax_cache, wrap_buffer(buf));
+ init_syntax_cache (cache, wrap_buffer (buf), buf);
+ buf->syntax_cache = cache;
}
/* finalize the syntax cache for BUF */
@@ -535,18 +585,18 @@
void
signal_syntax_cache_extent_changed (EXTENT extent)
{
- Lisp_Object buffer = Fextent_object (wrap_extent (extent));
- if (BUFFERP (buffer))
+ Lisp_Object object = Fextent_object (wrap_extent (extent));
+ if (BUFFERP (object))
{
- struct syntax_cache *cache = XBUFFER (buffer)->syntax_cache;
- Bytexpos start = extent_endpoint_byte (extent, 0);
- Bytexpos end = extent_endpoint_byte (extent, 1);
- Bytexpos start2 = byte_marker_position (cache->start);
- Bytexpos end2 = byte_marker_position (cache->end);
+ struct syntax_cache *cache = XBUFFER (object)->syntax_cache;
+ Bytexpos extent_start = extent_endpoint_byte (extent, 0);
+ Bytexpos extent_end = extent_endpoint_byte (extent, 1);
+ Bytexpos cache_start = byte_marker_position (cache->start);
+ Bytexpos cache_end = byte_marker_position (cache->end);
/* If the extent is entirely before or entirely after the cache
range, it doesn't overlap. Otherwise, invalidate the range. */
- if (!(end < start2 || start > end2))
- reset_syntax_cache_range (cache, buffer);
+ if (!(extent_end < cache_start || extent_start > cache_end))
+ reset_syntax_cache_range (cache, 0);
}
}
diff -r 40a52efbf3a3 src/syntax.h
--- a/src/syntax.h Sun Aug 14 13:51:14 2011 +0200
+++ b/src/syntax.h Tue Aug 23 04:32:35 2011 +0900
@@ -234,20 +234,21 @@
#### sjt sez: I'm not sure I believe that last claim. That seems to
require that we use directional information, etc, but that is ignored in
the current implementation. */
+
+enum syntax_source { syntax_source_property_code = 0,
+ syntax_source_property_table = 1,
+ syntax_source_buffer_table = 2 };
+#define SOURCE_IS_TABLE(source) (source)
+
struct syntax_cache
{
#ifdef NEW_GC
NORMAL_LISP_OBJECT_HEADER header;
#endif /* NEW_GC */
- int use_code; /* Non-zero if a syntax-table property
- specified a syntax code. When zero, the
- syntax_code member is invalid. Otherwise
- the syntax_table member is invalid. */
- int no_syntax_table_prop; /* If non-zero, there was no `syntax-table'
- property on the current range, and so we're
- using the buffer's syntax table.
- Then we must invalidate the cache if the
- buffer's syntax table is changed. */
+ enum syntax_source source; /* Source of syntax information: the buffer's
+ syntax table, a syntax table specified by
+ a syntax-table property, or a syntax code
+ specified by a syntax-table property. */
Lisp_Object object; /* The buffer or string the current syntax
cache applies to, or Qnil for a string of
text not coming from a buffer or string. */
@@ -260,6 +261,7 @@
Lisp_Object mirror_table; /* Mirror table for this table. */
Lisp_Object start, end; /* Markers to keep track of the known region
in a buffer.
+ Both are Qnil if object is a string.
Normally these correspond to prev_change
and next_change, respectively, except when
insertions and deletions occur. Then
@@ -269,7 +271,8 @@
We'd like to use an extent, but it seems
that having an extent over the entire
buffer causes serious slowdowns in extent
- operations! Yuck! */
+ operations! Yuck!
+ #### May not be true any more. */
Charxpos next_change; /* Position of the next extent change. */
Charxpos prev_change; /* Position of the previous extent change. */
};
@@ -341,9 +344,10 @@
#define SYNTAX_FROM_CACHE(cache, c) \
SYNTAX_FROM_CODE (SYNTAX_CODE_FROM_CACHE (cache, c))
-#define SYNTAX_CODE_FROM_CACHE(cache, c) \
- ((cache)->use_code ? (cache)->syntax_code \
-: SYNTAX_CODE ((cache)->mirror_table, c))
+#define SYNTAX_CODE_FROM_CACHE(cache, c) \
+ (SOURCE_IS_TABLE ((cache)->source) \
+ ? SYNTAX_CODE ((cache)->mirror_table, c) \
+: (cache)->syntax_code)
#ifdef NOT_WORTH_THE_EFFORT
/* If we really cared about the theoretical performance hit of the dirty
@@ -356,9 +360,10 @@
functions and could execute arbitrary Lisp very easily), etc. The QUIT
problem is the biggest one, probably, and one of the main reasons it's
probably just not worth it. */
-#define SYNTAX_CODE_FROM_CACHE(cache, c) \
- ((cache)->use_code ? (cache)->syntax_code \
-: SYNTAX_CODE_1 ((cache)->mirror_table, c))
+#define SYNTAX_CODE_FROM_CACHE(cache, c) \
+ (SOURCE_IS_TABLE ((cache)->source) \
+ ? SYNTAX_CODE_1 ((cache)->mirror_table, c) \
+: (cache)->syntax_code)
#endif
_______________________________________________
XEmacs-Beta mailing list
XEmacs-Beta(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-beta