>>>> "Martin" == Martin Stjernholm
<mast(a)lysator.liu.se> writes:
Martin> There seems to be a case where stale values are returned
Martin> by match-beginning, match-end and match-string. [...]
Martin> I've verified this in XEmacs 21.4 (patch 6).
>>>> "sjt" == Stephen J Turnbull
<stephen(a)xemacs.org> writes:
sjt> Also in 21.4.9. Could someone check in 21.5?
sjt> Initialization could be done in search.c compile_pattern, but
sjt> I don't know if this is the "right" place for it yet.
It wasn't; XEmacs optimizes the regexp compilation away for trivial
regexps and regexps found in a cache. This patch fixes the problem
for me, and adds a case to the regression test suite.
This patch is informational. It will be committed later as part of a
larger revision of the syntax, search, and regexp modules.
Index: src/ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/ChangeLog,v
retrieving revision 1.290.2.28.2.2
diff -u -U0 -r1.290.2.28.2.2 ChangeLog
--- src/ChangeLog 30 Aug 2002 02:55:46 -0000 1.290.2.28.2.2
+++ src/ChangeLog 9 Sep 2002 11:10:17 -0000
@@ -0,0 +1,11 @@
+2002-09-09 Stephen J. Turnbull <stephen(a)xemacs.org>
+
+ * search.c (clear_unused_search_regs): New static function.
+ (search_buffer):
+ (simple_search):
+ (boyer_moore):
+ Use it. Fixes "stale match data" bug reported by Martin Stjernholm.
+ Minor clarifications in comments.
+
+ * regex.c (re_match_2_internal): Ensure no stale submatches.
+
Index: src/regex.c
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/regex.c,v
retrieving revision 1.25.2.2
diff -u -r1.25.2.2 regex.c
--- src/regex.c 17 May 2001 13:37:45 -0000 1.25.2.2
+++ src/regex.c 9 Sep 2002 10:58:21 -0000
@@ -4716,16 +4716,24 @@
= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
}
}
-
- /* If the regs structure we return has more elements than
- were in the pattern, set the extra elements to -1. If
- we (re)allocated the registers, this is the case,
- because we always allocate enough to have at least one
- -1 at the end. */
- for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
- regs->start[mcnt] = regs->end[mcnt] = -1;
} /* regs && !bufp->no_sub */
+ /* If we have regs and the regs structure has more elements than
+ were in the pattern, set the extra elements to -1. If we
+ (re)allocated the registers, this is the case, because we
+ always allocate enough to have at least one -1 at the end.
+
+ We do this even when no_sub is set because some applications
+ (XEmacs) reuse register structures which may contain stale
+ information, and permit attempts to access those registers.
+
+ It would be possible to require the caller to do this, but we'd
+ have to change the API for this function to reflect that, and
+ audit all callers. */
+ if (regs && regs->num_regs > 0)
+ for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+
DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
nfailure_points_pushed, nfailure_points_popped,
nfailure_points_pushed - nfailure_points_popped);
Index: src/search.c
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/search.c,v
retrieving revision 1.17.2.3
diff -u -r1.17.2.3 search.c
--- src/search.c 20 Aug 2002 11:38:08 -0000 1.17.2.3
+++ src/search.c 9 Sep 2002 10:58:22 -0000
@@ -108,6 +108,7 @@
Lisp_Object Vskip_chars_range_table;
static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
+static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
/* #### according to comment in 21.5, unnecessary */
static void save_search_regs (void);
static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
@@ -1173,10 +1174,11 @@
if (len == 0)
{
set_search_regs (buf, bufpos, 0);
+ clear_unused_search_regs (&search_regs, 0);
return bufpos;
}
- /* Searching 0 times means don't move. */
+ /* Searching 0 times means noop---don't move, don't touch registers. */
if (n == 0)
return bufpos;
@@ -1223,6 +1224,8 @@
search_regs.start[i] += j;
search_regs.end[i] += j;
}
+ /* re_match (called from re_search et al) does this for us */
+ /* clear_unused_search_regs (search_regs, bufp->no_sub); */
XSETBUFFER (last_thing_searched, buf);
/* Set pos to the new position. */
pos = search_regs.start[0];
@@ -1260,6 +1287,8 @@
search_regs.start[i] += j;
search_regs.end[i] += j;
}
+ /* re_match (called from re_search et al) does this for us */
+ /* clear_unused_search_regs (search_regs, bufp->no_sub); */
XSETBUFFER (last_thing_searched, buf);
/* Set pos to the new position. */
pos = search_regs.end[0];
@@ -1460,6 +1489,7 @@
end = bytind_to_bufpos (buf, idx + buf_len);
}
set_search_regs (buf, beg, end - beg);
+ clear_unused_search_regs (&search_regs, 0);
return retval;
}
@@ -1821,6 +1851,7 @@
Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
set_search_regs (buf, bufstart, bufend - bufstart);
+ clear_unused_search_regs (&search_regs, 0);
}
if ((n -= direction) != 0)
@@ -1910,6 +1941,7 @@
Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
set_search_regs (buf, bufstart, bufend - bufstart);
+ clear_unused_search_regs (&search_regs, 0);
}
if ((n -= direction) != 0)
@@ -1929,8 +1961,8 @@
return bytind_to_bufpos (buf, pos);
}
-/* Record beginning BEG and end BEG + LEN
- for a match just found in the current buffer. */
+/* Record the whole-match data (beginning BEG and end BEG + LEN) and the
+ buffer for a match just found. */
static void
set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
@@ -1948,6 +1980,24 @@
search_regs.start[0] = beg;
search_regs.end[0] = beg + len;
XSETBUFFER (last_thing_searched, buf);
+}
+
+/* Clear unused search registers so match data will be null.
+ REGP is a pointer to the register structure to clear, usually the global
+ search_regs.
+ NO_SUB is the number of subexpressions to allow for. (Does not count
+ the whole match, ie, for a string search NO_SUB == 0.)
+ It is an error if NO_SUB > REGP.num_regs - 1. */
+
+static void
+clear_unused_search_regs (struct re_registers *regp, int no_sub)
+{
+ /* This function has been Mule-ized. */
+ int i;
+
+ assert (no_sub >= 0 && no_sub < regp->num_regs);
+ for (i = no_sub + 1; i < regp->num_regs; i++)
+ regp->start[i] = regp->end[i] = -1;
}
Index: tests/ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/tests/ChangeLog,v
retrieving revision 1.2.2.15
diff -u -r1.2.2.15 ChangeLog
--- tests/ChangeLog 23 Aug 2002 16:44:41 -0000 1.2.2.15
+++ tests/ChangeLog 9 Sep 2002 10:58:22 -0000
@@ -0,0 +1,8 @@
+2002-09-09 Stephen J. Turnbull <stephen(a)xemacs.org>
+
+ * automated/regexp-tests.el: Add test for stale subexpr match-data.
+ Thanks to Martin Stjernholm for the report.
+
+ * automated/syntax-tests.el: Conditionalize syntax-table property
+ tests on feature. Enable feature if present.
+
Index: tests/automated/regexp-tests.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/tests/automated/regexp-tests.el,v
retrieving revision 1.2
diff -u -r1.2 regexp-tests.el
--- tests/automated/regexp-tests.el 12 Apr 2001 18:24:55 -0000 1.2
+++ tests/automated/regexp-tests.el 9 Sep 2002 10:58:22 -0000
@@ -213,3 +213,18 @@
(looking-at "Unmatchable text")
(replace-match "")
(Assert (looking-at "^buffer.$")))
+
+;; Test that trivial regexps reset unused registers
+;; Thanks to Martin Sternholm for the report.
+;; xemacs-beta <5blm6h2ki5.fsf(a)lister.roxen.com>
+(with-temp-buffer
+ (insert "ab")
+ (goto-char (point-min))
+ (re-search-forward "\\(a\\)")
+ ;; test the whole-match data, too -- one try scotched that, too!
+ (Assert (string= (match-string 0) "a"))
+ (Assert (string= (match-string 1) "a"))
+ (re-search-forward "b")
+ (Assert (string= (match-string 0) "b"))
+ (Assert (string= (match-string 1) nil)))
+
Index: tests/automated/syntax-tests.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/tests/automated/syntax-tests.el,v
retrieving revision 1.2
diff -u -r1.2 syntax-tests.el
--- tests/automated/syntax-tests.el 12 Apr 2001 18:24:55 -0000 1.2
+++ tests/automated/syntax-tests.el 9 Sep 2002 10:58:22 -0000
@@ -107,16 +107,20 @@
;; <apply-pos> can be in the form (start . end), or can be a
;; character position.
(defun test-syntax-table (string apply-pos apply-syntax stop)
- (goto-char (point-max))
- (unless (consp apply-pos)
- (setq apply-pos `(,apply-pos . ,(+ 1 apply-pos))))
- (let ((point (point)))
- (insert string)
- (put-text-property (+ point (car apply-pos)) (+ point (cdr apply-pos))
- 'syntax-table apply-syntax)
- (goto-char point)
- (forward-word 1)
- (Assert (eq (point) (+ point stop)))))
+ ;; We don't necessarily have syntax-table properties ...
+ (when (fboundp 'lookup-syntax-properties) ; backwards compatible kludge
+ ;; ... and they may not be enabled by default if we do.
+ (setq lookup-syntax-properties t)
+ (goto-char (point-max))
+ (unless (consp apply-pos)
+ (setq apply-pos `(,apply-pos . ,(+ 1 apply-pos))))
+ (let ((point (point)))
+ (insert string)
+ (put-text-property (+ point (car apply-pos)) (+ point (cdr apply-pos))
+ 'syntax-table apply-syntax)
+ (goto-char point)
+ (forward-word 1)
+ (Assert (eq (point) (+ point stop))))))
;; test syntax-table extents
(with-temp-buffer
@@ -126,6 +130,8 @@
(test-syntax-table "W." 1 `(,(syntax-string-to-code "w")) 2))
;; Test forward-comment at buffer boundaries
+;; #### The second Assert fails (once interpreted, once compiled) on 21.4.9
+;; with sjt's version of Andy's syntax-text-property-killer patch.
(with-temp-buffer
(c-mode)
(insert "// comment\n")
--
Institute of Policy and Planning Sciences
http://turnbull.sk.tsukuba.ac.jp
University of Tsukuba Tennodai 1-1-1 Tsukuba 305-8573 JAPAN
My nostalgia for Icon makes me forget about any of the bad things. I don't
have much nostalgia for Perl, so its faults I remember. Scott Gilbert c.l.py