>>>> "Martin" == Martin Stjernholm
<mast(a)lysator.liu.se> writes: 
    Martin> There seems to be a case where stale values are returned
    Martin> by match-beginning, match-end and match-string.  [...]
    Martin> I've verified this in XEmacs 21.4 (patch 6).
>>>> "sjt" == Stephen J Turnbull
<stephen(a)xemacs.org> writes: 
    sjt> Also in 21.4.9.  Could someone check in 21.5?
    sjt> Initialization could be done in search.c compile_pattern, but
    sjt> I don't know if this is the "right" place for it yet.
It wasn't; XEmacs optimizes the regexp compilation away for trivial
regexps and regexps found in a cache.  This patch fixes the problem
for me, and adds a case to the regression test suite.
This patch is informational.  It will be committed later as part of a
larger revision of the syntax, search, and regexp modules.
Index: src/ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/ChangeLog,v
retrieving revision 1.290.2.28.2.2
diff -u -U0 -r1.290.2.28.2.2 ChangeLog
--- src/ChangeLog	30 Aug 2002 02:55:46 -0000	1.290.2.28.2.2
+++ src/ChangeLog	9 Sep 2002 11:10:17 -0000
@@ -0,0 +1,11 @@
+2002-09-09  Stephen J. Turnbull  <stephen(a)xemacs.org>
+
+	* search.c (clear_unused_search_regs): New static function.
+	(search_buffer): 
+	(simple_search): 
+	(boyer_moore):
+	Use it.  Fixes "stale match data" bug reported by Martin Stjernholm.
+	Minor clarifications in comments.
+
+	* regex.c (re_match_2_internal): Ensure no stale submatches.
+
Index: src/regex.c
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/regex.c,v
retrieving revision 1.25.2.2
diff -u -r1.25.2.2 regex.c
--- src/regex.c	17 May 2001 13:37:45 -0000	1.25.2.2
+++ src/regex.c	9 Sep 2002 10:58:21 -0000
@@ -4716,16 +4716,24 @@
 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
                     }
 		}
-
-              /* If the regs structure we return has more elements than
-                 were in the pattern, set the extra elements to -1.  If
-                 we (re)allocated the registers, this is the case,
-                 because we always allocate enough to have at least one
-                 -1 at the end.  */
-              for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
-                regs->start[mcnt] = regs->end[mcnt] = -1;
 	    } /* regs && !bufp->no_sub */
 
+	  /* If we have regs and the regs structure has more elements than
+             were in the pattern, set the extra elements to -1.  If we
+	     (re)allocated the registers, this is the case, because we
+	     always allocate enough to have at least one -1 at the end.
+
+	     We do this even when no_sub is set because some applications
+             (XEmacs) reuse register structures which may contain stale
+	     information, and permit attempts to access those registers.
+
+	     It would be possible to require the caller to do this, but we'd
+	     have to change the API for this function to reflect that, and
+	     audit all callers. */
+	  if (regs && regs->num_regs > 0)
+	    for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
+	      regs->start[mcnt] = regs->end[mcnt] = -1;
+
           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
                         nfailure_points_pushed, nfailure_points_popped,
                         nfailure_points_pushed - nfailure_points_popped);
Index: src/search.c
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/search.c,v
retrieving revision 1.17.2.3
diff -u -r1.17.2.3 search.c
--- src/search.c	20 Aug 2002 11:38:08 -0000	1.17.2.3
+++ src/search.c	9 Sep 2002 10:58:22 -0000
@@ -108,6 +108,7 @@
 Lisp_Object Vskip_chars_range_table;
 
 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
+static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
 /* #### according to comment in 21.5, unnecessary */
 static void save_search_regs (void);
 static Bufpos simple_search (struct buffer *buf, Bufbyte *base_pat,
@@ -1173,10 +1174,11 @@
   if (len == 0)
     {
       set_search_regs (buf, bufpos, 0);
+      clear_unused_search_regs (&search_regs, 0);
       return bufpos;
     }
 
-  /* Searching 0 times means don't move.  */
+  /* Searching 0 times means noop---don't move, don't touch registers.  */
   if (n == 0)
     return bufpos;
 
@@ -1223,6 +1224,8 @@
 		    search_regs.start[i] += j;
 		    search_regs.end[i] += j;
 		  }
+	      /* re_match (called from re_search et al) does this for us */
+	      /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
 	      XSETBUFFER (last_thing_searched, buf);
 	      /* Set pos to the new position. */
 	      pos = search_regs.start[0];
@@ -1260,6 +1287,8 @@
 		    search_regs.start[i] += j;
 		    search_regs.end[i] += j;
 		  }
+	      /* re_match (called from re_search et al) does this for us */
+	      /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
 	      XSETBUFFER (last_thing_searched, buf);
 	      /* Set pos to the new position. */
 	      pos = search_regs.end[0];
@@ -1460,6 +1489,7 @@
 	  end = bytind_to_bufpos (buf, idx + buf_len);
 	}
       set_search_regs (buf, beg, end - beg);
+      clear_unused_search_regs (&search_regs, 0);
 
       return retval;
     }
@@ -1821,6 +1851,7 @@
 		    Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
 
 		    set_search_regs (buf, bufstart, bufend - bufstart);
+		    clear_unused_search_regs (&search_regs, 0);
 		  }
 
 		  if ((n -= direction) != 0)
@@ -1910,6 +1941,7 @@
 		    Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
 
 		    set_search_regs (buf, bufstart, bufend - bufstart);
+		    clear_unused_search_regs (&search_regs, 0);
 		  }
 
 		  if ((n -= direction) != 0)
@@ -1929,8 +1961,8 @@
   return bytind_to_bufpos (buf, pos);
 }
 
-/* Record beginning BEG and end BEG + LEN
-   for a match just found in the current buffer.  */
+/* Record the whole-match data (beginning BEG and end BEG + LEN) and the
+   buffer for a match just found.  */
 
 static void
 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
@@ -1948,6 +1980,24 @@
   search_regs.start[0] = beg;
   search_regs.end[0] = beg + len;
   XSETBUFFER (last_thing_searched, buf);
+}
+
+/* Clear unused search registers so match data will be null.
+   REGP is a pointer to the register structure to clear, usually the global
+   search_regs.
+   NO_SUB is the number of subexpressions to allow for.  (Does not count
+   the whole match, ie, for a string search NO_SUB == 0.)
+   It is an error if NO_SUB > REGP.num_regs - 1. */
+
+static void
+clear_unused_search_regs (struct re_registers *regp, int no_sub)
+{
+  /* This function has been Mule-ized. */
+  int i;
+
+  assert (no_sub >= 0 && no_sub < regp->num_regs);
+  for (i = no_sub + 1; i < regp->num_regs; i++)
+    regp->start[i] = regp->end[i] = -1;
 }
 
 
Index: tests/ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/tests/ChangeLog,v
retrieving revision 1.2.2.15
diff -u -r1.2.2.15 ChangeLog
--- tests/ChangeLog	23 Aug 2002 16:44:41 -0000	1.2.2.15
+++ tests/ChangeLog	9 Sep 2002 10:58:22 -0000
@@ -0,0 +1,8 @@
+2002-09-09  Stephen J. Turnbull  <stephen(a)xemacs.org>
+
+	* automated/regexp-tests.el: Add test for stale subexpr match-data.
+	Thanks to Martin Stjernholm for the report.
+
+	* automated/syntax-tests.el: Conditionalize syntax-table property
+	tests on feature.  Enable feature if present.
+
Index: tests/automated/regexp-tests.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/tests/automated/regexp-tests.el,v
retrieving revision 1.2
diff -u -r1.2 regexp-tests.el
--- tests/automated/regexp-tests.el	12 Apr 2001 18:24:55 -0000	1.2
+++ tests/automated/regexp-tests.el	9 Sep 2002 10:58:22 -0000
@@ -213,3 +213,18 @@
   (looking-at "Unmatchable text")
   (replace-match "")
   (Assert (looking-at "^buffer.$")))
+
+;; Test that trivial regexps reset unused registers
+;; Thanks to Martin Sternholm for the report.
+;; xemacs-beta <5blm6h2ki5.fsf(a)lister.roxen.com>
+(with-temp-buffer
+  (insert "ab")
+  (goto-char (point-min))
+  (re-search-forward "\\(a\\)")
+  ;; test the whole-match data, too -- one try scotched that, too!
+  (Assert (string= (match-string 0) "a"))
+  (Assert (string= (match-string 1) "a"))
+  (re-search-forward "b")
+  (Assert (string= (match-string 0) "b"))
+  (Assert (string= (match-string 1) nil)))
+
Index: tests/automated/syntax-tests.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/tests/automated/syntax-tests.el,v
retrieving revision 1.2
diff -u -r1.2 syntax-tests.el
--- tests/automated/syntax-tests.el	12 Apr 2001 18:24:55 -0000	1.2
+++ tests/automated/syntax-tests.el	9 Sep 2002 10:58:22 -0000
@@ -107,16 +107,20 @@
 ;; <apply-pos> can be in the form (start . end), or can be a
 ;; character position.
 (defun test-syntax-table (string apply-pos apply-syntax stop)
-  (goto-char (point-max))
-  (unless (consp apply-pos)
-	(setq apply-pos `(,apply-pos . ,(+ 1 apply-pos))))
-  (let ((point (point)))
-	(insert string)
-	(put-text-property (+ point (car apply-pos)) (+ point (cdr apply-pos))
-					   'syntax-table apply-syntax)
-	(goto-char point)
-	(forward-word 1)
-	(Assert (eq (point) (+ point stop)))))
+  ;; We don't necessarily have syntax-table properties ...
+  (when (fboundp 'lookup-syntax-properties) ; backwards compatible kludge
+    ;; ... and they may not be enabled by default if we do.
+    (setq lookup-syntax-properties t)
+    (goto-char (point-max))
+    (unless (consp apply-pos)
+      (setq apply-pos `(,apply-pos . ,(+ 1 apply-pos))))
+    (let ((point (point)))
+      (insert string)
+      (put-text-property (+ point (car apply-pos)) (+ point (cdr apply-pos))
+			 'syntax-table apply-syntax)
+      (goto-char point)
+      (forward-word 1)
+      (Assert (eq (point) (+ point stop))))))
 
 ;; test syntax-table extents
 (with-temp-buffer
@@ -126,6 +130,8 @@
   (test-syntax-table "W." 1 `(,(syntax-string-to-code "w")) 2))
 
 ;; Test forward-comment at buffer boundaries
+;; #### The second Assert fails (once interpreted, once compiled) on 21.4.9
+;; with sjt's version of Andy's syntax-text-property-killer patch.
 (with-temp-buffer
   (c-mode)
   (insert "// comment\n")
-- 
Institute of Policy and Planning Sciences     
http://turnbull.sk.tsukuba.ac.jp
University of Tsukuba                    Tennodai 1-1-1 Tsukuba 305-8573 JAPAN
 My nostalgia for Icon makes me forget about any of the bad things.  I don't
have much nostalgia for Perl, so its faults I remember.  Scott Gilbert c.l.py