APPROVE COMMIT
NOTE: This patch has been committed.
This includes the patch I mentioned in
http://mid.gmane.org/18307.63955.896221.621765@parhasard.net , and
supersedes it.  Tested on the packages, to the extent that that was possible
with complaints from EDE saying "Given parent class eieio-persistent is not
a class", after significant hacking to to get it to acknowledge the eieio
autoloads at all.  All the differences from GNU 1.34 are documented and
motivated, with the exception of those self-evidently XEmacs-specific and
the generated regexp in the example.
2008-01-11  Aidan Kehoe  <kehoea(a)parhasard.net>
	* regexp-opt.el: Merge revision 1.34 from GNU, of
	2007-01-21. Thank you GNU.
XEmacs Packages source patch:
Diff command:   cvs -q diff -Nu
Files affected: xemacs-packages/xemacs-base/regexp-opt.el
===================================================================
RCS
Index: xemacs-packages/xemacs-base/regexp-opt.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/packages/xemacs-packages/xemacs-base/regexp-opt.el,v
retrieving revision 1.3
diff -u -u -r1.3 regexp-opt.el
--- xemacs-packages/xemacs-base/regexp-opt.el	2003/10/29 16:31:46	1.3
+++ xemacs-packages/xemacs-base/regexp-opt.el	2008/01/11 23:10:12
@@ -1,6 +1,7 @@
 ;;; regexp-opt.el --- generate efficient regexps to match strings
 
-;; Copyright (C) 1994,95,96,97,98,99,2000 Free Software Foundation, Inc.
+;; Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+;;   2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
 
 ;; Author: Simon Marshall <simon(a)gnu.org>
 ;; Maintainer: FSF
@@ -20,18 +21,14 @@
 
 ;; You should have received a copy of the GNU General Public License
 ;; along with XEmacs; see the file COPYING.  If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
 
-;;; Synched up with: GNU Emacs 21.3 + paren-in-char-set fix from CVS
-;;;                  revision 1.25.  Some implementation differences in
-;;;                  regexp-opt-group and regexp-opt-charset but the APIs
-;;;                  are compatible and should return compatible (if not
-;;;                  exactly the same) regexps.
+;;; Synched up with: Revision 1.34 in GNU Emacs CVS, of 2007-01-21.
 
 ;;; Commentary:
 
-;; The "opt" in "regexp-opt" stands for
"optim\\(?:al\\|i\\(?:se\\|ze\\)\\)".
+;; The "opt" in "regexp-opt" stands for
"optim\\(al\\|i[sz]e\\)".
 ;;
 ;; This package generates a regexp from a given list of strings (which matches
 ;; one of those strings) so that the regexp generated by:
@@ -50,7 +47,7 @@
 ;; 		    "save-current-buffer" "save-match-data"
 ;; 		    "catch" "throw" "unwind-protect"
"condition-case")))
 ;;   (concat "(" (regexp-opt strings t) "\\>"))
-;;  =>
"(\\(c\\(?:atch\\|ond\\(?:ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(?:current-buffer\\|excursion\\|match-data\\|restriction\\|window-excursion\\)\\|throw\\|un\\(?:less\\|wind-protect\\)\\|wh\\(?:en\\|ile\\)\\)\\>"
+;;  =>
"(\\(c\\(?:atch\\|ond\\(?:ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(?:current-buffer\\|excursion\\|match-data\\|\\(?:restrict\\|window-excurs\\)ion\\)\\|throw\\|un\\(?:less\\|wind-protect\\)\\|wh\\(?:en\\|ile\\)\\)\\>"
 ;;
 ;; Searching using the above example `regexp-opt' regexp takes approximately
 ;; two-thirds of the time taken using the equivalent `mapconcat' regexp.
@@ -93,7 +90,7 @@
 
 ;;;###autoload
 (defun regexp-opt (strings &optional paren)
-  "Return a regexp to match a string in STRINGS.
+  "Return a regexp to match a string in the list STRINGS.
 Each string should be unique in STRINGS and should not contain any regexps,
 quoted or not.  If optional PAREN is non-nil, ensure that the returned regexp
 is enclosed by at least one regexp grouping construct.
@@ -107,47 +104,78 @@
   (save-match-data
     ;; Recurse on the sorted list.
     (let* ((max-lisp-eval-depth (* 1024 1024))
+	   (max-specpdl-size (* 1024 1024))
 	   (completion-ignore-case nil)
+	   (completion-regexp-list nil)
 	   (words (eq paren 'words))
 	   (open (cond ((stringp paren) paren) (paren "\\(")))
-	   (sorted-strings (sort (copy-sequence strings) 'string-lessp))
+           ;; XEmacs; 21.4 doesn't have #'delete-dups, but
+           ;; #'delete-duplicates is dumped.
+	   (sorted-strings (sort (delete-duplicates
+                                  (copy-sequence strings) :test #'string=)
+                                 #'string-lessp))
 	   (re (regexp-opt-group sorted-strings open)))
       (if words (concat "\\<" re "\\>") re))))
 
-(defconst regexp-opt-not-groupie*-re
-  (let* ((harmless-ch "[^\\\\[]")
-         (esc-pair-not-lp "\\\\[^(]")
-         (class-harmless-ch "[^][]")
-         (class-lb-harmless "[^]:]")
-         (class-lb-colon-maybe-charclass ":\\([a-z]+:]\\)?")
-         (class-lb (concat "\\[\\(" class-lb-harmless
-                           "\\|" class-lb-colon-maybe-charclass
"\\)"))
-         (class
-          (concat "\\[^?]?"
-                  "\\(" class-harmless-ch
-                  "\\|" class-lb "\\)*"
-                  "\\[?]"))         ; special handling for bare [ at end of re
-         (shy-lp "\\\\(\\?:"))
-    (concat "\\(" harmless-ch "\\|" esc-pair-not-lp
-            "\\|" class "\\|" shy-lp "\\)*"))
-  "Matches any part of a regular expression EXCEPT for non-shy
\"\\\\(\"s")
+;; XEmacs; added here. This is in subr.el in GNU. 
+(defun-when-void subregexp-context-p (regexp pos &optional start)
+  "Return non-nil if POS is in a normal subregexp context in REGEXP. 
+A subregexp context is one where a sub-regexp can appear. 
+A non-subregexp context is for example within brackets, or within a 
+repetition bounds operator `\\=\\{...\\}', or right after a `\\'. 
+If START is non-nil, it should be a position in REGEXP, smaller 
+than POS, and known to be in a subregexp context." 
+  ;; Here's one possible implementation, with the great benefit that it 
+  ;; reuses the regexp-matcher's own parser, so it understands all the 
+  ;; details of the syntax.  A disadvantage is that it needs to match the 
+  ;; error string. 
+  (condition-case err 
+      (progn 
+        (string-match (substring regexp (or start 0) pos) "") 
+        t) 
+    (invalid-regexp 
+     (not (member (cadr err) '("Unmatched [ or [^" 
+                               "Unmatched \\{" 
+                               "Trailing backslash"))))) 
+  ;; An alternative implementation: 
+  ;; (defconst re-context-re 
+  ;;   (let* ((harmless-ch "[^\\[]") 
+  ;;          (harmless-esc "\\\\[^{]") 
+  ;;          (class-harmless-ch "[^][]") 
+  ;;          (class-lb-harmless "[^]:]") 
+  ;;          (class-lb-colon-maybe-charclass ":\\([a-z]+:]\\)?") 
+  ;;          (class-lb (concat "\\[\\(" class-lb-harmless 
+  ;;                            "\\|" class-lb-colon-maybe-charclass
"\\)")) 
+  ;;          (class 
+  ;;           (concat "\\[^?]?" 
+  ;;                   "\\(" class-harmless-ch 
+  ;;                   "\\|" class-lb "\\)*" 
+  ;;                   "\\[?]"))     ; special handling for bare [ at end of
re
+  ;;          (braces "\\\\{[0-9,]+\\\\}")) 
+  ;;     (concat "\\`\\(" harmless-ch "\\|" harmless-esc 
+  ;;             "\\|" class "\\|" braces "\\)*\\'")) 
+  ;;   "Matches any prefix that corresponds to a normal subregexp context.") 
+  ;; (string-match re-context-re (substring regexp (or start 0) pos)) 
+  ) 
 
 ;;;###autoload
 (defun regexp-opt-depth (regexp)
   "Return the depth of REGEXP.
-This means the number of regexp grouping constructs (parenthesised expressions)
-in REGEXP."
+This means the number of non-shy regexp grouping constructs
+\(parenthesized expressions) in REGEXP."
   (save-match-data
     ;; Hack to signal an error if REGEXP does not have balanced parentheses.
     (string-match regexp "")
     ;; Count the number of open parentheses in REGEXP.
-    (let ((count 0) start)
-      (while
-          (progn
-            (string-match regexp-opt-not-groupie*-re regexp start)
-            (setq start ( + (match-end 0) 2))  ; +2 for "\\(" after match-end.
-            (<= start (length regexp)))
-        (setq count (1+ count)))
+    (let ((count 0) start last)
+      (while (string-match "\\\\(\\(\\?:\\)?" regexp start)
+	(setq start (match-end 0))	      ; Start of next search.
+	(when (and (not (match-beginning 1))
+		   (subregexp-context-p regexp (match-beginning 0) last))
+	  ;; It's not a shy group and it's not inside brackets or after
+	  ;; a backslash: it's really a group-open marker.
+	  (setq last start)	    ; Speed up next regexp-opt-re-context-p.
+	  (setq count (1+ count))))
       count)))
 
 ;;; Workhorse functions.
@@ -156,6 +184,7 @@
   (require 'cl))
 
 (defun regexp-opt-group (strings &optional paren lax)
+  ;; XEmacs; docstring, not just a comment. 
   "Return a regexp to match a string in STRINGS.
 If PAREN non-nil, output regexp parentheses around returned regexp.
 If LAX non-nil, don't output parentheses if it doesn't require them.
@@ -190,45 +219,73 @@
 	      (regexp-opt-group (cdr strings) t t) "?"
 	      close-charset))
      ;;
-     ;; If all are one-character strings, just return a character set.
-     ((= (length strings) (apply '+ (mapcar 'length strings)))
-      (concat open-charset
-	      (regexp-opt-charset strings)
-	      close-charset))
+     ;; If there are several one-char strings, use charsets
+     ((and (= (length (car strings)) 1)
+	   (let ((strs (cdr strings)))
+	     (while (and strs (/= (length (car strs)) 1))
+	       (pop strs))
+	     strs))
+      (let (letters rest)
+	;; Collect one-char strings
+	(dolist (s strings)
+	  (if (= (length s) 1) (push (string-to-char s) letters) (push s rest)))
+
+	(if rest
+	    ;; several one-char strings: take them and recurse
+	    ;; on the rest (first so as to match the longest).
+	    (concat open-group
+		    (regexp-opt-group (nreverse rest))
+		    "\\|" (regexp-opt-charset letters)
+		    close-group)
+	  ;; all are one-char strings: just return a character set.
+	  (concat open-charset
+		  (regexp-opt-charset letters)
+		  close-charset))))
      ;;
      ;; We have a list of different length strings.
      (t
-      (let ((prefix (try-completion "" (mapcar 'list strings)))
-	    (letters (let ((completion-regexp-list '("^.$")))
-		       (all-completions "" (mapcar 'list strings)))))
-	(cond
-	 ;;
-	 ;; If there is a common prefix, remove it and recurse on the suffixes.
-	 ((> (length prefix) 0)
-	  (let* ((length (length prefix))
-		 (suffixes (mapcar (lambda (s) (substring s length)) strings)))
-	    (concat open-group
-		    (regexp-quote prefix) (regexp-opt-group suffixes t t)
-		    close-group)))
-	 ;;
-	 ;; If there are several one-character strings, remove them and recurse
-	 ;; on the rest (first so the final regexp finds the longest match).
-	 ((> (length letters) 1)
-	  (let ((rest (let ((completion-regexp-list '("^..+$")))
-			(all-completions "" (mapcar 'list strings)))))
-	    (concat open-group
-		    (regexp-opt-group rest) "\\|" (regexp-opt-charset letters)
-		    close-group)))
-	 ;;
-	 ;; Otherwise, divide the list into those that start with a particular
-	 ;; letter and those that do not, and recurse on them.
-	 (t
-	  (let* ((char (substring (car strings) 0 1))
-		 (half1 (all-completions char (mapcar 'list strings)))
-		 (half2 (nthcdr (length half1) strings)))
-	    (concat open-group
-		    (regexp-opt-group half1) "\\|" (regexp-opt-group half2)
-		    close-group)))))))))
+      ;; XEmacs; our #'try-completion requires an alist. 
+      (let ((prefix (try-completion "" (mapcar 'list strings))))
+	(if (> (length prefix) 0)
+	    ;; common prefix: take it and recurse on the suffixes.
+	    (let* ((n (length prefix))
+		   (suffixes (mapcar (lambda (s) (substring s n)) strings)))
+	      (concat open-group
+		      (regexp-quote prefix)
+		      (regexp-opt-group suffixes t t)
+		      close-group))
+
+	  (let* ((sgnirts (mapcar (lambda (s)
+                                    ;; XEmacs; our #'try-completion requires
+                                    ;; an alist.
+                                    (list
+                                     (concat (nreverse (string-to-list s)))))
+				  strings))
+		 (xiffus (try-completion "" sgnirts)))
+	    (if (> (length xiffus) 0)
+		;; common suffix: take it and recurse on the prefixes.
+		(let* ((n (- (length xiffus)))
+		       (prefixes
+			;; Sorting is necessary in cases such as ("ad" "d").
+			(sort (mapcar (lambda (s) (substring s 0 n)) strings)
+			      'string-lessp)))
+		  (concat open-group
+			  (regexp-opt-group prefixes t t)
+			  (regexp-quote
+			   (concat (nreverse (string-to-list xiffus))))
+			  close-group))
+
+	      ;; Otherwise, divide the list into those that start with a
+	      ;; particular letter and those that do not, and recurse on them.
+	      (let* ((char (char-to-string (string-to-char (car strings))))
+                     ;; XEmacs; #'all-completions requires an alist.
+		     (half1 (all-completions char (mapcar 'list strings)))
+		     (half2 (nthcdr (length half1) strings)))
+		(concat open-group
+			(regexp-opt-group half1)
+			"\\|" (regexp-opt-group half2)
+			close-group))))))))))
+
 
 (defun regexp-opt-charset (chars)
   ;;
@@ -237,14 +294,13 @@
   ;; The basic idea is to find character ranges.  Also we take care in the
   ;; position of character set meta characters in the character set regexp.
   ;;
-  (let* ((charwidth 256)				; Yeah, right.
-	 ;; XEmacs: use bit-vectors instead of bool-vectors
-	 (charmap (make-bit-vector charwidth 0))
+  (let* ((charmap (make-char-table 'generic)) ;; XEmacs; case-tables not suited.
+	 (start -1) (end -2)
 	 (charset "")
 	 (bracket "") (dash "") (caret ""))
     ;;
     ;; Make a character map but extract character set meta characters.
-    (dolist (char (mapcar 'string-to-char chars))
+    (dolist (char chars)
       (case char
 	(?\]
 	 (setq bracket "]"))
@@ -253,20 +309,27 @@
 	(?-
 	 (setq dash "-"))
 	(otherwise
-	 ;; XEmacs: 1
-	 (aset charmap char 1))))
+         (put-char-table char t charmap)))) ;; XEmacs; not a sequence, no aset
     ;;
     ;; Make a character set from the map using ranges where applicable.
-    (dotimes (char charwidth)
-      (let ((start char))
-	(while (and (< char charwidth)
-		    ;; XEmacs: (not (zerop ...))
-		    (not (zerop (aref charmap char))))
-	  (incf char))
-	(cond ((> char (+ start 3))
-	       (setq charset (format "%s%c-%c" charset start (1- char))))
-	      ((> char start)
-	       (setq charset (format "%s%c" charset (setq char start)))))))
+    (map-char-table
+     (lambda (c v)
+       (when v
+	 (if (= (1- c) end) (setq end c)
+	   (if (> end (+ start 2))
+	       (setq charset (format "%s%c-%c" charset start end))
+	     (while (>= end start)
+	       (setq charset (format "%s%c" charset start))
+	       (incf start)))
+	   (setq start c end c)))
+      nil) ;; XEmacs; don't end the loop with the first char 
+     charmap)
+    (when (>= end start)
+      (if (> end (+ start 2))
+	  (setq charset (format "%s%c-%c" charset start end))
+	(while (>= end start)
+	  (setq charset (format "%s%c" charset start))
+	  (incf start))))
     ;;
     ;; Make sure a caret is not first and a dash is first or last.
     (if (and (string-equal charset "") (string-equal bracket ""))
@@ -275,4 +338,5 @@
 
 (provide 'regexp-opt)
 
+;; arch-tag: 6c5a66f4-29af-4fd6-8c3b-4b554d5b4370
 ;;; regexp-opt.el ends here
-- 
¿Dónde estará ahora mi sobrino Yoghurtu Nghé, que tuvo que huir
precipitadamente de la aldea por culpa de la escasez de rinocerontes?
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://calypso.tux.org/cgi-bin/mailman/listinfo/xemacs-patches