APPROVE COMMIT
NOTE: This patch has been committed.
This includes the patch I mentioned in
http://mid.gmane.org/18307.63955.896221.621765@parhasard.net , and
supersedes it. Tested on the packages, to the extent that that was possible
with complaints from EDE saying "Given parent class eieio-persistent is not
a class", after significant hacking to to get it to acknowledge the eieio
autoloads at all. All the differences from GNU 1.34 are documented and
motivated, with the exception of those self-evidently XEmacs-specific and
the generated regexp in the example.
2008-01-11 Aidan Kehoe <kehoea(a)parhasard.net>
* regexp-opt.el: Merge revision 1.34 from GNU, of
2007-01-21. Thank you GNU.
XEmacs Packages source patch:
Diff command: cvs -q diff -Nu
Files affected: xemacs-packages/xemacs-base/regexp-opt.el
===================================================================
RCS
Index: xemacs-packages/xemacs-base/regexp-opt.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/packages/xemacs-packages/xemacs-base/regexp-opt.el,v
retrieving revision 1.3
diff -u -u -r1.3 regexp-opt.el
--- xemacs-packages/xemacs-base/regexp-opt.el 2003/10/29 16:31:46 1.3
+++ xemacs-packages/xemacs-base/regexp-opt.el 2008/01/11 23:10:12
@@ -1,6 +1,7 @@
;;; regexp-opt.el --- generate efficient regexps to match strings
-;; Copyright (C) 1994,95,96,97,98,99,2000 Free Software Foundation, Inc.
+;; Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+;; 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
;; Author: Simon Marshall <simon(a)gnu.org>
;; Maintainer: FSF
@@ -20,18 +21,14 @@
;; You should have received a copy of the GNU General Public License
;; along with XEmacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
-;;; Synched up with: GNU Emacs 21.3 + paren-in-char-set fix from CVS
-;;; revision 1.25. Some implementation differences in
-;;; regexp-opt-group and regexp-opt-charset but the APIs
-;;; are compatible and should return compatible (if not
-;;; exactly the same) regexps.
+;;; Synched up with: Revision 1.34 in GNU Emacs CVS, of 2007-01-21.
;;; Commentary:
-;; The "opt" in "regexp-opt" stands for
"optim\\(?:al\\|i\\(?:se\\|ze\\)\\)".
+;; The "opt" in "regexp-opt" stands for
"optim\\(al\\|i[sz]e\\)".
;;
;; This package generates a regexp from a given list of strings (which matches
;; one of those strings) so that the regexp generated by:
@@ -50,7 +47,7 @@
;; "save-current-buffer" "save-match-data"
;; "catch" "throw" "unwind-protect"
"condition-case")))
;; (concat "(" (regexp-opt strings t) "\\>"))
-;; =>
"(\\(c\\(?:atch\\|ond\\(?:ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(?:current-buffer\\|excursion\\|match-data\\|restriction\\|window-excursion\\)\\|throw\\|un\\(?:less\\|wind-protect\\)\\|wh\\(?:en\\|ile\\)\\)\\>"
+;; =>
"(\\(c\\(?:atch\\|ond\\(?:ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(?:current-buffer\\|excursion\\|match-data\\|\\(?:restrict\\|window-excurs\\)ion\\)\\|throw\\|un\\(?:less\\|wind-protect\\)\\|wh\\(?:en\\|ile\\)\\)\\>"
;;
;; Searching using the above example `regexp-opt' regexp takes approximately
;; two-thirds of the time taken using the equivalent `mapconcat' regexp.
@@ -93,7 +90,7 @@
;;;###autoload
(defun regexp-opt (strings &optional paren)
- "Return a regexp to match a string in STRINGS.
+ "Return a regexp to match a string in the list STRINGS.
Each string should be unique in STRINGS and should not contain any regexps,
quoted or not. If optional PAREN is non-nil, ensure that the returned regexp
is enclosed by at least one regexp grouping construct.
@@ -107,47 +104,78 @@
(save-match-data
;; Recurse on the sorted list.
(let* ((max-lisp-eval-depth (* 1024 1024))
+ (max-specpdl-size (* 1024 1024))
(completion-ignore-case nil)
+ (completion-regexp-list nil)
(words (eq paren 'words))
(open (cond ((stringp paren) paren) (paren "\\(")))
- (sorted-strings (sort (copy-sequence strings) 'string-lessp))
+ ;; XEmacs; 21.4 doesn't have #'delete-dups, but
+ ;; #'delete-duplicates is dumped.
+ (sorted-strings (sort (delete-duplicates
+ (copy-sequence strings) :test #'string=)
+ #'string-lessp))
(re (regexp-opt-group sorted-strings open)))
(if words (concat "\\<" re "\\>") re))))
-(defconst regexp-opt-not-groupie*-re
- (let* ((harmless-ch "[^\\\\[]")
- (esc-pair-not-lp "\\\\[^(]")
- (class-harmless-ch "[^][]")
- (class-lb-harmless "[^]:]")
- (class-lb-colon-maybe-charclass ":\\([a-z]+:]\\)?")
- (class-lb (concat "\\[\\(" class-lb-harmless
- "\\|" class-lb-colon-maybe-charclass
"\\)"))
- (class
- (concat "\\[^?]?"
- "\\(" class-harmless-ch
- "\\|" class-lb "\\)*"
- "\\[?]")) ; special handling for bare [ at end of re
- (shy-lp "\\\\(\\?:"))
- (concat "\\(" harmless-ch "\\|" esc-pair-not-lp
- "\\|" class "\\|" shy-lp "\\)*"))
- "Matches any part of a regular expression EXCEPT for non-shy
\"\\\\(\"s")
+;; XEmacs; added here. This is in subr.el in GNU.
+(defun-when-void subregexp-context-p (regexp pos &optional start)
+ "Return non-nil if POS is in a normal subregexp context in REGEXP.
+A subregexp context is one where a sub-regexp can appear.
+A non-subregexp context is for example within brackets, or within a
+repetition bounds operator `\\=\\{...\\}', or right after a `\\'.
+If START is non-nil, it should be a position in REGEXP, smaller
+than POS, and known to be in a subregexp context."
+ ;; Here's one possible implementation, with the great benefit that it
+ ;; reuses the regexp-matcher's own parser, so it understands all the
+ ;; details of the syntax. A disadvantage is that it needs to match the
+ ;; error string.
+ (condition-case err
+ (progn
+ (string-match (substring regexp (or start 0) pos) "")
+ t)
+ (invalid-regexp
+ (not (member (cadr err) '("Unmatched [ or [^"
+ "Unmatched \\{"
+ "Trailing backslash")))))
+ ;; An alternative implementation:
+ ;; (defconst re-context-re
+ ;; (let* ((harmless-ch "[^\\[]")
+ ;; (harmless-esc "\\\\[^{]")
+ ;; (class-harmless-ch "[^][]")
+ ;; (class-lb-harmless "[^]:]")
+ ;; (class-lb-colon-maybe-charclass ":\\([a-z]+:]\\)?")
+ ;; (class-lb (concat "\\[\\(" class-lb-harmless
+ ;; "\\|" class-lb-colon-maybe-charclass
"\\)"))
+ ;; (class
+ ;; (concat "\\[^?]?"
+ ;; "\\(" class-harmless-ch
+ ;; "\\|" class-lb "\\)*"
+ ;; "\\[?]")) ; special handling for bare [ at end of
re
+ ;; (braces "\\\\{[0-9,]+\\\\}"))
+ ;; (concat "\\`\\(" harmless-ch "\\|" harmless-esc
+ ;; "\\|" class "\\|" braces "\\)*\\'"))
+ ;; "Matches any prefix that corresponds to a normal subregexp context.")
+ ;; (string-match re-context-re (substring regexp (or start 0) pos))
+ )
;;;###autoload
(defun regexp-opt-depth (regexp)
"Return the depth of REGEXP.
-This means the number of regexp grouping constructs (parenthesised expressions)
-in REGEXP."
+This means the number of non-shy regexp grouping constructs
+\(parenthesized expressions) in REGEXP."
(save-match-data
;; Hack to signal an error if REGEXP does not have balanced parentheses.
(string-match regexp "")
;; Count the number of open parentheses in REGEXP.
- (let ((count 0) start)
- (while
- (progn
- (string-match regexp-opt-not-groupie*-re regexp start)
- (setq start ( + (match-end 0) 2)) ; +2 for "\\(" after match-end.
- (<= start (length regexp)))
- (setq count (1+ count)))
+ (let ((count 0) start last)
+ (while (string-match "\\\\(\\(\\?:\\)?" regexp start)
+ (setq start (match-end 0)) ; Start of next search.
+ (when (and (not (match-beginning 1))
+ (subregexp-context-p regexp (match-beginning 0) last))
+ ;; It's not a shy group and it's not inside brackets or after
+ ;; a backslash: it's really a group-open marker.
+ (setq last start) ; Speed up next regexp-opt-re-context-p.
+ (setq count (1+ count))))
count)))
;;; Workhorse functions.
@@ -156,6 +184,7 @@
(require 'cl))
(defun regexp-opt-group (strings &optional paren lax)
+ ;; XEmacs; docstring, not just a comment.
"Return a regexp to match a string in STRINGS.
If PAREN non-nil, output regexp parentheses around returned regexp.
If LAX non-nil, don't output parentheses if it doesn't require them.
@@ -190,45 +219,73 @@
(regexp-opt-group (cdr strings) t t) "?"
close-charset))
;;
- ;; If all are one-character strings, just return a character set.
- ((= (length strings) (apply '+ (mapcar 'length strings)))
- (concat open-charset
- (regexp-opt-charset strings)
- close-charset))
+ ;; If there are several one-char strings, use charsets
+ ((and (= (length (car strings)) 1)
+ (let ((strs (cdr strings)))
+ (while (and strs (/= (length (car strs)) 1))
+ (pop strs))
+ strs))
+ (let (letters rest)
+ ;; Collect one-char strings
+ (dolist (s strings)
+ (if (= (length s) 1) (push (string-to-char s) letters) (push s rest)))
+
+ (if rest
+ ;; several one-char strings: take them and recurse
+ ;; on the rest (first so as to match the longest).
+ (concat open-group
+ (regexp-opt-group (nreverse rest))
+ "\\|" (regexp-opt-charset letters)
+ close-group)
+ ;; all are one-char strings: just return a character set.
+ (concat open-charset
+ (regexp-opt-charset letters)
+ close-charset))))
;;
;; We have a list of different length strings.
(t
- (let ((prefix (try-completion "" (mapcar 'list strings)))
- (letters (let ((completion-regexp-list '("^.$")))
- (all-completions "" (mapcar 'list strings)))))
- (cond
- ;;
- ;; If there is a common prefix, remove it and recurse on the suffixes.
- ((> (length prefix) 0)
- (let* ((length (length prefix))
- (suffixes (mapcar (lambda (s) (substring s length)) strings)))
- (concat open-group
- (regexp-quote prefix) (regexp-opt-group suffixes t t)
- close-group)))
- ;;
- ;; If there are several one-character strings, remove them and recurse
- ;; on the rest (first so the final regexp finds the longest match).
- ((> (length letters) 1)
- (let ((rest (let ((completion-regexp-list '("^..+$")))
- (all-completions "" (mapcar 'list strings)))))
- (concat open-group
- (regexp-opt-group rest) "\\|" (regexp-opt-charset letters)
- close-group)))
- ;;
- ;; Otherwise, divide the list into those that start with a particular
- ;; letter and those that do not, and recurse on them.
- (t
- (let* ((char (substring (car strings) 0 1))
- (half1 (all-completions char (mapcar 'list strings)))
- (half2 (nthcdr (length half1) strings)))
- (concat open-group
- (regexp-opt-group half1) "\\|" (regexp-opt-group half2)
- close-group)))))))))
+ ;; XEmacs; our #'try-completion requires an alist.
+ (let ((prefix (try-completion "" (mapcar 'list strings))))
+ (if (> (length prefix) 0)
+ ;; common prefix: take it and recurse on the suffixes.
+ (let* ((n (length prefix))
+ (suffixes (mapcar (lambda (s) (substring s n)) strings)))
+ (concat open-group
+ (regexp-quote prefix)
+ (regexp-opt-group suffixes t t)
+ close-group))
+
+ (let* ((sgnirts (mapcar (lambda (s)
+ ;; XEmacs; our #'try-completion requires
+ ;; an alist.
+ (list
+ (concat (nreverse (string-to-list s)))))
+ strings))
+ (xiffus (try-completion "" sgnirts)))
+ (if (> (length xiffus) 0)
+ ;; common suffix: take it and recurse on the prefixes.
+ (let* ((n (- (length xiffus)))
+ (prefixes
+ ;; Sorting is necessary in cases such as ("ad" "d").
+ (sort (mapcar (lambda (s) (substring s 0 n)) strings)
+ 'string-lessp)))
+ (concat open-group
+ (regexp-opt-group prefixes t t)
+ (regexp-quote
+ (concat (nreverse (string-to-list xiffus))))
+ close-group))
+
+ ;; Otherwise, divide the list into those that start with a
+ ;; particular letter and those that do not, and recurse on them.
+ (let* ((char (char-to-string (string-to-char (car strings))))
+ ;; XEmacs; #'all-completions requires an alist.
+ (half1 (all-completions char (mapcar 'list strings)))
+ (half2 (nthcdr (length half1) strings)))
+ (concat open-group
+ (regexp-opt-group half1)
+ "\\|" (regexp-opt-group half2)
+ close-group))))))))))
+
(defun regexp-opt-charset (chars)
;;
@@ -237,14 +294,13 @@
;; The basic idea is to find character ranges. Also we take care in the
;; position of character set meta characters in the character set regexp.
;;
- (let* ((charwidth 256) ; Yeah, right.
- ;; XEmacs: use bit-vectors instead of bool-vectors
- (charmap (make-bit-vector charwidth 0))
+ (let* ((charmap (make-char-table 'generic)) ;; XEmacs; case-tables not suited.
+ (start -1) (end -2)
(charset "")
(bracket "") (dash "") (caret ""))
;;
;; Make a character map but extract character set meta characters.
- (dolist (char (mapcar 'string-to-char chars))
+ (dolist (char chars)
(case char
(?\]
(setq bracket "]"))
@@ -253,20 +309,27 @@
(?-
(setq dash "-"))
(otherwise
- ;; XEmacs: 1
- (aset charmap char 1))))
+ (put-char-table char t charmap)))) ;; XEmacs; not a sequence, no aset
;;
;; Make a character set from the map using ranges where applicable.
- (dotimes (char charwidth)
- (let ((start char))
- (while (and (< char charwidth)
- ;; XEmacs: (not (zerop ...))
- (not (zerop (aref charmap char))))
- (incf char))
- (cond ((> char (+ start 3))
- (setq charset (format "%s%c-%c" charset start (1- char))))
- ((> char start)
- (setq charset (format "%s%c" charset (setq char start)))))))
+ (map-char-table
+ (lambda (c v)
+ (when v
+ (if (= (1- c) end) (setq end c)
+ (if (> end (+ start 2))
+ (setq charset (format "%s%c-%c" charset start end))
+ (while (>= end start)
+ (setq charset (format "%s%c" charset start))
+ (incf start)))
+ (setq start c end c)))
+ nil) ;; XEmacs; don't end the loop with the first char
+ charmap)
+ (when (>= end start)
+ (if (> end (+ start 2))
+ (setq charset (format "%s%c-%c" charset start end))
+ (while (>= end start)
+ (setq charset (format "%s%c" charset start))
+ (incf start))))
;;
;; Make sure a caret is not first and a dash is first or last.
(if (and (string-equal charset "") (string-equal bracket ""))
@@ -275,4 +338,5 @@
(provide 'regexp-opt)
+;; arch-tag: 6c5a66f4-29af-4fd6-8c3b-4b554d5b4370
;;; regexp-opt.el ends here
--
¿Dónde estará ahora mi sobrino Yoghurtu Nghé, que tuvo que huir
precipitadamente de la aldea por culpa de la escasez de rinocerontes?
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://calypso.tux.org/cgi-bin/mailman/listinfo/xemacs-patches