commit/xemacs-base: sperber: Add rx.el.

Thursday, 21 February 2013

        1 new commit in xemacs-base:

https://bitbucket.org/xemacs/xemacs-base/commits/8f81f441c070/
changeset:   8f81f441c070
user:        sperber
date:        2013-02-19 17:44:15
summary:     Add rx.el.

2013-02-19  Michael Sperber  <mike(a)xemacs.org&gt;

	* rx.el: Added port from GNU Emacs 24.2.
affected #:  3 files

diff -r 190b20ac84fc380fba1dabcf04b0181f3ac13353 -r
8f81f441c070adfd90fedb59c13bb6f69dab39a4 ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
＠＠ -1,3 +1,7 ＠＠
+2013-02-19  Michael Sperber  <mike(a)xemacs.org&gt;
+
+	* rx.el: Added port from GNU Emacs 24.2.
+
 2012-05-11  Norbert Koch  <viteno(a)xemacs.org&gt;

 	* Makefile (VERSION): XEmacs package 2.32 released.

diff -r 190b20ac84fc380fba1dabcf04b0181f3ac13353 -r
8f81f441c070adfd90fedb59c13bb6f69dab39a4 Makefile
--- a/Makefile
+++ b/Makefile
＠＠ -38,7 +38,7 ＠＠
 	helper.elc imenu.elc iso-syntax.elc macros.elc novice.elc outline.elc \
 	passwd.elc pp.elc regexp-opt.elc regi.elc ring.elc shell.elc \
 	skeleton.elc sort.elc thing.elc time-stamp.elc timer-funcs.elc \
-	timezone.elc tq.elc xbm-button.elc xpm-button.elc
+	timezone.elc tq.elc xbm-button.elc xpm-button.elc rx.elc

 DATA_FILES = etc/enriched.doc
 DATA_DEST = .

diff -r 190b20ac84fc380fba1dabcf04b0181f3ac13353 -r
8f81f441c070adfd90fedb59c13bb6f69dab39a4 rx.el
--- /dev/null
+++ b/rx.el
＠＠ -0,0 +1,1182 ＠＠
+;;; rx.el --- sexp notation for regular expressions
+
+;; Copyright (C) 2001-2012 Free Software Foundation, Inc.
+
+;; Author: Gerd Moellmann <gerd(a)gnu.org&gt;
+;; Maintainer: FSF
+;; Keywords: strings, regexps, extensions
+
+;; This file is part of XEmacs.
+
+;; XEmacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; XEmacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with XEmacs.  If not, see <http://www.gnu.org/licenses/>;.
+
+;;; Synched up with: Emacs 24.2
+
+;;; Commentary:
+
+;; This is another implementation of sexp-form regular expressions.
+;; It was unfortunately written without being aware of the Sregex
+;; package coming with Emacs, but as things stand, Rx completely
+;; covers all regexp features, which Sregex doesn't, doesn't suffer
+;; from the bugs mentioned in the commentary section of Sregex, and
+;; uses a nicer syntax (IMHO, of course :-).
+
+;; This significantly extended version of the original, is almost
+;; compatible with Sregex.  The only incompatibility I (fx) know of is
+;; that the `repeat' form can't have multiple regexp args.
+
+;; Now alternative forms are provided for a degree of compatibility
+;; with Shivers' attempted definitive SRE notation
+;; <URL:http://www.ai.mit.edu/~/shivers/sre.txt>;.  SRE forms not
+;; catered for include: dsm, uncase, w/case, w/nocase, ,＠<exp>,
+;; ,<exp>, (word ...), word+, posix-string, and character class forms.
+;; Some forms are inconsistent with SRE, either for historical reasons
+;; or because of the implementation -- simple translation into Emacs
+;; regexp strings.  These include: any, word.  Also, case-sensitivity
+;; and greediness are controlled by variables external to the regexp,
+;; and you need to feed the forms to the `posix-' functions to get
+;; SRE's POSIX semantics.  There are probably more difficulties.
+
+;; Rx translates a sexp notation for regular expressions into the
+;; usual string notation.  The translation can be done at compile-time
+;; by using the `rx' macro.  It can be done at run-time by calling
+;; function `rx-to-string'.  See the documentation of `rx' for a
+;; complete description of the sexp notation.
+;;
+;; Some examples of string regexps and their sexp counterparts:
+;;
+;; "^[a-z]*"
+;; (rx (and line-start (0+ (in "a-z"))))
+;;
+;; "\n[^ \t]"
+;; (rx (and "\n" (not blank))), or
+;; (rx (and "\n" (not (any " \t"))))
+;;
+;; "\\*\\*\\* EOOH \\*\\*\\*\n"
+;; (rx "*** EOOH ***\n")
+;;
+;; "\\<\\(catch\\|finally\\)\\>[^_]"
+;; (rx (and word-start (submatch (or "catch" "finally")) word-end
+;;          (not (any ?_))))
+;;
+;; "[ \t\n]*:\\([^:]+\\|$\\)"
+;; (rx (and (zero-or-more (in " \t\n")) ":"
+;;          (submatch (or line-end (one-or-more (not (any ?:)))))))
+;;
+;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*"
+;; (rx (and line-start
+;;          "content-transfer-encoding:"
+;;          (+ (? ?\n)) blank
+;;	    "quoted-printable"
+;;	    (+ (? ?\n)) blank))
+;;
+;; (concat "^\\(?:" something-else "\\)")
+;; (rx (and line-start (eval something-else))), statically or
+;; (rx-to-string '(and line-start ,something-else)), dynamically.
+;;
+;; (regexp-opt '(STRING1 STRING2 ...))
+;; (rx (or STRING1 STRING2 ...)), or in other words, `or' automatically
+;; calls `regexp-opt' as needed.
+;;
+;; "^;;\\s-*\n\\|^\n"
+;; (rx (or (and line-start ";;" (0+ space) ?\n)
+;;         (and line-start ?\n)))
+;;
+;; "\\$[I]d: [^ ]+ \\([^ ]+\\) "
+;; (rx (and "$Id: "
+;;          (1+ (not (in " ")))
+;;          " "
+;;          (submatch (1+ (not (in " "))))
+;;          " "))
+;;
+;; "\\\\\\\\\\[\\w+"
+;; (rx (and ?\\ ?\\ ?\[ (1+ word)))
+;;
+;; etc.
+
+;;; History:
+;;
+
+;;; Code:
+
+(defconst rx-constituents
+  '((and		. (rx-and 1 nil))
+    (seq		. and)		; SRE
+    (:			. and)		; SRE
+    (sequence		. and)		; sregex
+    (or			. (rx-or 1 nil))
+    (|			. or)		; SRE
+    (not-newline	. ".")
+    (nonl		. not-newline)	; SRE
+    (anything		. (rx-anything 0 nil))
+    (any		. (rx-any 1 nil rx-check-any)) ; inconsistent with SRE
+    (any		. ".")          ; sregex
+    (in			. any)
+    (char		. any)		; sregex
+    (not-char		. (rx-not-char 1 nil rx-check-any)) ; sregex
+    (not		. (rx-not 1 1 rx-check-not))
+    (repeat		. (rx-repeat 2 nil))
+    (=			. (rx-= 2 nil))	   ; SRE
+    (>=			. (rx->= 2 nil))   ; SRE
+    (**			. (rx-** 2 nil))   ; SRE
+    (submatch		. (rx-submatch 1 nil)) ; SRE
+    (group		. submatch)     ; sregex
+    (submatch-n		. (rx-submatch-n 2 nil))
+    (group-n		. submatch-n)
+    (zero-or-more	. (rx-kleene 1 nil))
+    (one-or-more	. (rx-kleene 1 nil))
+    (zero-or-one	. (rx-kleene 1 nil))
+    (\?			. zero-or-one)	; SRE
+    (\??		. zero-or-one)
+    (*			. zero-or-more)	; SRE
+    (*?			. zero-or-more)
+    (0+			. zero-or-more)
+    (+			. one-or-more)	; SRE
+    (+?			. one-or-more)
+    (1+			. one-or-more)
+    (optional		. zero-or-one)
+    (opt		. zero-or-one)	; sregex
+    (minimal-match	. (rx-greedy 1 1))
+    (maximal-match	. (rx-greedy 1 1))
+    (backref		. (rx-backref 1 1 rx-check-backref))
+    (line-start		. "^")
+    (bol		. line-start)	; SRE
+    (line-end		. "$")
+    (eol		. line-end)	; SRE
+    (string-start	. "\\`")
+    (bos		. string-start)	; SRE
+    (bot		. string-start)	; sregex
+    (string-end		. "\\'")
+    (eos		. string-end)	; SRE
+    (eot		. string-end)	; sregex
+    (buffer-start	. "\\`")
+    (buffer-end		. "\\'")
+    (point		. "\\=")
+    (word-start		. "\\<")
+    (bow		. word-start)	; SRE
+    (word-end		. "\\>")
+    (eow		. word-end)	; SRE
+    (word-boundary	. "\\b")
+    (not-word-boundary	. "\\B")	; sregex
+    (symbol-start       . "\\_<")
+    (symbol-end         . "\\_>")
+    (syntax		. (rx-syntax 1 1))
+    (not-syntax		. (rx-not-syntax 1 1)) ; sregex
+    (category		. (rx-category 1 1 rx-check-category))
+    (eval		. (rx-eval 1 1))
+    (regexp		. (rx-regexp 1 1 stringp))
+    (regex		. regexp)       ; sregex
+    (digit		. "[[:digit:]]")
+    (numeric		. digit)	; SRE
+    (num		. digit)	; SRE
+    (control		. "[[:cntrl:]]") ; SRE
+    (cntrl		. control)	 ; SRE
+    (hex-digit		. "[[:xdigit:]]") ; SRE
+    (hex		. hex-digit)	  ; SRE
+    (xdigit		. hex-digit)	  ; SRE
+    (blank		. "[[:blank:]]")  ; SRE
+    (graphic		. "[[:graph:]]")  ; SRE
+    (graph		. graphic)	  ; SRE
+    (printing		. "[[:print:]]")  ; SRE
+    (print		. printing)	  ; SRE
+    (alphanumeric	. "[[:alnum:]]")  ; SRE
+    (alnum		. alphanumeric)	  ; SRE
+    (letter		. "[[:alpha:]]")
+    (alphabetic		. letter)	; SRE
+    (alpha		. letter)	; SRE
+    (ascii		. "[[:ascii:]]") ; SRE
+    (nonascii		. "[[:nonascii:]]")
+    (lower		. "[[:lower:]]") ; SRE
+    (lower-case		. lower)	 ; SRE
+    (punctuation	. "[[:punct:]]") ; SRE
+    (punct		. punctuation)	 ; SRE
+    (space		. "[[:space:]]") ; SRE
+    (whitespace		. space)	 ; SRE
+    (white		. space)	 ; SRE
+    (upper		. "[[:upper:]]") ; SRE
+    (upper-case		. upper)	 ; SRE
+    (word		. "[[:word:]]")	 ; inconsistent with SRE
+    (wordchar		. word)		 ; sregex
+    (not-wordchar	. "\\W"))
+  "Alist of sexp form regexp constituents.
+Each element of the alist has the form (SYMBOL . DEFN).
+SYMBOL is a valid constituent of sexp regular expressions.
+If DEFN is a string, SYMBOL is translated into DEFN.
+If DEFN is a symbol, use the definition of DEFN, recursively.
+Otherwise, DEFN must be a list (FUNCTION MIN-ARGS MAX-ARGS PREDICATE).
+FUNCTION is used to produce code for SYMBOL.  MIN-ARGS and MAX-ARGS
+are the minimum and maximum number of arguments the function-form
+sexp constituent SYMBOL may have in sexp regular expressions.
+MAX-ARGS nil means no limit.  PREDICATE, if specified, means that
+all arguments must satisfy PREDICATE.")
+
+
+(defconst rx-syntax
+  '((whitespace		. ?-)
+    (punctuation	. ?.)
+    (word		. ?w)
+    (symbol		. ?_)
+    (open-parenthesis	. ?\()
+    (close-parenthesis	. ?\))
+    (expression-prefix	. ?\')
+    (string-quote	. ?\")
+    (paired-delimiter	. ?$)
+    (escape		. ?\\)
+    (character-quote	. ?/)
+    (comment-start	. ?<)
+    (comment-end	. ?>)
+    (string-delimiter	. ?|)
+    (comment-delimiter	. ?!))
+  "Alist mapping Rx syntax symbols to syntax characters.
+Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid
+symbol in `(syntax SYMBOL)', and CHAR is the syntax character
+corresponding to SYMBOL, as it would be used with \\s or \\S in
+regular expressions.")
+
+
+(defconst rx-categories
+  '((consonant			. ?0)
+    (base-vowel			. ?1)
+    (upper-diacritical-mark	. ?2)
+    (lower-diacritical-mark	. ?3)
+    (tone-mark			. ?4)
+    (symbol			. ?5)
+    (digit			. ?6)
+    (vowel-modifying-diacritical-mark . ?7)
+    (vowel-sign			. ?8)
+    (semivowel-lower		. ?9)
+    (not-at-end-of-line		. ?<)
+    (not-at-beginning-of-line	. ?>)
+    (alpha-numeric-two-byte	. ?A)
+    (chinse-two-byte		. ?C)
+    (greek-two-byte		. ?G)
+    (japanese-hiragana-two-byte . ?H)
+    (indian-two-byte		. ?I)
+    (japanese-katakana-two-byte . ?K)
+    (korean-hangul-two-byte	. ?N)
+    (cyrillic-two-byte		. ?Y)
+    (combining-diacritic	. ?^)
+    (ascii			. ?a)
+    (arabic			. ?b)
+    (chinese			. ?c)
+    (ethiopic			. ?e)
+    (greek			. ?g)
+    (korean			. ?h)
+    (indian			. ?i)
+    (japanese			. ?j)
+    (japanese-katakana		. ?k)
+    (latin			. ?l)
+    (lao			. ?o)
+    (tibetan			. ?q)
+    (japanese-roman		. ?r)
+    (thai			. ?t)
+    (vietnamese			. ?v)
+    (hebrew			. ?w)
+    (cyrillic			. ?y)
+    (can-break			. ?|))
+  "Alist mapping symbols to category characters.
+Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid
+symbol in `(category SYMBOL)', and CHAR is the category character
+corresponding to SYMBOL, as it would be used with `\\c' or `\\C' in
+regular expression strings.")
+
+
+(defvar rx-greedy-flag t
+  "Non-nil means produce greedy regular expressions for `zero-or-one',
+`zero-or-more', and `one-or-more'.  Dynamically bound.")
+
+
+(defun rx-info (op head)
+  "Return parsing/code generation info for OP.
+If OP is the space character ASCII 32, return info for the symbol `?'.
+If OP is the character `?', return info for the symbol `??'.
+See also `rx-constituents'.
+If HEAD is non-nil, then OP is the head of a sexp, otherwise it's
+a standalone symbol."
+  (cond ((eq op ? ) (setq op '\?))
+	((eq op ??) (setq op '\??)))
+  (let (old-op)
+    (while (and (not (null op)) (symbolp op))
+      (setq old-op op)
+      (setq op (cdr (assq op rx-constituents)))
+      (when (if head (stringp op) (consp op))
+        ;; We found something but of the wrong kind.  Let's look for an
+        ;; alternate definition for the other case.
+        (let ((new-op
+               (cdr (assq old-op (cdr (memq (assq old-op rx-constituents)
+                                            rx-constituents))))))
+          (if (and new-op (not (if head (stringp new-op) (consp new-op))))
+              (setq op new-op))))))
+  op)
+
+
+(defun rx-check (form)
+  "Check FORM according to its car's parsing info."
+  (unless (listp form)
+    (error "rx `%s' needs argument(s)" form))
+  (let* ((rx (rx-info (car form) 'head))
+	 (nargs (1- (length form)))
+	 (min-args (nth 1 rx))
+	 (max-args (nth 2 rx))
+	 (type-pred (nth 3 rx)))
+    (when (and (not (null min-args))
+	       (< nargs min-args))
+      (error "rx form `%s' requires at least %d args"
+	     (car form) min-args))
+    (when (and (not (null max-args))
+	       (> nargs max-args))
+      (error "rx form `%s' accepts at most %d args"
+	     (car form) max-args))
+    (when (not (null type-pred))
+      (dolist (sub-form (cdr form))
+	(unless (funcall type-pred sub-form)
+	  (error "rx form `%s' requires args satisfying `%s'"
+		 (car form) type-pred))))))
+
+
+(defun rx-group-if (regexp group)
+  "Put shy groups around REGEXP if seemingly necessary when GROUP
+is non-nil."
+  (cond
+   ;; for some repetition
+   ((eq group '*) (if (rx-atomic-p regexp) (setq group nil)))
+   ;; for concatenation
+   ((eq group ':)
+    (if (rx-atomic-p
+	 (if (string-match
+	      "\\(?:[?*+]\\??\\|\\\\{[0-9]*,?[0-9]*\\\\}\\)\\'" regexp)
+	     (substring regexp 0 (match-beginning 0))
+	   regexp))
+	(setq group nil)))
+   ;; for OR
+   ((eq group '|) (setq group nil))
+   ;; do anyway
+   ((eq group t))
+   ((rx-atomic-p regexp t) (setq group nil)))
+  (if group
+      (concat "\\(?:" regexp "\\)")
+    regexp))
+
+
+(defvar rx-parent)
+;; dynamically bound in some functions.
+
+
+(defun rx-and (form)
+  "Parse and produce code from FORM.
+FORM is of the form `(and FORM1 ...)'."
+  (rx-check form)
+  (rx-group-if
+   (mapconcat (lambda (x) (rx-form x ':)) (cdr form) nil)
+   (and (memq rx-parent '(* t)) rx-parent)))
+
+
+(defun rx-or (form)
+  "Parse and produce code from FORM, which is `(or FORM1 ...)'."
+  (rx-check form)
+  (rx-group-if
+   (if (memq nil (mapcar 'stringp (cdr form)))
+       (mapconcat (lambda (x) (rx-form x '|)) (cdr form) "\\|")
+     (regexp-opt (cdr form)))
+   (and (memq rx-parent '(: * t)) rx-parent)))
+
+
+(defun rx-anything (form)
+  "Match any character."
+  (if (consp form)
+      (error "rx `anything' syntax error: %s" form))
+  (rx-or (list 'or 'not-newline ?\n)))
+
+
+(defun rx-any-delete-from-range (char ranges)
+  "Delete by side effect character CHAR from RANGES.
+Only both edges of each range is checked."
+  (let (m)
+    (cond
+     ((memq char ranges) (setq ranges (delq char ranges)))
+     ((setq m (assq char ranges))
+      (if (eq (int-char (1+ char)) (cdr m))
+	  (setcar (memq m ranges) (int-char (1+ char)))
+	(setcar m (int-char (1+ char)))))
+     ((setq m (rassq char ranges))
+      (if (eq (int-char (1- char)) (car m))
+	  (setcar (memq m ranges) (int-char (1- char)))
+	(setcdr m (int-char (1- char))))))
+    ranges))
+
+
+(defun rx-any-condense-range (args)
+  "Condense by side effect ARGS as range for Rx `any'."
+  (let (str
+	l)
+    ;; set STR list of all strings
+    ;; set L list of all ranges
+    (mapc (lambda (e) (cond ((stringp e) (push e str))
+			    ((characterp e) (push (cons e e) l))
+			    (t (push e l))))
+	  args)
+    ;; condense overlapped ranges in L
+    (let ((tail (setq l (sort l #'car-less-than-car)))
+	  d)
+      (while (setq d (cdr tail))
+	(if (>= (cdar tail) (1- (caar d)))
+	    (progn
+	      (setcdr (car tail) (int-char (max (cdar tail) (cdar d))))
+	      (setcdr tail (cdr d)))
+	  (setq tail d))))
+    ;; Separate small ranges to single number, and delete dups.
+    (nconc
+     (apply #'nconc
+	    (mapcar (lambda (e)
+		      (cond
+		       ((= (car e) (cdr e)) (list (car e)))
+		       ((= (int-char (1+ (car e))) (cdr e)) (list (car e) (cdr e)))
+		       ((list e))))
+		    l))
+     (delete-dups str))))
+
+
+(defun rx-check-any-string (str)
+  "Check string argument STR for Rx `any'."
+  (let ((i 0)
+	c1 c2 l)
+    (if (= 0 (length str))
+	(error "String arg for Rx `any' must not be empty"))
+    (while (string-match ".-." str i)
+      ;; string before range: convert it to characters
+      (if (< i (match-beginning 0))
+	  (setq l (nconc
+		   l
+		   (append (substring str i (match-beginning 0)) nil))))
+      ;; range
+      (setq i (match-end 0)
+	    c1 (aref str (match-beginning 0))
+	    c2 (aref str (1- i)))
+      (cond
+       ((< c1 c2) (setq l (nconc l (list (cons c1 c2)))))
+       ((= c1 c2) (setq l (nconc l (list c1))))))
+    ;; rest?
+    (if (< i (length str))
+	(setq l (nconc l (append (substring str i) nil))))
+    l))
+
+
+(defun rx-check-any (arg)
+   "Check arg ARG for Rx `any'."
+   (cond
+    ((characterp arg) (list arg))
+    ((symbolp arg)
+     (let ((translation (condition-case nil
+			    (rx-form arg)
+			  (error nil))))
+       (if (or (null translation)
+	       (null (string-match "\\`\\[\\[:[-a-z]+:\\]\\]\\'" translation)))
+	   (error "Invalid char class `%s' in Rx `any'" arg))
+       (list (substring translation 1 -1)))) ; strip outer brackets
+    ((and (characterp (car-safe arg)) (characterp (cdr-safe arg)))
+     (list arg))
+    ((stringp arg) (rx-check-any-string arg))
+    ((error
+      "rx `any' requires string, character, char pair or char class
args"))))
+
+
+(defun rx-any (form)
+  "Parse and produce code from FORM, which is `(any ARG ...)'.
+ARG is optional."
+  (rx-check form)
+  (let* ((args (rx-any-condense-range
+		(apply
+		 #'nconc
+		 (mapcar #'rx-check-any (cdr form)))))
+	 m
+	 s)
+    (cond
+     ;; single close bracket
+     ;;	 => "[]...-]" or "[]...--.]"
+     ((memq ?\] args)
+      ;; set ] at the beginning
+      (setq args (cons ?\] (delq ?\] args)))
+      ;; set - at the end
+      (if (or (memq ?- args) (assq ?- args))
+	  (setq args (nconc (rx-any-delete-from-range ?- args)
+			    (list ?-)))))
+     ;; close bracket starts a range
+     ;;  => "[]-....-]" or "[]-.--....]"
+     ((setq m (assq ?\] args))
+      ;; bring it to the beginning
+      (setq args (cons m (delq m args)))
+      (cond ((memq ?- args)
+	     ;; to the end
+	     (setq args (nconc (delq ?- args) (list ?-))))
+	    ((setq m (assq ?- args))
+	     ;; next to the bracket's range, make the second range
+	     (setcdr args (cons m (delq m args))))))
+     ;; bracket in the end range
+     ;;	 => "[]...-]"
+     ((setq m (rassq ?\] args))
+      ;; set ] at the beginning
+      (setq args (cons ?\] (rx-any-delete-from-range ?\] args)))
+      ;; set - at the end
+      (if (or (memq ?- args) (assq ?- args))
+	  (setq args (nconc (rx-any-delete-from-range ?- args)
+			    (list ?-)))))
+     ;; {no close bracket appears}
+     ;;
+     ;; bring single bar to the beginning
+     ((memq ?- args)
+      (setq args (cons ?- (delq ?- args))))
+     ;; bar start a range, bring it to the beginning
+     ((setq m (assq ?- args))
+      (setq args (cons m (delq m args))))
+     ;;
+     ;; hat at the beginning?
+     ((or (eq (car args) ?^) (eq (car-safe (car args)) ?^))
+      (setq args (if (cdr args)
+		     `(,(cadr args) ,(car args) ,＠(cddr args))
+		   (nconc (rx-any-delete-from-range ?^ args)
+			  (list ?^))))))
+    ;; some 1-char?
+    (if (and (null (cdr args)) (characterp (car args))
+	     (or (= 1 (length
+		       (setq s (regexp-quote (string (car args))))))
+		 (and (equal (car args) ?^) ;; unnecessary predicate?
+		      (null (eq rx-parent '!)))))
+	s
+      (concat "["
+	      (mapconcat
+	       (lambda (e) (cond
+			    ((characterp e) (string e))
+			    ((consp e)
+			     (if (and (= (1+ (car e)) (cdr e))
+                                      ;; rx-any-condense-range should
+                                      ;; prevent this case from happening.
+				      (null (memq (car e) '(?\] ?-)))
+                                      (null (memq (cdr e) '(?\] ?-))))
+				 (string (car e) (cdr e))
+			       (string (car e) ?- (cdr e))))
+			    (e)))
+	       args
+	       nil)
+	      "]"))))
+
+
+(defun rx-check-not (arg)
+  "Check arg ARG for Rx `not'."
+  (unless (or (and (symbolp arg)
+		   (string-match "\\`\\[\\[:[-a-z]+:\\]\\]\\'"
+				 (condition-case nil
+				     (rx-form arg)
+				   (error ""))))
+	      (eq arg 'word-boundary)
+	      (and (consp arg)
+		   (memq (car arg) '(not any in syntax category))))
+    (error "rx `not' syntax error: %s" arg))
+  t)
+
+
+(defun rx-not (form)
+  "Parse and produce code from FORM.  FORM is `(not ...)'."
+  (rx-check form)
+  (let ((result (rx-form (cadr form) '!))
+	case-fold-search)
+    (cond ((string-match "\\`\\[^" result)
+	   (cond
+	    ((equal result "[^]") "[^^]")
+	    ((and (= (length result) 4) (null (eq rx-parent '!)))
+	     (regexp-quote (substring result 2 3)))
+	    ((concat "[" (substring result 2)))))
+	  ((eq ?\[ (aref result 0))
+	   (concat "[^" (substring result 1)))
+	  ((string-match "\\`\\\\[scbw]" result)
+	   (concat (upcase (substring result 0 2))
+		   (substring result 2)))
+	  ((string-match "\\`\\\\[SCBW]" result)
+	   (concat (downcase (substring result 0 2))
+		   (substring result 2)))
+	  (t
+	   (concat "[^" result "]")))))
+
+
+(defun rx-not-char (form)
+  "Parse and produce code from FORM.  FORM is `(not-char ...)'."
+  (rx-check form)
+  (rx-not `(not (in ,＠(cdr form)))))
+
+
+(defun rx-not-syntax (form)
+  "Parse and produce code from FORM.  FORM is `(not-syntax SYNTAX)'."
+  (rx-check form)
+  (rx-not `(not (syntax ,＠(cdr form)))))
+
+
+(defun rx-trans-forms (form &optional skip)
+  "If FORM's length is greater than two, transform it to length two.
+A form (HEAD REST ...) becomes (HEAD (and REST ...)).
+If SKIP is non-nil, allow that number of items after the head, i.e.
+`(= N REST ...)' becomes `(= N (and REST ...))' if SKIP is 1."
+  (unless skip (setq skip 0))
+  (let ((tail (nthcdr (1+ skip) form)))
+    (if (= (length tail) 1)
+	form
+      (let ((form (copy-sequence form)))
+	(setcdr (nthcdr skip form) (list (cons 'and tail)))
+	form))))
+
+
+(defun rx-= (form)
+  "Parse and produce code from FORM `(= N ...)'."
+  (rx-check form)
+  (setq form (rx-trans-forms form 1))
+  (unless (and (integerp (nth 1 form))
+	       (> (nth 1 form) 0))
+    (error "rx `=' requires positive integer first arg"))
+  (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) (nth 1 form)))
+
+
+(defun rx->= (form)
+  "Parse and produce code from FORM `(>= N ...)'."
+  (rx-check form)
+  (setq form (rx-trans-forms form 1))
+  (unless (and (integerp (nth 1 form))
+	       (> (nth 1 form) 0))
+    (error "rx `>=' requires positive integer first arg"))
+  (format "%s\\{%d,\\}" (rx-form (nth 2 form) '*) (nth 1 form)))
+
+
+(defun rx-** (form)
+  "Parse and produce code from FORM `(** N M ...)'."
+  (rx-check form)
+  (rx-form (cons 'repeat (cdr (rx-trans-forms form 2))) '*))
+
+
+(defun rx-repeat (form)
+  "Parse and produce code from FORM.
+FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'."
+  (rx-check form)
+  (if (> (length form) 4)
+      (setq form (rx-trans-forms form 2)))
+  (if (null (nth 2 form))
+      (setq form (cons (nth 0 form) (cons (nth 1 form) (nthcdr 3 form)))))
+  (cond ((= (length form) 3)
+	 (unless (and (integerp (nth 1 form))
+		      (> (nth 1 form) 0))
+	   (error "rx `repeat' requires positive integer first arg"))
+	 (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) (nth 1 form)))
+	((or (not (integerp (nth 2 form)))
+	     (< (nth 2 form) 0)
+	     (not (integerp (nth 1 form)))
+	     (< (nth 1 form) 0)
+	     (< (nth 2 form) (nth 1 form)))
+	 (error "rx `repeat' range error"))
+	(t
+	 (format "%s\\{%d,%d\\}" (rx-form (nth 3 form) '*)
+		 (nth 1 form) (nth 2 form)))))
+
+
+(defun rx-submatch (form)
+  "Parse and produce code from FORM, which is `(submatch ...)'."
+  (concat "\\("
+          (if (= 2 (length form))
+              ;; Only one sub-form.
+              (rx-form (cadr form))
+            ;; Several sub-forms implicitly concatenated.
+            (mapconcat (lambda (re) (rx-form re ':)) (cdr form) nil))
+          "\\)"))
+
+(defun rx-submatch-n (form)
+  "Parse and produce code from FORM, which is `(submatch-n N ...)'."
+  (let ((n (nth 1 form)))
+    (concat "\\(?" (number-to-string n) ":"
+	    (if (= 3 (length form))
+		;; Only one sub-form.
+		(rx-form (nth 2 form))
+	      ;; Several sub-forms implicitly concatenated.
+	      (mapconcat (lambda (re) (rx-form re ':)) (cddr form) nil))
+	    "\\)")))
+
+(defun rx-backref (form)
+  "Parse and produce code from FORM, which is `(backref N)'."
+  (rx-check form)
+  (format "\\%d" (nth 1 form)))
+
+(defun rx-check-backref (arg)
+  "Check arg ARG for Rx `backref'."
+  (or (and (integerp arg) (>= arg 1) (<= arg 9))
+      (error "rx `backref' requires numeric 1<=arg<=9: %s" arg)))
+
+(defun rx-kleene (form)
+  "Parse and produce code from FORM.
+FORM is `(OP FORM1)', where OP is one of the `zero-or-one',
+`zero-or-more' etc.  operators.
+If OP is one of `*', `+', `?', produce a greedy regexp.
+If OP is one of `*?', `+?', `??', produce a non-greedy regexp.
+If OP is anything else, produce a greedy regexp if `rx-greedy-flag'
+is non-nil."
+  (rx-check form)
+  (setq form (rx-trans-forms form))
+  (let ((suffix (cond ((memq (car form) '(* + ?\s)) "")
+		      ((memq (car form) '(*? +? ??)) "?")
+		      (rx-greedy-flag "")
+		      (t "?")))
+	(op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*")
+		  ((memq (car form) '(+ +? 1+ one-or-more))  "+")
+		  (t "?"))))
+    (rx-group-if
+     (concat (rx-form (cadr form) '*) op suffix)
+     (and (memq rx-parent '(t *)) rx-parent))))
+
+
+(defun rx-atomic-p (r &optional lax)
+  "Return non-nil if regexp string R is atomic.
+An atomic regexp R is one such that a suffix operator
+appended to R will apply to all of R.  For example, \"a\"
+\"[abc]\" and \"\\(ab\\|ab*c\\)\" are atomic and \"ab\",
+\"[ab]c\", and \"ab\\|ab*c\" are not atomic.
+
+This function may return false negatives, but it will not
+return false positives.  It is nevertheless useful in
+situations where an efficiency shortcut can be taken only if a
+regexp is atomic.  The function can be improved to detect
+more cases of atomic regexps.  Presently, this function
+detects the following categories of atomic regexp;
+
+  a group or shy group:  \\(...\\)
+  a character class:     [...]
+  a single character:    a
+
+On the other hand, false negatives will be returned for
+regexps that are atomic but end in operators, such as
+\"a+\".  I think these are rare.  Probably such cases could
+be detected without much effort.  A guarantee of no false
+negatives would require a theoretic specification of the set
+of all atomic regexps."
+  (let ((l (length r)))
+    (cond
+     ((<= l 1))
+     ((= l 2) (= (aref r 0) ?\\))
+     ((= l 3) (string-match "\\`\\(?:\\\\[cCsS_]\\|\\[[^^]\\]\\)" r))
+     ((null lax)
+      (cond
+       ((string-match "\\`\\[^?\]?\\(?:\\[:[a-z]+:]\\|[^\]]\\)*\\]\\'" r))
+       ((string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^\)]\\)*\\\\)\\'"
r)))))))
+
+
+(defun rx-syntax (form)
+  "Parse and produce code from FORM, which is `(syntax SYMBOL)'."
+  (rx-check form)
+  (let* ((sym (cadr form))
+	 (syntax (cdr (assq sym rx-syntax))))
+    (unless syntax
+      ;; Try sregex compatibility.
+      (cond
+       ((characterp sym) (setq syntax sym))
+       ((symbolp sym)
+        (let ((name (symbol-name sym)))
+          (if (= 1 (length name))
+              (setq syntax (aref name 0))))))
+      (unless syntax
+	(error "Unknown rx syntax `%s'" sym)))
+    (format "\\s%c" syntax)))
+
+
+(defun rx-check-category (form)
+  "Check the argument FORM of a `(category FORM)'."
+  (unless (or (characterp form)
+	      (cdr (assq form rx-categories)))
+    (error "Unknown category `%s'" form))
+  t)
+
+
+(defun rx-category (form)
+  "Parse and produce code from FORM, which is `(category SYMBOL)'."
+  (rx-check form)
+  (let ((char (if (characterp (cadr form))
+		  (cadr form)
+		(cdr (assq (cadr form) rx-categories)))))
+    (format "\\c%c" char)))
+
+
+(defun rx-eval (form)
+  "Parse and produce code from FORM, which is `(eval FORM)'."
+  (rx-check form)
+  (rx-form (eval (cadr form)) rx-parent))
+
+
+(defun rx-greedy (form)
+  "Parse and produce code from FORM.
+If FORM is '(minimal-match FORM1)', non-greedy versions of `*',
+`+', and `?' operators will be used in FORM1.  If FORM is
+'(maximal-match FORM1)', greedy operators will be used."
+  (rx-check form)
+  (let ((rx-greedy-flag (eq (car form) 'maximal-match)))
+    (rx-form (cadr form) rx-parent)))
+
+
+(defun rx-regexp (form)
+  "Parse and produce code from FORM, which is `(regexp STRING)'."
+  (rx-check form)
+  (rx-group-if (cadr form) rx-parent))
+
+
+(defun rx-form (form &optional rx-parent)
+  "Parse and produce code for regular expression FORM.
+FORM is a regular expression in sexp form.
+RX-PARENT shows which type of expression calls and controls putting of
+shy groups around the result and some more in other functions."
+  (if (stringp form)
+      (rx-group-if (regexp-quote form)
+		   (if (and (eq rx-parent '*) (< 1 (length form)))
+		       rx-parent))
+    (cond ((characterp form)
+	   (regexp-quote (char-to-string form)))
+	  ((symbolp form)
+	   (let ((info (rx-info form nil)))
+	     (cond ((stringp info)
+		    info)
+		   ((null info)
+		    (error "Unknown rx form `%s'" form))
+		   (t
+		    (funcall (nth 0 info) form)))))
+	  ((consp form)
+	   (let ((info (rx-info (car form) 'head)))
+	     (unless (consp info)
+	       (error "Unknown rx form `%s'" (car form)))
+	     (funcall (nth 0 info) form)))
+	  (t
+	   (error "rx syntax error at `%s'" form)))))
+
+
+;;;###autoload
+(defun rx-to-string (form &optional no-group)
+  "Parse and produce code for regular expression FORM.
+FORM is a regular expression in sexp form.
+NO-GROUP non-nil means don't put shy groups around the result."
+  (rx-group-if (rx-form form) (null no-group)))
+
+
+;;;###autoload
+(defmacro rx (&rest regexps)
+  "Translate regular expressions REGEXPS in sexp form to a regexp string.
+REGEXPS is a non-empty sequence of forms of the sort listed below.
+
+Note that `rx' is a Lisp macro; when used in a Lisp program being
+ compiled, the translation is performed by the compiler.
+See `rx-to-string' for how to do such a translation at run-time.
+
+The following are valid subforms of regular expressions in sexp
+notation.
+
+STRING
+     matches string STRING literally.
+
+CHAR
+     matches character CHAR literally.
+
+`not-newline', `nonl'
+     matches any character except a newline.
+
+`anything'
+     matches any character
+
+`(any SET ...)'
+`(in SET ...)'
+`(char SET ...)'
+     matches any character in SET ....  SET may be a character or string.
+     Ranges of characters can be specified as `A-Z' in strings.
+     Ranges may also be specified as conses like `(?A . ?Z)'.
+
+     SET may also be the name of a character class: `digit',
+     `control', `hex-digit', `blank', `graph', `print', `alnum',
+     `alpha', `ascii', `nonascii', `lower', `punct', `space',
`upper',
+     `word', or one of their synonyms.
+
+`(not (any SET ...))'
+     matches any character not in SET ...
+
+`line-start', `bol'
+     matches the empty string, but only at the beginning of a line
+     in the text being matched
+
+`line-end', `eol'
+     is similar to `line-start' but matches only at the end of a line
+
+`string-start', `bos', `bot'
+     matches the empty string, but only at the beginning of the
+     string being matched against.
+
+`string-end', `eos', `eot'
+     matches the empty string, but only at the end of the
+     string being matched against.
+
+`buffer-start'
+     matches the empty string, but only at the beginning of the
+     buffer being matched against.  Actually equivalent to `string-start'.
+
+`buffer-end'
+     matches the empty string, but only at the end of the
+     buffer being matched against.  Actually equivalent to `string-end'.
+
+`point'
+     matches the empty string, but only at point.
+
+`word-start', `bow'
+     matches the empty string, but only at the beginning of a word.
+
+`word-end', `eow'
+     matches the empty string, but only at the end of a word.
+
+`word-boundary'
+     matches the empty string, but only at the beginning or end of a
+     word.
+
+`(not word-boundary)'
+`not-word-boundary'
+     matches the empty string, but not at the beginning or end of a
+     word.
+
+`symbol-start'
+     matches the empty string, but only at the beginning of a symbol.
+
+`symbol-end'
+     matches the empty string, but only at the end of a symbol.
+
+`digit', `numeric', `num'
+     matches 0 through 9.
+
+`control', `cntrl'
+     matches ASCII control characters.
+
+`hex-digit', `hex', `xdigit'
+     matches 0 through 9, a through f and A through F.
+
+`blank'
+     matches space and tab only.
+
+`graphic', `graph'
+     matches graphic characters--everything except ASCII control chars,
+     space, and DEL.
+
+`printing', `print'
+     matches printing characters--everything except ASCII control chars
+     and DEL.
+
+`alphanumeric', `alnum'
+     matches letters and digits.  (But at present, for multibyte characters,
+     it matches anything that has word syntax.)
+
+`letter', `alphabetic', `alpha'
+     matches letters.  (But at present, for multibyte characters,
+     it matches anything that has word syntax.)
+
+`ascii'
+     matches ASCII (unibyte) characters.
+
+`nonascii'
+     matches non-ASCII (multibyte) characters.
+
+`lower', `lower-case'
+     matches anything lower-case.
+
+`upper', `upper-case'
+     matches anything upper-case.
+
+`punctuation', `punct'
+     matches punctuation.  (But at present, for multibyte characters,
+     it matches anything that has non-word syntax.)
+
+`space', `whitespace', `white'
+     matches anything that has whitespace syntax.
+
+`word', `wordchar'
+     matches anything that has word syntax.
+
+`not-wordchar'
+     matches anything that has non-word syntax.
+
+`(syntax SYNTAX)'
+     matches a character with syntax SYNTAX.  SYNTAX must be one
+     of the following symbols, or a symbol corresponding to the syntax
+     character, e.g. `\\.' for `\\s.'.
+
+     `whitespace'		(\\s- in string notation)
+     `punctuation'		(\\s.)
+     `word'			(\\sw)
+     `symbol'			(\\s_)
+     `open-parenthesis'		(\\s()
+     `close-parenthesis'	(\\s))
+     `expression-prefix'	(\\s')
+     `string-quote'		(\\s\")
+     `paired-delimiter'		(\\s$)
+     `escape'			(\\s\\)
+     `character-quote'		(\\s/)
+     `comment-start'		(\\s<)
+     `comment-end'		(\\s>)
+     `string-delimiter'		(\\s|)
+     `comment-delimiter'	(\\s!)
+
+`(not (syntax SYNTAX))'
+     matches a character that doesn't have syntax SYNTAX.
+
+`(category CATEGORY)'
+     matches a character with category CATEGORY.  CATEGORY must be
+     either a character to use for C, or one of the following symbols.
+
+     `consonant'			(\\c0 in string notation)
+     `base-vowel'			(\\c1)
+     `upper-diacritical-mark'		(\\c2)
+     `lower-diacritical-mark'		(\\c3)
+     `tone-mark'		        (\\c4)
+     `symbol'			        (\\c5)
+     `digit'			        (\\c6)
+     `vowel-modifying-diacritical-mark'	(\\c7)
+     `vowel-sign'			(\\c8)
+     `semivowel-lower'			(\\c9)
+     `not-at-end-of-line'		(\\c<)
+     `not-at-beginning-of-line'		(\\c>)
+     `alpha-numeric-two-byte'		(\\cA)
+     `chinse-two-byte'			(\\cC)
+     `greek-two-byte'			(\\cG)
+     `japanese-hiragana-two-byte'	(\\cH)
+     `indian-tow-byte'			(\\cI)
+     `japanese-katakana-two-byte'	(\\cK)
+     `korean-hangul-two-byte'		(\\cN)
+     `cyrillic-two-byte'		(\\cY)
+     `combining-diacritic'		(\\c^)
+     `ascii'				(\\ca)
+     `arabic'				(\\cb)
+     `chinese'				(\\cc)
+     `ethiopic'				(\\ce)
+     `greek'				(\\cg)
+     `korean'				(\\ch)
+     `indian'				(\\ci)
+     `japanese'				(\\cj)
+     `japanese-katakana'		(\\ck)
+     `latin'				(\\cl)
+     `lao'				(\\co)
+     `tibetan'				(\\cq)
+     `japanese-roman'			(\\cr)
+     `thai'				(\\ct)
+     `vietnamese'			(\\cv)
+     `hebrew'				(\\cw)
+     `cyrillic'				(\\cy)
+     `can-break'			(\\c|)
+
+`(not (category CATEGORY))'
+     matches a character that doesn't have category CATEGORY.
+
+`(and SEXP1 SEXP2 ...)'
+`(: SEXP1 SEXP2 ...)'
+`(seq SEXP1 SEXP2 ...)'
+`(sequence SEXP1 SEXP2 ...)'
+     matches what SEXP1 matches, followed by what SEXP2 matches, etc.
+
+`(submatch SEXP1 SEXP2 ...)'
+`(group SEXP1 SEXP2 ...)'
+     like `and', but makes the match accessible with `match-end',
+     `match-beginning', and `match-string'.
+
+`(submatch-n N SEXP1 SEXP2 ...)'
+`(group-n N SEXP1 SEXP2 ...)'
+     like `group', but make it an explicitly-numbered group with
+     group number N.
+
+`(or SEXP1 SEXP2 ...)'
+`(| SEXP1 SEXP2 ...)'
+     matches anything that matches SEXP1 or SEXP2, etc.  If all
+     args are strings, use `regexp-opt' to optimize the resulting
+     regular expression.
+
+`(minimal-match SEXP)'
+     produce a non-greedy regexp for SEXP.  Normally, regexps matching
+     zero or more occurrences of something are \"greedy\" in that they
+     match as much as they can, as long as the overall regexp can
+     still match.  A non-greedy regexp matches as little as possible.
+
+`(maximal-match SEXP)'
+     produce a greedy regexp for SEXP.  This is the default.
+
+Below, `SEXP ...' represents a sequence of regexp forms, treated as if
+enclosed in `(and ...)'.
+
+`(zero-or-more SEXP ...)'
+`(0+ SEXP ...)'
+     matches zero or more occurrences of what SEXP ... matches.
+
+`(* SEXP ...)'
+     like `zero-or-more', but always produces a greedy regexp, independent
+     of `rx-greedy-flag'.
+
+`(*? SEXP ...)'
+     like `zero-or-more', but always produces a non-greedy regexp,
+     independent of `rx-greedy-flag'.
+
+`(one-or-more SEXP ...)'
+`(1+ SEXP ...)'
+     matches one or more occurrences of SEXP ...
+
+`(+ SEXP ...)'
+     like `one-or-more', but always produces a greedy regexp.
+
+`(+? SEXP ...)'
+     like `one-or-more', but always produces a non-greedy regexp.
+
+`(zero-or-one SEXP ...)'
+`(optional SEXP ...)'
+`(opt SEXP ...)'
+     matches zero or one occurrences of A.
+
+`(? SEXP ...)'
+     like `zero-or-one', but always produces a greedy regexp.
+
+`(?? SEXP ...)'
+     like `zero-or-one', but always produces a non-greedy regexp.
+
+`(repeat N SEXP)'
+`(= N SEXP ...)'
+     matches N occurrences.
+
+`(>= N SEXP ...)'
+     matches N or more occurrences.
+
+`(repeat N M SEXP)'
+`(** N M SEXP ...)'
+     matches N to M occurrences.
+
+`(backref N)'
+     matches what was matched previously by submatch N.
+
+`(eval FORM)'
+     evaluate FORM and insert result.  If result is a string,
+     `regexp-quote' it.
+
+`(regexp REGEXP)'
+     include REGEXP in string notation in the result."
+  (cond ((null regexps)
+	 (error "No regexp"))
+	((cdr regexps)
+	 (rx-to-string `(and ,＠regexps) t))
+	(t
+	 (rx-to-string (car regexps) t))))
+
+;; ;; sregex.el replacement
+
+;; ;;;###autoload (provide 'sregex)
+;; ;;;###autoload (autoload 'sregex "rx")
+;; (defalias 'sregex 'rx-to-string)
+;; ;;;###autoload (autoload 'sregexq "rx" nil nil 'macro)
+;; (defalias 'sregexq 'rx)
+
+(provide 'rx)
+
+;;; rx.el ends here

Repository URL: https://bitbucket.org/xemacs/xemacs-base/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.

_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

2005

2004

2003

commit/xemacs-base: sperber: Add rx.el.