commit: Add a new optional ESCAPE-CHAR argument to #'split-string-by-char.
Aidan Kehoe
kehoea at parhasard.net
Fri Feb 19 18:48:15 EST 2010
changeset: 5035:b1e48555be7d
user: Aidan Kehoe <kehoea at parhasard.net>
date: Sun Feb 07 12:24:03 2010 +0000
files: src/ChangeLog src/fns.c tests/ChangeLog tests/automated/lisp-tests.el
description:
Add a new optional ESCAPE-CHAR argument to #'split-string-by-char.
src/ChangeLog addition:
2010-02-07 Aidan Kehoe <kehoea at parhasard.net>
* fns.c (split_string_by_ichar_1): Extend this to take UNESCAPE
and ESCAPECHAR arguments.
(split_external_path, split_env_path, Fsplit_string_by_char)
(Fsplit_path):
Pass the new arguments to split_string_by_ichar_1(); take a new
optional argument, ESCAPE-CHAR, in #'split-string-by-char,
allowing SEPCHAR to be escaped.
tests/ChangeLog addition:
2010-02-07 Aidan Kehoe <kehoea at parhasard.net>
* automated/lisp-tests.el (split-string-by-char):
Test this function, and its new ESCAPE-CHAR argument.
diff -r 1b96882bdf37 -r b1e48555be7d src/ChangeLog
--- a/src/ChangeLog Fri Feb 19 23:21:27 2010 +0000
+++ b/src/ChangeLog Sun Feb 07 12:24:03 2010 +0000
@@ -3450,6 +3450,16 @@
prototypes of complex_vars_of_*() in symsinit.h, for cosmetic
reasons.
+
+2010-02-07 Aidan Kehoe <kehoea at parhasard.net>
+
+ * fns.c (split_string_by_ichar_1): Extend this to take UNESCAPE
+ and ESCAPECHAR arguments.
+ (split_external_path, split_env_path, Fsplit_string_by_char)
+ (Fsplit_path):
+ Pass the new arguments to split_string_by_ichar_1(); take a new
+ optional argument, ESCAPE-CHAR, in #'split-string-by-char,
+ allowing SEPCHAR to be escaped.
2010-01-09 Didier Verna <didier at xemacs.org>
diff -r 1b96882bdf37 -r b1e48555be7d src/fns.c
--- a/src/fns.c Fri Feb 19 23:21:27 2010 +0000
+++ b/src/fns.c Sun Feb 07 12:24:03 2010 +0000
@@ -1053,31 +1053,129 @@
}
/* Split STRING into a list of substrings. The substrings are the
- parts of original STRING separated by SEPCHAR. */
+ parts of original STRING separated by SEPCHAR.
+
+ If UNESCAPE is non-zero, ESCAPECHAR specifies a character that will quote
+ SEPCHAR, and cause it not to split STRING. A double ESCAPECHAR is
+ necessary for ESCAPECHAR to appear once in a substring. */
+
static Lisp_Object
split_string_by_ichar_1 (const Ibyte *string, Bytecount size,
- Ichar sepchar)
+ Ichar sepchar, int unescape, Ichar escapechar)
{
Lisp_Object result = Qnil;
const Ibyte *end = string + size;
- while (1)
- {
- const Ibyte *p = string;
- while (p < end)
- {
- if (itext_ichar (p) == sepchar)
- break;
- INC_IBYTEPTR (p);
- }
- result = Fcons (make_string (string, p - string), result);
- if (p < end)
- {
- string = p;
- INC_IBYTEPTR (string); /* skip sepchar */
- }
- else
- break;
+ if (unescape)
+ {
+ Ibyte unescape_buffer[64], *unescape_buffer_ptr = unescape_buffer,
+ escaped[MAX_ICHAR_LEN], *unescape_cursor;
+ int deleting_escapes, previous_escaped, escaped_len;
+ Ichar pchar, unescape_buffer_size = countof (unescape_buffer);
+
+ escaped_len = set_itext_ichar (escaped, escapechar);
+
+ while (1)
+ {
+ const Ibyte *p = string, *cursor;
+ deleting_escapes = 0;
+ previous_escaped = 0;
+
+ while (p < end)
+ {
+ pchar = itext_ichar (p);
+
+ if (pchar == sepchar)
+ {
+ if (!previous_escaped)
+ {
+ break;
+ }
+ }
+ else if (pchar == escapechar
+ /* Doubled escapes don't escape: */
+ && !previous_escaped)
+ {
+ ++deleting_escapes;
+ previous_escaped = 1;
+ }
+ else
+ {
+ previous_escaped = 0;
+ }
+
+ INC_IBYTEPTR (p);
+ }
+
+ if (deleting_escapes)
+ {
+ if (((p - string) - (escaped_len * deleting_escapes))
+ > unescape_buffer_size)
+ {
+ unescape_buffer_size =
+ ((p - string) - (escaped_len * deleting_escapes)) * 1.5;
+ unescape_buffer_ptr = alloca_ibytes (unescape_buffer_size);
+ }
+
+ cursor = string;
+ unescape_cursor = unescape_buffer_ptr;
+ previous_escaped = 0;
+
+ while (cursor < p)
+ {
+ pchar = itext_ichar (cursor);
+
+ if (pchar != escapechar || previous_escaped)
+ {
+ memcpy (unescape_cursor, cursor,
+ itext_ichar_len (cursor));
+ INC_IBYTEPTR (unescape_cursor);
+ }
+
+ previous_escaped = !previous_escaped
+ && (pchar == escapechar);
+
+ INC_IBYTEPTR (cursor);
+ }
+
+ result = Fcons (make_string (unescape_buffer_ptr,
+ unescape_cursor
+ - unescape_buffer_ptr),
+ result);
+ }
+ else
+ {
+ result = Fcons (make_string (string, p - string), result);
+ }
+ if (p < end)
+ {
+ string = p;
+ INC_IBYTEPTR (string); /* skip sepchar */
+ }
+ else
+ break;
+ }
+ }
+ else
+ {
+ while (1)
+ {
+ const Ibyte *p = string;
+ while (p < end)
+ {
+ if (itext_ichar (p) == sepchar)
+ break;
+ INC_IBYTEPTR (p);
+ }
+ result = Fcons (make_string (string, p - string), result);
+ if (p < end)
+ {
+ string = p;
+ INC_IBYTEPTR (string); /* skip sepchar */
+ }
+ else
+ break;
+ }
}
return Fnreverse (result);
}
@@ -1102,7 +1200,7 @@
if (!newlen)
return Qnil;
- return split_string_by_ichar_1 (newpath, newlen, SEPCHAR);
+ return split_string_by_ichar_1 (newpath, newlen, SEPCHAR, 0, 0);
}
Lisp_Object
@@ -1115,22 +1213,34 @@
path = default_;
if (!path)
return Qnil;
- return split_string_by_ichar_1 (path, qxestrlen (path), SEPCHAR);
+ return split_string_by_ichar_1 (path, qxestrlen (path), SEPCHAR, 0, 0);
}
/* Ben thinks this function should not exist or be exported to Lisp.
We use it to define split-path-string in subr.el (not!). */
-DEFUN ("split-string-by-char", Fsplit_string_by_char, 2, 2, 0, /*
+DEFUN ("split-string-by-char", Fsplit_string_by_char, 2, 3, 0, /*
Split STRING into a list of substrings originally separated by SEPCHAR.
-*/
- (string, sepchar))
-{
+
+With optional ESCAPE-CHAR, any instances of SEPCHAR preceded by that
+character will not split the string, and a double instance of ESCAPE-CHAR
+will be necessary for a single ESCAPE-CHAR to appear in the output string.
+*/
+ (string, sepchar, escape_char))
+{
+ Ichar escape_ichar = 0;
+
CHECK_STRING (string);
CHECK_CHAR (sepchar);
+ if (!NILP (escape_char))
+ {
+ CHECK_CHAR (escape_char);
+ escape_ichar = XCHAR (escape_char);
+ }
return split_string_by_ichar_1 (XSTRING_DATA (string),
- XSTRING_LENGTH (string),
- XCHAR (sepchar));
+ XSTRING_LENGTH (string),
+ XCHAR (sepchar),
+ !NILP (escape_char), escape_ichar);
}
/* #### This was supposed to be in subr.el, but is used VERY early in
@@ -1154,7 +1264,7 @@
return (split_string_by_ichar_1
(XSTRING_DATA (path), XSTRING_LENGTH (path),
- itext_ichar (XSTRING_DATA (Vpath_separator))));
+ itext_ichar (XSTRING_DATA (Vpath_separator)), 0, 0));
}
diff -r 1b96882bdf37 -r b1e48555be7d tests/ChangeLog
--- a/tests/ChangeLog Fri Feb 19 23:21:27 2010 +0000
+++ b/tests/ChangeLog Sun Feb 07 12:24:03 2010 +0000
@@ -273,6 +273,11 @@
* automated/mule-tests.el (featurep):
Use utf-8 as file-name-coding-system under Cygwin 1.7+.
+
+2010-02-07 Aidan Kehoe <kehoea at parhasard.net>
+
+ * automated/lisp-tests.el (split-string-by-char):
+ Test this function, and its new ESCAPE-CHAR argument.
2010-01-01 Aidan Kehoe <kehoea at parhasard.net>
diff -r 1b96882bdf37 -r b1e48555be7d tests/automated/lisp-tests.el
--- a/tests/automated/lisp-tests.el Fri Feb 19 23:21:27 2010 +0000
+++ b/tests/automated/lisp-tests.el Sun Feb 07 12:24:03 2010 +0000
@@ -1,4 +1,4 @@
-;; Copyright (C) 1998 Free Software Foundation, Inc.
+;; Copyright (C) 1998 Free Software Foundation, Inc. -*- coding: iso-8859-1 -*-
;; Author: Martin Buchholz <martin at xemacs.org>
;; Maintainer: Martin Buchholz <martin at xemacs.org>
@@ -1075,6 +1075,76 @@
'("foo" "bar" ""))
(Assert-equal (split-string "foobar" split-string-default-separators)
'("foobar"))
+
+;;-----------------------------------------------------
+;; Test split-string-by-char
+;;-----------------------------------------------------
+
+(Assert
+ (equal
+ (split-string-by-char
+ #r"re\:ee:this\\is\\text\\\\:oo\ps:
+Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der
+bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser,
+worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die
+unreinen\: Geister brüten.\\
+tocopherol
+Vitamin E: any or all of a group of closely related fat-soluble compounds
+that occur especially in plant oils and are anti-oxidants essential in the
+diets of many animals and probably of man. "
+ ?: ?\\)
+ '("re:ee" "this\\is\\text\\\\" "oops" "
+Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der
+bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser,
+worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die
+unreinen: Geister brüten.\\
+tocopherol
+Vitamin E" " any or all of a group of closely related fat-soluble compounds
+that occur especially in plant oils and are anti-oxidants essential in the
+diets of many animals and probably of man. ")))
+(Assert
+ (equal
+ (split-string-by-char
+ #r"re\:ee:this\\is\\text\\\\:oo\ps:
+Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der
+bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser,
+worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die
+unreinen\: Geister brüten.\\
+tocopherol
+Vitamin E: any or all of a group of closely related fat-soluble compounds
+that occur especially in plant oils and are anti-oxidants essential in the
+diets of many animals and probably of man. "
+ ?: ?\x00)
+ '("re\\" "ee" "this\\\\is\\\\text\\\\\\\\" "oo\\ps" "
+Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der
+bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser,
+worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die
+unreinen\\" " Geister brüten.\\\\
+tocopherol
+Vitamin E" " any or all of a group of closely related fat-soluble compounds
+that occur especially in plant oils and are anti-oxidants essential in the
+diets of many animals and probably of man. ")))
+(Assert
+ (equal
+ (split-string-by-char
+ #r"re\:ee:this\\is\\text\\\\:oo\ps:
+Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der
+bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser,
+worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die
+unreinen\: Geister brüten.\\
+tocopherol
+Vitamin E: any or all of a group of closely related fat-soluble compounds
+that occur especially in plant oils and are anti-oxidants essential in the
+diets of many animals and probably of man. " ?\\)
+ '("re" ":ee:this" "" "is" "" "text" "" "" "" ":oo" "ps:
+Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der
+bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser,
+worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die
+unreinen" ": Geister brüten." "" "
+tocopherol
+Vitamin E: any or all of a group of closely related fat-soluble compounds
+that occur especially in plant oils and are anti-oxidants essential in the
+diets of many animals and probably of man. ")))
;;-----------------------------------------------------
;; Test near-text buffer functions.
More information about the XEmacs-Patches
mailing list