Nothing too complex, and even the fragile test suite passes. Here are some
profiling numbers, where charsets-in-region is the implementation in C and
charsets-in-region0 is the implementation in Lisp:
10,000 calls in *scratch*, vanilla startup
Function Name Ticks/Total %Usage Calls GC-Usage/ Total
===============================/===== ====== ===== ========/=======
charsets-in-region 74/ 88 77.083 10000 99576/ 120000
(profile overhead) 18/ 18 18.750 20424/ 0
charsets-in-region-*-10000 4/ 96 4.167 1 40/ 120040
#<compiled-function (expr &optional arg) "...(36)" [was-profiling arg
expr profiling-active-p ((byte-code ¬~Á ~Á~ [was-profiling stop-profiling] 1))
start-profiling eval profile-results clear-profiling-info ((stop-profiling))] 2
(/usr/local/lib/xemacs/xemacs-packages/lisp/xemacs-devel/profile.elc . 9000) (list (read
(read-string Expression to profile: )) current-prefix-arg)>
0/ 96 0.000 0/ 120028
Function Name Ticks/Total %Usage Calls GC-Usage/ Total
================================/===== ====== ===== ========/=======
charsets-in-region0 1999/ 2304 89.281 10000 117000/ 120064
(profile overhead) 128/ 128 5.717 3092/ 0
char-charset 110/ 92 4.913 3950000
charsets-in-region0-*-10000 2/ 2306 0.089 1 0/ 120104
#<compiled-function (expr &optional arg) "...(36)" [was-profiling arg
expr profiling-active-p ((byte-code ¬~Á ~Á~ [was-profiling stop-profiling] 1))
start-profiling eval profile-results clear-profiling-info ((stop-profiling))] 2
(/usr/local/lib/xemacs/xemacs-packages/lisp/xemacs-devel/profile.elc . 9000) (list (read
(read-string Expression to profile: )) current-prefix-arg)>
10,000 calls on HELLO, vanilla startup
Function Name Ticks/Total %Usage Calls GC-Usage/ Total
====================================/===== ====== ===== ========/=======
charsets-in-region 2620/ 3351 80.690 10000 2040000/2040000
(in char-byte conversion) 576/ 514 17.739 11570000
(in garbage collection) 47/ 47 1.447 1
charsets-in-region-*-10000 4/ 3403 0.123 1 24/2040024
#<compiled-function (expr &optional arg) "...(36)" [was-profiling arg
expr profiling-active-p ((byte-code ¬Á Á [was-profiling stop-profiling] 1))
start-profiling eval profile-results clear-profiling-info ((stop-profiling))] 2
(/usr/local/lib/xemacs/xemacs-packages/lisp/xemacs-devel/profile.elc . 9000) (list (read
(read-string Expression to profile: )) current-prefix-arg)>
(a call with charsets-in-region0 crashed in the profiler)
5,000 calls in HELLO, vanilla startup
Function Name Ticks/Total %Usage Calls GC-Usage/ Total
====================================/===== ====== ===== ========/=======
charsets-in-region 2567/ 3332 79.969 10000 2040000/2040000
(in char-byte conversion) 568/ 479 17.695 11570000
(in garbage collection) 43/ 43 1.340 1
(profile overhead) 28/ 28 0.872
charsets-in-region-*-5000 4/ 3379 0.125 1 64/2040064
Function Name Ticks/Total %Usage Calls GC-Usage/ Total
====================================/===== ====== ===== ========/=======
charsets-in-region0 11693/13187 91.295 10000 2041944/2041944
(in char-byte conversion) 735/ 597 5.739 23450000
char-charset 326/ 246 2.545 11740000
(in garbage collection) 46/ 46 0.359 1
charsets-in-region0-*-5000 8/13195 0.062 1 40/2041984
lisp/ChangeLog addition:
2006-11-14 Aidan Kehoe <kehoea(a)parhasard.net>
* mule/mule-charset.el:
* mule/mule-charset.el (charsets-in-string):
Implement it in terms of charsets-in-region.
* mule/mule-charset.el (charsets-in-region): Removed. It's now in
C.
src/ChangeLog addition:
2006-11-14 Aidan Kehoe <kehoea(a)parhasard.net>
* mule-charset.c:
* mule-charset.c (Fcharsets_in_region):
Added a charsets-in-region implementation in C.
XEmacs Trunk source patch:
Diff command: cvs -q diff -u
Files affected: src/mule-charset.c lisp/mule/mule-charset.el
===================================================================
RCS src/mule-charset.c
===================================================================
RCS lisp/mule/mule-charset.el
===================================================================
RCS
Index: lisp/mule/mule-charset.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/mule/mule-charset.el,v
retrieving revision 1.19
diff -u -r1.19 mule-charset.el
--- lisp/mule/mule-charset.el 2006/11/05 22:31:38 1.19
+++ lisp/mule/mule-charset.el 2006/11/14 13:49:48
@@ -38,42 +38,16 @@
;;;; Classifying text according to charsets
-;; the old version was broken in a couple of ways
-;; this is one of several versions, I tried a hash as well as the
-;; `prev-charset' cache used in the old version, but this was definitely
-;; faster than the hash version and marginally faster than the prev-charset
-;; version
-;; #### this really needs to be moved into C
-(defun charsets-in-region (start end &optional buffer)
- "Return a list of the charsets in the region between START and END.
-BUFFER defaults to the current buffer if omitted."
- (let (list)
- (save-excursion
- (if buffer
- (set-buffer buffer))
- (save-restriction
- (narrow-to-region start end)
- (goto-char (point-min))
- (while (not (eobp))
- ;; the first test will usually succeed on testing the
- ;; car of the list; don't waste time let-binding.
- (or (memq (char-charset (char-after (point))) list)
- (setq list (cons (char-charset (char-after (point))) list)))
- (forward-char))))
- list))
-
(defun charsets-in-string (string)
"Return a list of the charsets in STRING."
- (let (list)
- (mapc (lambda (ch)
- ;; the first test will usually succeed on testing the
- ;; car of the list; don't waste time let-binding.
- (or (memq (char-charset ch) list)
- (setq list (cons (char-charset ch) list))))
- string)
- list))
+ (let (res)
+ (with-string-as-buffer-contents string
+ ;; charsets-in-region now in C.
+ (setq res (charsets-in-region (point-min) (point-max))))
+ res))
(defalias 'find-charset-string 'charsets-in-string)
+
(defalias 'find-charset-region 'charsets-in-region)
Index: src/mule-charset.c
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/mule-charset.c,v
retrieving revision 1.51
diff -u -r1.51 mule-charset.c
--- src/mule-charset.c 2006/11/12 13:40:08 1.51
+++ src/mule-charset.c 2006/11/14 13:49:48
@@ -937,6 +937,39 @@
return Qnil;
}
+DEFUN ("charsets-in-region", Fcharsets_in_region, 2, 3, 0, /*
+Return a list of the charsets in the region between START and END.
+BUFFER defaults to the current buffer if omitted.
+*/
+ (start, end, buffer))
+{
+ /* This function can GC */
+ struct buffer *buf = decode_buffer (buffer, 1);
+ Charbpos pos, stop; /* Limits of the region. */
+ Lisp_Object res = Qnil;
+ int charsets[NUM_LEADING_BYTES];
+ Ibyte lb;
+ struct gcpro gcpro1;
+
+ memset(charsets, 0, sizeof(charsets));
+ get_buffer_range_char (buf, start, end, &pos, &stop, 0);
+
+ GCPRO1 (res);
+ while (pos < stop)
+ {
+ lb = ichar_leading_byte(BUF_FETCH_CHAR (buf, pos));
+ if (0 == charsets[lb - MIN_LEADING_BYTE])
+ {
+ charsets[lb - MIN_LEADING_BYTE] = 1;
+ res = Fcons (XCHARSET_NAME(charset_by_leading_byte(lb)), res);
+ }
+ ++pos;
+ }
+ UNGCPRO;
+
+ return res;
+}
+
/************************************************************************/
/* memory usage */
@@ -1029,6 +1062,7 @@
DEFSUBR (Fcharset_id);
DEFSUBR (Fset_charset_ccl_program);
DEFSUBR (Fset_charset_registries);
+ DEFSUBR (Fcharsets_in_region);
#ifdef MEMORY_USAGE_STATS
DEFSUBR (Fcharset_memory_usage);
--
Santa Maradona, priez pour moi!
_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://calypso.tux.org/cgi-bin/mailman/listinfo/xemacs-patches