User: stephent
Date: 06/05/11 10:58:03
Modified: xemacs/src ChangeLog mule-coding.c
Log:
Improve detection of ISO-8-1 coding systems.
<874pzx2bn2.fsf(a)tleepslib.sk.tsukuba.ac.jp>
Revision Changes Path
1.960 +5 -0 XEmacs/xemacs/src/ChangeLog
Index: ChangeLog
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/ChangeLog,v
retrieving revision 1.959
retrieving revision 1.960
diff -u -p -r1.959 -r1.960
--- ChangeLog 2006/05/10 15:03:35 1.959
+++ ChangeLog 2006/05/11 08:57:59 1.960
@@ -1,3 +1,8 @@
+2006-02-27 Joachim Schrod <jschrod(a)acm.org>
+
+ * mule-coding.c (iso2022_detect): Handle Latin-1 encoded files
+ that have several high-byte chars in a row.
+
2006-05-08 Jerry James <james(a)xemacs.org>
* number.c (Fcanonicalize_number): Use EMACS_INT instead of int,
1.37 +14 -1 XEmacs/xemacs/src/mule-coding.c
Index: mule-coding.c
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/mule-coding.c,v
retrieving revision 1.36
retrieving revision 1.37
diff -u -p -r1.36 -r1.37
--- mule-coding.c 2005/11/22 07:19:32 1.36
+++ mule-coding.c 2006/05/11 08:58:01 1.37
@@ -2927,7 +2927,20 @@ iso2022_detect (struct detection_state *
}
else if (data->odd_high_byte_groups > 0 &&
data->even_high_byte_groups > 0)
- SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY);
+ {
+ /* Well, this could be a Latin-1 text, with most high-byte
+ characters single, but sometimes two are together, though
+ this happens not as often. This is common for Western
+ European languages like German, French, Danish, Swedish, etc.
+ In that case, either the file is rather small and
+ even_high_byte_groups is low,
+ or the file is larger and the ratio of odd to even
+ groups is very high. */
+ SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY);
+ if (data->even_high_byte_groups <= 3 ||
+ data->odd_high_byte_groups >= 10 * data->even_high_byte_groups)
+ DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_LIKELY;
+ }
else
SET_DET_RESULTS (st, iso2022, DET_AS_LIKELY_AS_UNLIKELY);
}
Show replies by date