[PATCH] Take European, Cyrillic, Greek syntax classes from Latin-1

Tuesday, 19 December 2006

 Ar an t-ochtú lá déag de mí na Nollaig, scríobh Aidan Kehoe: 

...
 My strategy w.r.t. syntax tables was wrong in that patch. What I should have
 done was copy the syntax from the corresponding Latin 1 character; and the
 same should be done for most of the characters in european.el.
The below does that (sorry, no ChangeLog yet). I intend moving the content
of european.el to latin.el as the comment suggests before committing it. 

Index: lisp/mule/cyrillic.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/mule/cyrillic.el,v
retrieving revision 1.12
diff -u -r1.12 cyrillic.el
--- lisp/mule/cyrillic.el	2006/12/17 13:41:49	1.12
+++ lisp/mule/cyrillic.el	2006/12/18 16:32:31
＠＠ -29,16 +29,19 ＠＠
 ;; The character set ISO8859-5 is supported.  KOI-8 and ALTERNATIVNYJ are
 ;; converted to ISO8859-5 internally.

-;; Windows-1251 support deleted because XEmacs has automatic support.
+;; [Windows-1251 support deleted because XEmacs has automatic support.]

-;;; Code:
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; CYRILLIC
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; #### We only have automatic support on Windows; that needs to be put
+;; back. Also, the Russian Wikipedia articles on KOI-8 list several other
+;; related encodings--KOI8-U (Ukrainian), KOI8-RU (simultaneous support for
+;; Russian, Belorussian, and Ukrainian), KOI8-C (for languages of the
+;; Caucasus), KOI8-O (Old Church Slavonic)--and it would be nice to have
+;; them. Beyond that, we're currently trashing lots of code points with
+;; KOI-8 R; it would be nice to leverage the Unicode support to not do that. 

-;; ISO-8859-5
+;;; Code:

+;; Case table:
 (loop
   for (upper lower)
   in '((#xcf #xef) ; YA
＠＠ -94,14 +97,22 ＠＠
 		       case-table))

 ;; The default character syntax is now word. Pay attention to the
-;; exceptions in ISO-8859-5. 
-(dolist (code '(#xAD	;; SOFT HYPHEN
-		#xF0	;; NUMERO SIGN
-		#xFD))  ;; SECTION SIGN
-  (modify-syntax-entry (make-char 'cyrillic-iso8859-5 code) "."))
-
-;; NO-BREAK SPACE
-(modify-syntax-entry (make-char 'cyrillic-iso8859-5 #xA0) " ")
+;; exceptions in ISO-8859-5, copying them from ISO-8859-1. 
+(loop
+  for (latin-1 cyrillic) 
+  in '((#xAD #xAD)  ;; SOFT HYPHEN
+       (#xA7 #xFD)  ;; SECTION SIGN
+       (#xA0 #xA0)) ;; NO BREAK SPACE
+  with syntax-table = (standard-syntax-table)
+  do (modify-syntax-entry
+      (make-char 'cyrillic-iso8859-5 cyrillic)
+      (string (char-syntax (make-char 'latin-iso8859-1 latin-1)))
+      syntax-table))
+  
+;; Take NUMERO SIGN's syntax from #. 
+(modify-syntax-entry (make-char 'cyrillic-iso8859-5 #xF0)
+                     (string (char-syntax ?\# (standard-syntax-table)))
+                     (standard-syntax-table))

 (make-coding-system
  'iso-8859-5 'iso2022
＠＠ -110,8 +121,7 ＠＠
    charset-g1 cyrillic-iso8859-5
    charset-g2 t
    charset-g3 t
-   mnemonic "ISO8/Cyr"
-   ))
+   mnemonic "ISO8/Cyr"))

 (set-language-info-alist
  "Cyrillic-ISO" '((charset cyrillic-iso8859-5)
＠＠ -155,12 +165,10 ＠＠
       (let* ((ch (aref cyrillic-koi8-r-decode-table i))
 	     (split (split-char ch)))
 	(cond ((eq (car split) 'cyrillic-iso8859-5)
-	       (aset table (logior (nth 1 split) 128) i)
-	       )
+	       (aset table (logior (nth 1 split) 128) i))
 	      ((eq ch 32))
 	      ((eq (car split) 'ascii)
-	       (aset table ch i)
-	       )))
+	       (aset table ch i))))
       (setq i (1+ i)))
     table)
   "Cyrillic KOI8-R encoding table.")
Index: lisp/mule/european.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/mule/european.el,v
retrieving revision 1.13
diff -u -r1.13 european.el
--- lisp/mule/european.el	2006/11/05 22:31:37	1.13
+++ lisp/mule/european.el	2006/12/18 16:32:31
＠＠ -4,7 +4,7 ＠＠
 ;; Licensed to the Free Software Foundation.
 ;; Copyright (C) 1997 MORIOKA Tomohiko
 ;; Copyright (C) 2001 Ben Wing.
-;; Copyright (C) 2002, 2005 Free Software Foundation
+;; Copyright (C) 2002, 2005, 2006 Free Software Foundation

 ;; Keywords: multilingual, European

＠＠ -30,97 +30,135 ＠＠
 ;; For Roman-alphabet-using Europeans, eight coded character sets,
 ;; ISO8859-1,2,3,4,9,14,15,16 are supported.

-;; #### latin.el would be a better name for this file.
+
+
+;; Latin-1's case is dealt with in iso8859-1.el, which see. Its syntax is
+;; initialised in syntax.c:complex_vars_of_syntax.
+
+
+;; Latin-2 (ISO-8859-2). Central Europe; Czech, Slovak, Hungarian, Polish,
+;; Croatian, other languages.
+;;
+;; (Yes, it really is Central European. German written in Latin 2 and using
+;; only Umlaute and the sharp S in its non-ASCII repertoire is bit-for-bit
+;; identical with the same text in Latin-1.)
+
+(make-coding-system
+ 'iso-8859-2 'iso2022 "ISO-8859-2 (Latin-2)"
+ '(charset-g0 ascii
+   charset-g1 latin-iso8859-2
+   charset-g2 t
+   charset-g3 t
+   mnemonic "MIME/Ltn-2"))
+
+;; The default character syntax is now word. Pay attention to the
+;; exceptions in ISO-8859-2, copying them from ISO-8859-1. 
+(loop
+  for (latin-2 latin-1) 
+  in '((#xA0 #xA0)  ;; NO BREAK SPACE
+       (#xA2 #xB4)  ;; BREVE, ACUTE ACCENT
+       (#xA4 #xA4)  ;; CURRENCY SIGN
+       (#xA7 #xA7)  ;; SECTION SIGN
+       (#xA8 #xA8)  ;; DIAERESIS
+       (#xAD #xAD)  ;; SOFT HYPHEN
+       (#xB0 #xB0)  ;; DEGREE SIGN
+       (#xB2 #xB4)  ;; OGONEK, ACUTE ACCENT
+       (#xB4 #xB4)  ;; ACUTE ACCENT
+       (#xB7 #xB4)  ;; CARON, ACUTE ACCENT
+       (#xB8 #xB8)  ;; CEDILLA
+       (#xBD #xB4)  ;; DOUBLE ACUTE ACCENT, ACUTE ACCENT
+       (#xD7 #xD7)  ;; MULTIPLICATION SIGN
+       (#xF7 #xF7)  ;; DIVISION SIGN
+       (#xFF #xB4)) ;; DOT ABOVE, ACUTE ACCENT
+  with syntax-table = (standard-syntax-table)
+  do (modify-syntax-entry
+      (make-char 'latin-iso8859-2 latin-2)
+      (string (char-syntax (make-char 'latin-iso8859-1 latin-1)))
+      syntax-table))
+
+;; 
+;; Latin-3 (ISO-8859-3). Esperanto, Maltese and Turkish. Obsolescent.
+
+(make-coding-system
+ 'iso-8859-3 'iso2022 "ISO-8859-3 (Latin-3)"
+ '(charset-g0 ascii
+   charset-g1 latin-iso8859-3
+   charset-g2 t
+   charset-g3 t
+   mnemonic "MIME/Ltn-3"))
+
+;; Initialise the non-word syntax codes in ISO-8859-3, copying them from
+;; ISO-8859-1.
+(loop
+  for (latin-3 latin-1) 
+  in '((#xA0 #xA0)  ;; NO BREAK SPACE
+       (#xA2 #xB4)  ;; BREVE, ACUTE ACCENT
+       (#xA3 #xA3)  ;; POUND SIGN
+       (#xA4 #xA4)  ;; CURRENCY SIGN
+       (#xA7 #xA7)  ;; SECTION SIGN
+       (#xA8 #xA8)  ;; DIAERESIS
+       (#xAD #xAD)  ;; SOFT HYPHEN
+       (#xB0 #xB0)  ;; DEGREE SIGN
+       (#xB2 #xB2)  ;; SUPERSCRIPT TWO
+       (#xB3 #xB3)  ;; SUPERSCRIPT THREE
+       (#xB4 #xB4)  ;; ACUTE ACCENT
+       (#xB5 #xB5)  ;; MICRO SIGN
+       (#xB7 #xB7)  ;; MIDDLE DOT
+       (#xB8 #xB8)  ;; CEDILLA
+       (#xBD #xBD)  ;; VULGAR FRACTION ONE HALF
+       (#xD7 #xD7)  ;; MULTIPLICATION SIGN
+       (#xF7 #xF7)  ;; DIVISION SIGN
+       (#xFF #xB4)) ;; DOT ABOVE, ACUTE ACCENT
+  with syntax-table = (standard-syntax-table)
+  do (modify-syntax-entry
+      (make-char 'latin-iso8859-3 latin-3)
+      (string (char-syntax (make-char 'latin-iso8859-1 latin-1)))
+      syntax-table))
+
+;; Latin-4 (ISO-8859-4)

-;;; Code:
-; (make-charset 'latin-iso8859-1 
-; 	      "Right-Hand Part of Latin Alphabet 1 (ISO/IEC 8859-1): ISO-IR-100"
-; 	      '(dimension
-; 		1
-; 		registry "ISO8859-1"
-; 		chars 96
-; 		columns 1
-; 		direction l2r
-; 		final ?A
-; 		graphic 1
-; 		short-name "RHP of Latin-1"
-; 		long-name "RHP of Latin-1 (ISO 8859-1): ISO-IR-100"
-; 		))
-
-; (make-charset 'latin-iso8859-2 
-; 	      "Right-Hand Part of Latin Alphabet 2 (ISO/IEC 8859-2): ISO-IR-101"
-; 	      '(dimension
-; 		1
-; 		registry "ISO8859-2"
-; 		chars 96
-; 		columns 1
-; 		direction l2r
-; 		final ?B
-; 		graphic 1
-; 		short-name "RHP of Latin-2"
-; 		long-name "RHP of Latin-2 (ISO 8859-2): ISO-IR-101"
-; 		))
-
-; (make-charset 'latin-iso8859-3 
-; 	      "Right-Hand Part of Latin Alphabet 3 (ISO/IEC 8859-3): ISO-IR-109"
-; 	      '(dimension
-; 		1
-; 		registry "ISO8859-3"
-; 		chars 96
-; 		columns 1
-; 		direction l2r
-; 		final ?C
-; 		graphic 1
-; 		short-name "RHP of Latin-3"
-; 		long-name "RHP of Latin-3 (ISO 8859-3): ISO-IR-109"
-; 		))
-
-; (make-charset 'latin-iso8859-4 
-; 	      "Right-Hand Part of Latin Alphabet 4 (ISO/IEC 8859-4): ISO-IR-110"
-; 	      '(dimension
-; 		1
-; 		registry "ISO8859-4"
-; 		chars 96
-; 		columns 1
-; 		direction l2r
-; 		final ?D
-; 		graphic 1
-; 		short-name "RHP of Latin-4"
-; 		long-name "RHP of Latin-4 (ISO 8859-4): ISO-IR-110"
-; 		))
-
-; (make-charset 'latin-iso8859-9 
-; 	      "Right-Hand Part of Latin Alphabet 5 (ISO/IEC 8859-9): ISO-IR-148"
-; 	      '(dimension
-; 		1
-; 		registry "ISO8859-9"
-; 		chars 96
-; 		columns 1
-; 		direction l2r
-; 		final ?M
-; 		graphic 1
-; 		short-name "RHP of Latin-5"
-; 		long-name "RHP of Latin-5 (ISO 8859-9): ISO-IR-148"
-; 		))
-
-; (make-charset 'latin-iso8859-15 
-; 	      "Right-Hand Part of Latin Alphabet 9 (ISO/IEC 8859-15): ISO-IR-203"
-; 	      '(dimension
-; 		1
-; 		registry "ISO8859-15"
-; 		chars 96
-; 		columns 1
-; 		direction l2r
-; 		final ?b
-; 		graphic 1
-; 		short-name "RHP of Latin-9"
-; 		long-name "RHP of Latin-9 (ISO 8859-15): ISO-IR-203"
-; 		))
+;; Estonian, Latvian, Lithuanian, Greenlandic, and Sami. Obsolescent.

+(make-coding-system
+ 'iso-8859-4 'iso2022 "ISO-8859-4 (Latin-4)"
+ '(charset-g0 ascii
+   charset-g1 latin-iso8859-4
+   charset-g2 t
+   charset-g3 t
+   mnemonic "MIME/Ltn-4"))
+
+;; The default character syntax is now word. Pay attention to the
+;; exceptions in ISO-8859-4, copying them from ISO-8859-1. 
+(loop
+  for (latin-4 latin-1) 
+  in '((#xA0 #xA0)  ;; NO BREAK SPACE
+       (#xA4 #xA4)  ;; CURRENCY SIGN
+       (#xA7 #xA7)  ;; SECTION SIGN
+       (#xA8 #xA8)  ;; DIAERESIS
+       (#xAD #xAD)  ;; SOFT HYPHEN
+       (#xB0 #xB0)  ;; DEGREE SIGN
+       (#xB2 #xB4)  ;; OGONEK, ACUTE ACCENT
+       (#xB4 #xB4)  ;; ACUTE ACCENT
+       (#xB7 #xB4)  ;; CARON, ACUTE ACCENT
+       (#xB8 #xB8)  ;; CEDILLA
+       (#xD7 #xD7)  ;; MULTIPLICATION SIGN
+       (#xF7 #xF7)  ;; DIVISION SIGN
+       (#xFF #xB4)) ;; DOT ABOVE, ACUTE ACCENT
+  with syntax-table = (standard-syntax-table)
+  do (modify-syntax-entry
+      (make-char 'latin-iso8859-4 latin-4)
+      (string (char-syntax (make-char 'latin-iso8859-1 latin-1)))
+      syntax-table))
+
+
+;; Latin-8 (ISO 8859-14) Celtic.
+
+;; Never widely used. Current-orthography Gaelic, both Irish and Scots, is
+;; easily written with Latin-1. Wikipedia says the same about Welsh.
+
 (make-charset 'latin-iso8859-14 
 	      "Right-Hand Part of Latin Alphabet 8 (ISO/IEC 8859-14)"
-	      '(dimension
-		1
+	      '(dimension 1
 		registries ["ISO8859-14"]
 		chars 96
 		columns 1
＠＠ -128,32 +166,10 ＠＠
 		final ?_
 		graphic 1
 		short-name "RHP of Latin-8"
-		long-name "RHP of Latin-8 (ISO 8859-14)"
-		))
+		long-name "RHP of Latin-8 (ISO 8859-14)"))

-(make-charset 'latin-iso8859-16
-	      "Right-Hand Part of Latin Alphabet 10 (ISO/IEC 8859-16)"
-	      '(dimension
-		1
-		registries ["ISO8859-16"]
-		chars 96
-		columns 1
-		direction l2r
-		final ?f			; octet 06/06; cf ISO-IR 226
-		graphic 1
-		short-name "RHP of Latin-10"
-		long-name "RHP of Latin-10 (ISO 8859-16)"
-		))
-
-;; Latin-1 is dealt with in iso8859-1.el, which see. 
-
-;; ISO 8859-14. 
 ;; 
-;; Initialise all characters to word syntax.
-(loop for c from #xa0 to #xff
-  do (modify-syntax-entry (make-char 'latin-iso8859-14 c) "w"))
-
-;; Now, the exceptions. There's just punctuation in this character set. 
+;; Character syntax defaults to word. The exceptions here are shared with Latin-1.
 (dolist (code '(#xa0	;; NO BREAK SPACE
 		#xa3	;; POUND SIGN
 		#xa7	;; SECTION SIGN
＠＠ -161,142 +177,42 ＠＠
 		#xad	;; SOFT HYPHEN
 		#xae	;; REGISTERED
 		#xb6))	;; PILCROW SIGN
-  (modify-syntax-entry (make-char 'latin-iso8859-14 code) "_"))
-;; end of ISO 8859-14.
+  (modify-syntax-entry (make-char 'latin-iso8859-14 code)
+                       (string (char-syntax (make-char 'latin-iso8859-1 code)))
+                       (standard-syntax-table)))

-;; ISO 8859-16.
-;;
-;; Initialise all of iso-8859-16 to word syntax. 
-(loop for c from #xa0 to #xff
-  do (modify-syntax-entry (make-char 'latin-iso8859-16 c) "w"))
-
-;; And then do the exceptions. First, the punctuation (following the model
-;; of Latin-1):
-(dolist (code '(#xa0	;; NO BREAK SPACE
-		#xa4	;; EURO SIGN
-		#xa7	;; SECTION SIGN
-		#xa9	;; COPYRIGHT
-		#xad	;; SOFT HYPHEN
-		#xb0	;; DEGREE
-		#xb1	;; PLUS-MINUS SIGN
-		#xb6	;; PILCROW SIGN
-		#xb7)) ;; MIDDLE DOT 
-  (modify-syntax-entry (make-char 'latin-iso8859-16 code) "_"))
+
+;; The syntax table code for ISO 8859-15 and ISO 8859-16 requires that the
+;; guillemets not have parenthesis syntax, which they used to have in the
+;; past. See syntax.c:complex_vars_of_syntax.

-;; Mark the DOUBLE LOW-9 QUOTATION MARK and its closing character as
-;; quotation marks.
-(modify-syntax-entry (make-char 'latin-iso8859-16 #xa5) "\"")
-(modify-syntax-entry (make-char 'latin-iso8859-16 #xb5) "\"")
-
-;; For some crazy reason--well, in truth, probably because Jamie never used
-;; them in anger--the guillemets have open- and close-parenthesis syntax in
-;; Latin 1. We will probably change that in the future; for the moment, I'm
-;; preserving it.
-(modify-syntax-entry (make-char 'latin-iso8859-16 #xab) 
-		     (format "(%c" (make-char 'latin-iso8859-16 #xbb)))
-(modify-syntax-entry (make-char 'latin-iso8859-16 #xbb) 
-		     (format ")%c" (make-char 'latin-iso8859-16 #xab)))
+(assert (not (memq (char-syntax (make-char 'latin-iso8859-1 #xAB)) '(?\( ?\))))
+        t "This code assumes \xAB does not have parenthesis syntax.  ")

-;; end of ISO 8859-16. 
+(assert (not (memq (char-syntax (make-char 'latin-iso8859-1 #xBB)) '(?\( ?\))))
+        t "This code assumes \xBB does not have parenthesis syntax.  ")

-;; ISO 8859-15. 
+
+;; Latin-9 (ISO-8859-15)
+;;
+;; Latin-1 plus Euro, plus a few accented characters for the sake of correct
+;; Finnish and French orthography. Only ever widely used on Unix. 
 ;; 
 ;; Based on Latin-1 and differences therefrom.
 ;; 
 ;; First, initialise the syntax from the corresponding Latin-1 characters. 
-(loop for c from #xa0 to #xff
-      do (modify-syntax-entry 
-	  (make-char 'latin-iso8859-15 c)
-	  (string (char-syntax (make-char 'latin-iso8859-1 c)))))
-;; Now, the exceptions
-(loop for c in '(?Š ?š ?Ž ?ž ?Œ ?œ ?Ÿ)
-      do (modify-syntax-entry c "w"))
-
-;; Again, perpetuating insanity with the guillemets.
-(modify-syntax-entry (make-char 'latin-iso8859-16 #xab) 
-		     (format "(%c" (make-char 'latin-iso8859-16 #xbb)))
-(modify-syntax-entry (make-char 'latin-iso8859-16 #xbb) 
-		     (format ")%c" (make-char 'latin-iso8859-16 #xab)))
-;; end of ISO 8859-15. 
-
-;; For syntax of Latin-2
-(loop for c in '(?Ą ?Ł ?Ľ ?Ś ?Š ?Ş ?Ť ?Ź ?Ž ?Ż ?ą ?ł ?ľ ?ś ?š ?ş ?ť ?ź)
-      do (modify-syntax-entry c "w"))
-
-(loop for c from 62 to 126
-      do (modify-syntax-entry (make-char 'latin-iso8859-2 c) "w"))
-
-(modify-syntax-entry (make-char 'latin-iso8859-2 32) "w") ; no-break space
-(modify-syntax-entry ?× ".")
-(modify-syntax-entry ?÷ ".")
-
-;; For syntax of Latin-3
-(loop for c in '(?Ħ ?Ĥ ?İ ?Ş ?Ğ ?Ĵ ?Ż ?ħ ?µ ?ĥ ?ş ?ğ ?ĵ ?ż)
-  do (modify-syntax-entry c "w"))
-
-(loop for c from 64 to 126
-  do (modify-syntax-entry (make-char 'latin-iso8859-3 c) "w"))
-
-(modify-syntax-entry (make-char 'latin-iso8859-3 32) "w") ; no-break space
-(modify-syntax-entry ?× ".")
-(modify-syntax-entry ?÷ ".")
-
-;; For syntax of Latin-4
-(loop for c in '(?Ą ?ĸ ?Ŗ ?Ĩ ?Ļ ?Š ?Ē ?Ģ ?Ŧ ?Ž ?ą ?ŗ ?ĩ ?ļ ?š ?ē ?ģ ?ŧ ?Ŋ ?ž ?ŋ)
-  do (modify-syntax-entry c "w"))
-
-(loop for c from 64 to 126
-  do (modify-syntax-entry (make-char 'latin-iso8859-4 c) "w"))
-
-(modify-syntax-entry (make-char 'latin-iso8859-4 32) "w") ; no-break space
-(modify-syntax-entry ?× ".")
-(modify-syntax-entry ?÷ ".")
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; EUROPEANS
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-
-;; Latin-1 (ISO-8859-1)
-
-;; (make-coding-system
-;;  'iso-latin-1 2 ?1
-;;  "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)"
-;;  '(ascii latin-iso8859-1 nil nil
-;;    nil nil nil nil nil nil nil nil nil nil nil nil t)
-;;  '((safe-charsets ascii latin-iso8859-1)
-;;    (mime-charset . iso-8859-1)))
-
-;; (define-coding-system-alias 'iso-8859-1 'iso-latin-1)
-;; (define-coding-system-alias 'latin-1 'iso-latin-1)
-
-;; (make-coding-system
-;;  'compound-text 2 ?1
-;;  "ISO 2022 based encoding used in inter client communication of X"
-;;  '((ascii t) (latin-iso8859-1 t) nil nil
-;;    nil ascii-eol ascii-cntl nil nil nil nil nil nil nil nil nil t)
-;;  '((safe-charsets . t)))
-
-;; (define-coding-system-alias 'ctext 'compound-text)
-
-;; "Hello, Hej, Tere, Hei, Bonjour, Grüß Gott, Ciao, ¡Hola!"
-
-
-;; Latin-9 (ISO-8859-15)
-;; Latin-1 plus Euro, plus a few accented characters
-
-;; (make-charset 'latin-iso8859-15
-;;   "Latin-9, aka Latin-1 with Euro etc"
-;;   '(short-name "Latin 9"
-;;     long-name  "Latin-9 (typically GR of ISO 8859/15)"
-;;     registry   "iso8859-15"
-;;     dimension  1
-;;     columns    1
-;;     chars      96
-;;     final      ?b                  ; ISO-IR-203
-;;     graphic    1
-;;     direction  l2r))
+(loop 
+  for c from #xa0 to #xff
+  with syntax-table = (standard-syntax-table)
+  do (modify-syntax-entry (make-char 'latin-iso8859-15 c)
+                          (string (char-syntax (make-char 'latin-iso8859-1 c)))
+                          syntax-table))
+
+;; Now, the exceptions. The Euro sign retains the syntax of CURRENCY SIGN.
+(loop
+  for c in '(?Š ?š ?Ž ?ž ?Œ ?œ ?Ÿ)
+  with syntax-table = (standard-syntax-table)
+  do (modify-syntax-entry c "w" syntax-table))

 (make-coding-system
  'iso-8859-15 'iso2022
＠＠ -306,32 +222,70 ＠＠
     charset-g0 ascii
     charset-g1 latin-iso8859-15
     charset-g2 t
-    charset-g3 t
-    ))
-
+    charset-g3 t))
+;; end of ISO 8859-15. 

-;; Latin-2 (ISO-8859-2)
+;;
+;; Latin-10 (ISO 8859-16).
+;;
+;; "South-Eastern European." Not, to my knowledge, ever widely used. 

-;; (make-coding-system
-;;  'iso-latin-2 2 ?2
-;;  "ISO 2022 based 8-bit encoding (MIME:ISO-8859-2)"
-;;  '(ascii latin-iso8859-2 nil nil
-;;    nil nil nil nil nil nil nil)
-;;  '((safe-charsets ascii latin-iso8859-2)
-;;    (mime-charset . iso-8859-2)))
+(make-charset 'latin-iso8859-16
+	      "Right-Hand Part of Latin Alphabet 10 (ISO/IEC 8859-16)"
+	      '(dimension 1
+		registries ["ISO8859-16"]
+		chars 96
+		columns 1
+		direction l2r
+		final ?f			; octet 06/06; cf ISO-IR 226
+		graphic 1
+		short-name "RHP of Latin-10"
+		long-name "RHP of Latin-10 (ISO 8859-16)"))

-;; (define-coding-system-alias 'iso-8859-2 'iso-latin-2)
-;; (define-coding-system-alias 'latin-2 'iso-latin-2)
+;; Copy over the non-word syntax this charset has in common with Latin 1.
+(dolist (code '(#xa0	;; NO BREAK SPACE
+		#xa7	;; SECTION SIGN
+		#xa9	;; COPYRIGHT
+                #xab    ;; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+		#xad	;; SOFT HYPHEN
+		#xb0	;; DEGREE
+		#xb1	;; PLUS-MINUS SIGN
+		#xb6	;; PILCROW SIGN
+		#xb7    ;; MIDDLE DOT 
+                #xbb))  ;; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+  (modify-syntax-entry (make-char 'latin-iso8859-16 code)
+                       (string (char-syntax (make-char 'latin-iso8859-1 code)))
+                       (standard-syntax-table)))
+
+;; EURO SIGN. Take its syntax from the pound sign. 
+(modify-syntax-entry (make-char 'latin-iso8859-16 #xa4)
+                     (string (char-syntax (make-char 'latin-iso8859-1 #xa3)))
+                     (standard-syntax-table))
+
+;; Take DOUBLE LOW-9 QUOTATION MARK's syntax from that of LEFT-POINTING
+;; DOUBLE ANGLE QUOTATION MARK.
+(modify-syntax-entry (make-char 'latin-iso8859-16 #xa5) 
+                     (string (char-syntax (make-char 'latin-iso8859-1 #xab)))
+                     (standard-syntax-table))
+
+;; Take RIGHT DOUBLE QUOTATION MARK's syntax from that of RIGHT-POINTING
+;; DOUBLE ANGLE QUOTATION MARK.
+(modify-syntax-entry (make-char 'latin-iso8859-16 #xb5)
+                     (string (char-syntax (make-char 'latin-iso8859-1 #xbb)))
+                     (standard-syntax-table))

+;; Add a coding system for ISO 8859-16.
 (make-coding-system
- 'iso-8859-2 'iso2022 "ISO-8859-2 (Latin-2)"
+ 'iso-8859-16 'iso2022 "MIME ISO-8859-16"
  '(charset-g0 ascii
-   charset-g1 latin-iso8859-2
-   charset-g2 t
-   charset-g3 t
-   mnemonic "MIME/Ltn-2"
-   ))
+   charset-g1 latin-iso8859-16
+   charset-g2 t			; grrr
+   charset-g3 t			; grrr
+   mnemonic "MIME/Ltn-10"))

+;; end of ISO 8859-16. 
+
+
 (provide 'romanian)

 ;; Czech support originally from czech.el
＠＠ -347,66 +301,21 ＠＠
 ;; Maintainer: Milan Zamazal <pdm(a)fi.muni.cz>

 (provide 'slovenian)
-

-;; Latin-3 (ISO-8859-3)
-
-;; (make-coding-system
-;;  'iso-latin-3 2 ?3
-;;  "ISO 2022 based 8-bit encoding (MIME:ISO-8859-3)"
-;;  '(ascii latin-iso8859-3 nil nil
-;;    nil nil nil nil nil nil nil)
-;;  '((safe-charsets ascii latin-iso8859-3)
-;;    (mime-charset . iso-8859-3)))
-
-;; (define-coding-system-alias 'iso-8859-3 'iso-latin-3)
-;; (define-coding-system-alias 'latin-3 'iso-latin-3)
-
-(make-coding-system
- 'iso-8859-3 'iso2022 "ISO-8859-3 (Latin-3)"
- '(charset-g0 ascii
-   charset-g1 latin-iso8859-3
-   charset-g2 t
-   charset-g3 t
-   mnemonic "MIME/Ltn-3"
-   ))
-
-
-;; Latin-4 (ISO-8859-4)
-
-;; (make-coding-system
-;;  'iso-latin-4 2 ?4
-;;  "ISO 2022 based 8-bit encoding (MIME:ISO-8859-4)"
-;;  '(ascii latin-iso8859-4 nil nil
-;;    nil nil nil nil nil nil nil)
-;;  '((safe-charsets ascii latin-iso8859-4)
-;;    (mime-charset . iso-8895-4)))
-
-;; (define-coding-system-alias 'iso-8859-4 'iso-latin-4)
-;; (define-coding-system-alias 'latin-4 'iso-latin-4)
-
-(make-coding-system
- 'iso-8859-4 'iso2022 "ISO-8859-4 (Latin-4)"
- '(charset-g0 ascii
-   charset-g1 latin-iso8859-4
-   charset-g2 t
-   charset-g3 t
-   mnemonic "MIME/Ltn-4"
-   ))
-
-
 ;; Latin-5 (ISO-8859-9)
-
-;; (make-coding-system
-;;  'iso-latin-5 2 ?9
-;;  "ISO 2022 based 8-bit encoding (MIME:ISO-8859-9)"
-;;  '(ascii latin-iso8859-9 nil nil
-;;    nil nil nil nil nil nil nil)
-;;  '((safe-charsets ascii latin-iso8859-9)
-;;    (mime-charset . iso-8859-9)))

-;; (define-coding-system-alias 'iso-8859-9 'iso-latin-5)
-;; (define-coding-system-alias 'latin-5 'iso-latin-5)
+;; Turkish (more generally Turkic.) This is identical to Latin-1, with the
+;; exception that the Icelandic-specific letters have been replaced by
+;; Turkish-specific letters. As such, we can simply copy the Latin-1 syntax
+;; table. However, the case table isn't yet enabled--see latin.el.
+
+(loop
+  for i from #xA0 to #xFF
+  with syntax-table = (standard-syntax-table)
+  do (modify-syntax-entry
+      (make-char 'latin-iso8859-9 i)
+      (string (char-syntax (make-char 'latin-iso8859-1 i)))
+      syntax-table))

 (make-coding-system
  'iso-8859-9 'iso2022 "ISO-8859-9 (Latin-5)"
＠＠ -414,25 +323,15 ＠＠
    charset-g1 latin-iso8859-9
    charset-g2 t
    charset-g3 t
-   mnemonic "MIME/Ltn-5"
-   ))
-
-;; Add a coding system for ISO 8859-16.
-(make-coding-system
- 'iso-8859-16 'iso2022 "MIME ISO-8859-16"
- '(charset-g0 ascii
-   charset-g1 latin-iso8859-16
-   charset-g2 t			; grrr
-   charset-g3 t			; grrr
-   mnemonic "MIME/Ltn-10"))
+   mnemonic "MIME/Ltn-5"))

-(loop for ((charset codesys default-input nice-charset-1 nice-charset-2
-		    supported-langs ;; a list if the doc string is replaced
-				    ;; entirely
-		    )
-	   langenvs) in
-  '(
-    ((latin-iso8859-1 iso-8859-1 "latin-1-prefix" "Latin-1" "ISO-8859-1"
+(loop 
+  for ((charset codesys default-input nice-charset-1 nice-charset-2
+                ;; supported-langs is a list if the doc string is replaced
+                ;; entirely
+                supported-langs) 
+       langenvs) in
+  '(((latin-iso8859-1 iso-8859-1 "latin-1-prefix" "Latin-1" "ISO-8859-1"
 " Danish, Dutch, English, Faeroese, Finnish, French, German, Icelandic,
  Irish, Italian, Norwegian, Portuguese, Spanish, and Swedish.")
      (("Danish" "da")
＠＠ -464,7 +363,7 ＠＠
    ((latin-iso8859-2 iso-8859-2 "latin-2-prefix" "Latin-2" "ISO-8859-2"
 " Albanian, Czech, English, German, Hungarian, Polish, Romanian,
  Serbian, Croatian, Slovak, Slovene, Sorbian (upper and lower),
- and Swedish.")
+ and Swedish.") ;; " (fontification got screwed up, CVS-20061203)
      (("Albanian" nil)
      ("Croatian" ("hrvatski" "hr") "TUTORIAL.hr")
      ("Czech" ("cs" "cz") "TUTORIAL.cs" "Přejeme vám hezký den!"
Index: lisp/mule/greek.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/mule/greek.el,v
retrieving revision 1.6
diff -u -r1.6 greek.el
--- lisp/mule/greek.el	2006/12/17 13:23:50	1.6
+++ lisp/mule/greek.el	2006/12/18 16:32:31
＠＠ -29,6 +29,7 ＠＠

 ;;; Code:

+;; Case table:
 (loop
   for (upper lower)
   in '((#xdb #xfb) ;; UPSILON WITH DIALYTIKA
＠＠ -75,32 +76,45 ＠＠
   (put-case-table-pair (make-char 'greek-iso8859-7 upper)
                        (make-char 'greek-iso8859-7 lower) case-table))

-;; Now, syntax.
-(dolist (code '(#xA1    ;; LEFT SINGLE QUOTATION MARK
-                #xA2	;; RIGHT SINGLE QUOTATION MARK
-                #xA3	;; POUND SIGN
-                #xA6	;; BROKEN BAR
-                #xA7	;; SECTION SIGN
-                #xA8	;; DIAERESIS
-                #xA9	;; COPYRIGHT SIGN
-                #xAB	;; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-                #xAC	;; NOT SIGN
-                #xAD	;; SOFT HYPHEN
-                #xAF	;; HORIZONTAL BAR
-                #xB0	;; DEGREE SIGN
-                #xB1	;; PLUS-MINUS SIGN
-                #xB7	;; MIDDLE DOT
-                #xBB))  ;; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-  (modify-syntax-entry (make-char 'greek-iso8859-7 code) "."))
-
-;; NO-BREAK SPACE
-(modify-syntax-entry (make-char 'greek-iso8859-7 #xA0) " ")
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; GREEK
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Now, syntax. Copy from appropriate characters in Latin 1. 

-
+;; This code requires that the guillemets not have parenthesis syntax.
+
+(assert (not (memq (char-syntax (make-char 'latin-iso8859-1 #xAB)) '(?\( ?\))))
+        t "This code assumes \xAB does not have parenthesis syntax.  ")
+
+(assert (not (memq (char-syntax (make-char 'latin-iso8859-1 #xBB)) '(?\( ?\))))
+        t "This code assumes \xBB does not have parenthesis syntax.  ")
+
+(loop
+  for (greek latin-1) 
+  in '((#xA0 #xA0)  ;; NO BREAK SPACE
+       (#xA1 #xAB)  ;; LEFT SINGLE QUOTATION MARK, LEFT DOUBLE ANGLE QUOTE
+       (#xA2 #xBB)  ;; RIGHT SINGLE QUOTATION MARK, RIGHT DOUBLE ANGLE QUOTE
+       (#xA3 #xA3)  ;; POUND SIGN
+       (#xA6 #xA6)  ;; BROKEN BAR
+       (#xA7 #xA7)  ;; SECTION SIGN
+       (#xA8 #xA8)  ;; DIAERESIS
+       (#xA9 #xA9)  ;; COPYRIGHT SIGN
+       (#xAB #xAB)  ;; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+       (#xAC #xAC)  ;; NOT SIGN
+       (#xAD #xAD)  ;; SOFT HYPHEN
+       (#xAF #xA6)  ;; HORIZONTAL BAR, BROKEN BAR
+       (#xB0 #xB0)  ;; DEGREE SIGN
+       (#xB1 #xB1)  ;; PLUS-MINUS SIGN
+       (#xB2 #xB2)  ;; SUPERSCRIPT TWO
+       (#xB3 #xB3)  ;; SUPERSCRIPT THREE
+       (#xB4 #xB4)  ;; GREEK TONOS, ACUTE ACCENT
+       (#xB5 #xB4)  ;; GREEK DIALYTIKA TONOS, ACUTE ACCENT
+       (#xB7 #xB7)  ;; MIDDLE DOT
+       (#xBB #xBB)  ;; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+       (#xBD #xBD))  ;; VULGAR FRACTION ONE HALF
+  with syntax-table = (standard-syntax-table)
+  do (modify-syntax-entry
+      (make-char 'greek-iso8859-7 greek)
+      (string (char-syntax (make-char 'latin-iso8859-1 latin-1)))
+      syntax-table))
+
 (make-coding-system
  'iso-8859-7 'iso2022 "ISO-8859-7 (Greek)"
  '(charset-g0 ascii

-- 
When I was in the scouts, the leader told me to pitch a tent. I couldn't
find any pitch, so I used creosote.

_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://calypso.tux.org/cgi-bin/mailman/listinfo/xemacs-patches

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

2005

2004

2003

[PATCH] Take European, Cyrillic, Greek syntax classes from Latin-1