It’s a trivial file, and I don’t think it merits an entire package to
itself, but there’s no real “miscellaneous-Mule” package to put it
in. If I don’t get a suggestion I will make a separate package for it, but I
feel pretty strongly that wouldn’t be ideal.
;;; url-coding.el --- Support for the web's URL encoding.
;; Copyright (C) 2004 Aidan Kehoe
;; Keywords: HTTP, urlencode
;; This file is not part of XEmacs, but its licence terms apply to it
;; nevertheless.
;; XEmacs is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;; XEmacs is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with XEmacs; see the file COPYING. If not, write to the Free
;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
;; 02111-1307, USA.
;; This is broken for non-ASCII characters, but we'll ignore that for the
;; moment.
(eval-and-compile
;; Lookup table indexed by octet value.  Each entry is the ASCII code of the
;; upper-case hexadecimal digit that spells the octet's top four bits, so
;; index #xA7 yields the code for "A".
(defvar url-coding-high-order-nybble-as-ascii
  (let* ((hex-digits "0123456789ABCDEF")
         (table (make-vector 256 0))
         (octet (1- (length table))))
    ;; Fill from the last slot down to slot zero.
    (while (>= octet 0)
      (aset table octet (char-to-int (aref hex-digits (ash octet -4))))
      (setq octet (1- octet)))
    table)
  "Vector mapping an octet to the ASCII code of the hexadecimal digit
for its most significant 4 bits.")
;; Companion lookup table, again indexed by octet value.  Each entry is the
;; ASCII code of the upper-case hexadecimal digit that spells the octet's
;; bottom four bits, so index #xA7 yields the code for "7".
(defvar url-coding-low-order-nybble-as-ascii
  (let* ((hex-digits "0123456789ABCDEF")
         (table (make-vector 256 0))
         (octet (1- (length table))))
    ;; Fill from the last slot down to slot zero.
    (while (>= octet 0)
      (aset table octet (char-to-int (aref hex-digits (logand octet 15))))
      (setq octet (1- octet)))
    table)
  "Vector mapping an octet to the ASCII code of the hexadecimal digit
for its least significant 4 bits.")
;; Both constants are plain ASCII code points, spelled as hex literals.
(defvar url-coding-escape-character-code #x25 ; the character %
  "ASCII code of the percent sign, which introduces an escaped octet.")
(defvar url-coding-escaped-space-code #x2B ; the character +
  "ASCII code of the plus sign, the URL-encoded form of a space.")
(defvar url-coding-hex-digit-table
  (let* ((hex-digits "0123456789ABCDEF")
         (table (make-vector (length hex-digits) 0))
         (value 0))
    ;; Slot N receives the ASCII code of the upper-case digit whose value is N.
    (while (< value (length hex-digits))
      (aset table value (char-to-int (aref hex-digits value)))
      (setq value (1+ value)))
    table)
  "Sixteen-entry vector giving, for each hexadecimal digit value, the
ASCII code of the upper-case digit that represents it.")
(defvar url-coding-latin-1-as-hex-table
  (let ((table (make-vector 256 0))
        (lower-digits "0123456789abcdef")
        (upper-digits "0123456789ABCDEF")
        (value 0))
    ;; Only the slots for hexadecimal digit characters (either case) get a
    ;; non-default entry; every other code point keeps the initial 0.
    (while (< value 16)
      (aset table (char-to-int (aref lower-digits value)) value)
      (aset table (char-to-int (aref upper-digits value)) value)
      (setq value (1+ value)))
    table)
  "Vector indexed by Latin 1 code point, giving the numeric value of that
character read as a hexadecimal digit.  Code points that are not
hexadecimal digits map to 0.")
(defvar url-coding-should-preserve-table
  (let ((table (make-vector 256 0))
        ;; Every character that is emitted unescaped: three punctuation
        ;; marks, the ASCII letters in both cases, and the decimal digits.
        (unescaped (concat "-_."
                           "abcdefghijklmnopqrstuvwxyz"
                           "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                           "0123456789"))
        (pos 0))
    ;; Flag the slot of each such character; all other octets stay 0.
    (while (< pos (length unescaped))
      (aset table (char-to-int (aref unescaped pos)) 1)
      (setq pos (1+ pos)))
    table)
  "Vector of 256 flags indexed by octet value: 1 when the octet is written
out unchanged as ASCII, 0 when it has to be escaped.")
) ;; end of eval-and-compile.
;; Decoder: reads URL-encoded octets and writes the octets they stand for.
;; The leading 1 is the CCL buffer magnification factor; every branch below
;; writes at most one octet per octet read.
(define-ccl-program ccl-decode-urlcoding
`(1
((read r0)
(loop
;; r0 holds the octet currently being examined.
(if (r0 == ,url-coding-escape-character-code)
;; A percent sign introduces an escape: fetch the two octets after it.
((read r2 r3)
;; Replace each of r2 and r3 with the entry found at that offset in
;; url-coding-latin-1-as-hex-table, i.e. the numeric value of the
;; character as a hexadecimal digit.
(r2 = r2 ,url-coding-latin-1-as-hex-table)
(r3 = r3 ,url-coding-latin-1-as-hex-table)
;; r2 is the high-order nybble and r3 the low-order one; combine them
;; into a single octet in r3 and emit it.
(r2 <<= 4)
(r3 |= r2)
(write r3))
;; Not an escape: a plus sign decodes to a space (#x20), and any other
;; octet is copied through unchanged.
(if (r0 == ,url-coding-escaped-space-code)
(write #x20)
(write r0)))
;; Fetch the next octet and go round the loop again.
(read r0)
(repeat))))
"CCL program to take URL-encoded ISO 8859-1 text and transform it to our
internal encoding.")
;; Encoder: reads octets and writes their URL-encoded form.
;; The leading 3 is the CCL buffer magnification factor; an escaped octet is
;; written as three octets (a percent sign and two hexadecimal digits).
(define-ccl-program ccl-encode-urlcoding
`(3
((read r0)
(loop
;; Look up r0 in url-coding-should-preserve-table; r1 becomes 1 when the
;; octet may be emitted as-is and 0 when it has to be escaped.
(r1 = r0 ,url-coding-should-preserve-table)
;; If we should preserve the value, just write the octet directly.
(if r1
(write r0)
;; else, write a percentage sign, and the hex value of the octet, in
;; an ASCII-friendly format.
;; Each of the two table writes emits the table entry at offset r0: first
;; the digit for the high-order nybble, then the one for the low-order
;; nybble.
((write ,url-coding-escape-character-code)
(write r0 ,url-coding-high-order-nybble-as-ascii)
(write r0 ,url-coding-low-order-nybble-as-ascii)))
;; Fetch the next octet and go round the loop again.
(read r0)
(repeat))))
"CCL program to encode octets (almost) according to RFC 1738")
;; Register the coding system under the name `url-coding'.  It is of type
;; `ccl', with ccl-decode-urlcoding as its decode program,
;; ccl-encode-urlcoding as its encode program, and "URLenc" as its mnemonic.
(make-coding-system
'url-coding 'ccl
"The coding used by application/x-www-form-urlencoded HTTP applications.
This coding form doesn't specify anything about non-ASCII characters, so
make sure you've transformed to a seven-bit coding system first."
'(decode ccl-decode-urlcoding
encode ccl-encode-urlcoding
mnemonic "URLenc"))
--
“I, for instance, am gung-ho about open source because my family is being
held hostage in Rob Malda’s basement. But who fact-checks me, or Enderle,
when we say something in public? No-one!” -- Danny O’Brien