/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (show annotations)
Thu Oct 25 15:17:07 2007 UTC (6 years, 6 months ago) by rtoy
Branch: MAIN
Import Paul Foley's external-formats support.

New files:
o code/extfmts.lisp
o pcl/simple-streams/external-formats/iso8859-1.lisp
o pcl/simple-streams/external-formats/void.lisp

code/exports.lisp:
o Export the new symbols STRING-TO-OCTETS, OCTETS-TO-STRING,
  *DEFAULT-EXTERNAL-FORMAT*, ENCODE-STRING, and DECODE-STRING from the
  STREAM package
o Make the symbols in the EXT package too.

pcl/simple-streams/internal.lisp:
o Move the implementation of STRING-TO-OCTETS and friends to a new
  file (extfmts.lisp).

pcl/simple-streams/external-formats/utf-8.lisp:
o New implementation.

tools/make-main-dist.sh:
o Create new target directory to hold external formats
o Copy all of the external formats to the new directory.

tools/pclcom.lisp:
o Compile new code

tools/worldcom.lisp:
o Compile code/extfmts.lisp

tools/worldload.lisp:
o Load code/extfmts.lisp
1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2 ;;;
3 ;;; **********************************************************************
4 ;;; This code was written by Paul Foley and has been placed in the public
5 ;;; domain.
6 ;;;
7 (ext:file-comment
8 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.1 2007/10/25 15:17:07 rtoy Exp $")
9 ;;;
10 ;;; **********************************************************************
11 ;;;
12 ;;; Implementation of external-formats
13
14 (in-package "STREAM")
15
16 (export '(string-to-octets octets-to-string *default-external-format*
17 encode-string decode-string))
18
19 (defvar *default-external-format* :iso8859-1)
20
21 (defvar *external-formats* (make-hash-table))
22 (defvar *external-format-aliases* (make-hash-table))
23
24 (define-condition external-format-not-implemented (error)
25 ()
26 (:report
27 (lambda (condition stream)
28 (declare (ignore condition))
29 (format stream "Attempting unimplemented external-format I/O."))))
30
31 (defun %efni (a b c d)
32 (declare (ignore a b c d))
33 (error 'external-format-not-implemented))
34
35 (defstruct (external-format
36 (:conc-name ef-)
37 (:print-function %print-external-format)
38 (:constructor make-external-format (name
39 &optional slots slotd
40 octets-to-code
41 code-to-octets)))
42 (name (ext:required-argument) :type keyword :read-only t)
43 (slots #() :type simple-vector :read-only t)
44 (slotd nil :type list :read-only t)
45 (octets-to-code #'%efni :type function :read-only t)
46 (code-to-octets #'%efni :type function :read-only t))
47
48 (defun %print-external-format (ef stream depth)
49 (declare (ignore depth))
50 (print-unreadable-object (ef stream :type t :identity t)
51 (princ (ef-name ef) stream)))
52
53 (defmacro define-external-format (name octets-to-code code-to-octets)
54 (let ((tmp (gensym)))
55 `(macrolet ((octets-to-code ((state input unput) &body body)
56 `(lambda (,',tmp ,state ,input ,unput)
57 (declare (type (function () (unsigned-byte 8)) ,input)
58 (type (function (lisp::index) t) ,unput)
59 (ignore ,',tmp)
60 (ignorable ,state ,unput)
61 (values (unsigned-byte 31) lisp::index t))
62 ,@body))
63 (code-to-octets ((code state output) &body body)
64 `(lambda (,',tmp ,code ,state ,output)
65 (declare (type (unsigned-byte 31) ,code)
66 (type (function ((unsigned-byte 8)) t) ,output)
67 (ignore ,',tmp)
68 (ignorable ,state ,output)
69 (values t))
70 ,@body)))
71 (setf (gethash ,name *external-formats*)
72 (make-external-format ,name #() '()
73 ,octets-to-code ,code-to-octets)))))
74
75 (defun load-external-format-aliases ()
76 (let ((*package* (find-package "KEYWORD")))
77 (with-open-file (stm "library:ext-formats/aliases" :if-does-not-exist nil)
78 (when stm
79 (do ((alias (read stm nil stm) (read stm nil stm))
80 (value (read stm nil stm) (read stm nil stm)))
81 ((or (eq alias stm) (eq value stm))
82 (unless (eq alias stm)
83 (warn "External-format aliases file ends early.")))
84 (if (and (keywordp alias) (keywordp value))
85 (setf (gethash alias *external-format-aliases*) value)
86 (warn "Bad entry in external-format aliases file: ~S => ~S."
87 alias value)))))))
88
89 (defun find-external-format (name &optional (error-p t))
90 (when (external-format-p name)
91 (return-from find-external-format name))
92
93 (when (eq name :default)
94 (setq name *default-external-format*))
95
96 #+(or)
97 (unless (ext:search-list-defined-p "ef:")
98 (setf (ext:search-list "ef:") '("library:ef/")))
99
100 (when (zerop (hash-table-count *external-format-aliases*))
101 (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
102 (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
103 (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
104 (load-external-format-aliases))
105
106 (do ((tmp (gethash name *external-format-aliases*)
107 (gethash tmp *external-format-aliases*))
108 (cnt 0 (1+ cnt)))
109 ((or (null tmp) (= cnt 50))
110 (unless (null tmp)
111 (error "External-format aliasing depth exceeded.")))
112 (setq name tmp))
113
114 (or (gethash name *external-formats*)
115 (and (let ((*package* (find-package "STREAM")))
116 (load (format nil "library:ext-formats/~(~A~)" name) :if-does-not-exist nil))
117 (gethash name *external-formats*))
118 (if error-p (error "External format ~S not found." name) nil)))
119
120
121 (define-condition void-external-format (error)
122 ()
123 (:report
124 (lambda (condition stream)
125 (declare (ignore condition))
126 (format stream "Attempting I/O through void external-format."))))
127
128 (define-external-format :void
129 (octets-to-code (state input unput)
130 (declare (ignore input))
131 (error 'void-external-format))
132 (code-to-octets (code state output)
133 (declare (ignore code))
134 (error 'void-external-format)))
135
136 (define-external-format :iso8859-1
137 (octets-to-code (state input unput)
138 (declare (optimize (speed 3) (space 0) (safety 0) (debug 0)))
139 (values (funcall input) 1 nil))
140 (code-to-octets (code state output)
141 (declare (optimize (speed 3) (space 0) (safety 0) (debug 0)))
142 (funcall output (if (> code 255) #x3F code))
143 nil))
144
145
146 (defmacro octets-to-codepoint (external-format state count input unput)
147 (let ((tmp1 (gensym)) (tmp2 (gensym)) (tmp3 (gensym)))
148 `(multiple-value-bind (,tmp1 ,tmp2 ,tmp3)
149 (funcall (ef-octets-to-code ,external-format) ,external-format
150 ,state ,input ,unput)
151 (declare (type (unsigned-byte 31) ,tmp1) (type lisp::index ,tmp2))
152 (setf ,state ,tmp3 ,count ,tmp2)
153 ,tmp1)))
154
155 (defmacro codepoint-to-octets (external-format code state output)
156 `(progn
157 (setf ,state (funcall (ef-code-to-octets ,external-format)
158 ,external-format ,code ,state ,output))
159 nil))
160
161
162
163 (defmacro octets-to-char (external-format state count input unput)
164 `(let ((code (octets-to-codepoint ,external-format
165 ,state ,count ,input ,unput)))
166 (declare (type (unsigned-byte 31) code))
167 (if (< code #x100) (code-char code) #\?)))
168
169 (defmacro char-to-octets (external-format char state output)
170 `(codepoint-to-octets ,external-format (char-code ,char) ,state ,output))
171
172 (defun string-to-octets (string &key (start 0) end (external-format :default))
173 (declare (type string string)
174 (type lisp::index start)
175 (type (or null lisp::index) end)
176 #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#)
177 (let ((ef (find-external-format external-format))
178 (buffer (make-array (length string) :element-type '(unsigned-byte 8)))
179 (ptr 0)
180 (state nil))
181 (declare (type external-format ef)
182 (type (simple-array (unsigned-byte 8) (*)) buffer)
183 (type lisp::index ptr))
184 (flet ((out (b)
185 (when (= ptr (length buffer))
186 (setq buffer (adjust-array buffer (* 2 ptr))))
187 (setf (aref buffer (1- (incf ptr))) b)))
188 (dotimes (i (- (or end (length string)) start))
189 (declare (type lisp::index i))
190 (char-to-octets ef (char string (+ start i)) state #'out))
191 (lisp::shrink-vector buffer ptr))))
192
193 (defun octets-to-string (octets &key (start 0) end (external-format :default))
194 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
195 (type lisp::index start)
196 (type (or null lisp::index) end)
197 #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#)
198 (let ((ef (find-external-format external-format))
199 (end (1- (or end (length octets))))
200 (string (make-string (length octets)))
201 (ptr (1- start))
202 (pos -1)
203 (count 0)
204 (state nil))
205 (declare (type external-format ef)
206 (type lisp::index end count)
207 (type (integer -1 (#.array-dimension-limit)) pos ptr)
208 (type simple-base-string string))
209 (flet ((input ()
210 (aref octets (incf ptr)))
211 (unput (n)
212 (decf ptr (the lisp::index n))))
213 (loop until (>= ptr end)
214 ;; increasing size of string shouldn't ever be necessary, unless
215 ;; someone implements an encoding smaller than the source string...
216 do (setf (schar string (incf pos))
217 (octets-to-char ef state count #'input #'unput))))
218 (lisp::shrink-vector string (1+ pos))))
219
220
221
222 (defun encode-string (string external-format &optional (start 0) end)
223 (declare (type string string)
224 (type lisp::index start)
225 (type (or null lisp::index) end)
226 #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#)
227 (let ((ef (find-external-format external-format))
228 (result (make-string (length string)))
229 (ptr 0)
230 (state nil))
231 (declare (type external-format ef)
232 (type simple-base-string result)
233 (type lisp::index ptr))
234 (flet ((out (b)
235 (when (= ptr (length result))
236 (setq result (adjust-array result (* 2 ptr))))
237 (setf (char result (1- (incf ptr))) (code-char b))))
238 (dotimes (i (- (or end (length string)) start))
239 (declare (type lisp::index i))
240 (char-to-octets ef (char string (+ start i)) state #'out))
241 (lisp::shrink-vector result ptr))))
242
243 (defun decode-string (string external-format &optional (start 0) end)
244 (declare (type string string)
245 (type lisp::index start)
246 (type (or null lisp::index) end)
247 #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#)
248 (let ((ef (find-external-format external-format))
249 (end (1- (or end (length string))))
250 (result (make-string (length string)))
251 (ptr (1- start))
252 (pos -1)
253 (count 0)
254 (state nil))
255 (declare (type external-format ef)
256 (type lisp::index end count)
257 (type (integer -1 (#.array-dimension-limit)) pos ptr)
258 (type simple-base-string result))
259 (flet ((input ()
260 (char-code (char string (incf ptr))))
261 (unput (n)
262 (decf ptr (the lisp::index n))))
263 (loop until (>= ptr end)
264 ;; increasing size of result shouldn't ever be necessary, unless
265 ;; someone implements an encoding smaller than the source string...
266 do (setf (schar result (incf pos))
267 (octets-to-char ef state count #'input #'unput))))
268 (lisp::shrink-vector result (1+ pos))))
269
270
271 (provide :external-formats)

  ViewVC Help
Powered by ViewVC 1.1.5