/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2.4.3 - (hide annotations)
Mon Jun 23 15:03:31 2008 UTC (5 years, 10 months ago) by rtoy
Branch: unicode-utf16-branch
CVS Tags: unicode-utf16-sync-2008-12, unicode-utf16-sync-2008-07, unicode-utf16-sync-2008-09, unicode-utf16-sync-label-2009-03-16, unicode-utf16-char-support-2009-03-26, unicode-utf16-char-support-2009-03-25, unicode-utf16-sync-2008-11
Branch point for: unicode-utf16-extfmt-branch
Changes since 1.2.4.2: +66 -39 lines
Sync to HEAD branch.
1 rtoy 1.1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2     ;;;
3     ;;; **********************************************************************
4     ;;; This code was written by Paul Foley and has been placed in the public
5     ;;; domain.
6     ;;;
7     (ext:file-comment
8 rtoy 1.2.4.3 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.2.4.3 2008/06/23 15:03:31 rtoy Exp $")
9 rtoy 1.1 ;;;
10     ;;; **********************************************************************
11     ;;;
12     ;;; Implementation of external-formats
13    
14     (in-package "STREAM")
15    
16     (export '(string-to-octets octets-to-string *default-external-format*
17 rtoy 1.2.4.1 string-encode string-decode))
18 rtoy 1.1
19     (defvar *default-external-format* :iso8859-1)
20    
21 rtoy 1.2.4.1 (defvar *external-formats* (make-hash-table :test 'equal))
22 rtoy 1.1 (defvar *external-format-aliases* (make-hash-table))
23    
24 rtoy 1.2.4.1 (defconstant +ef-os+ 2)
25     (defconstant +ef-so+ 3)
26     (defconstant +ef-en+ 4)
27     (defconstant +ef-de+ 5)
28     (defconstant +ef-max+ 6)
29    
30 rtoy 1.1 (define-condition external-format-not-implemented (error)
31     ()
32     (:report
33     (lambda (condition stream)
34     (declare (ignore condition))
35     (format stream "Attempting unimplemented external-format I/O."))))
36    
37     (defun %efni (a b c d)
38     (declare (ignore a b c d))
39     (error 'external-format-not-implemented))
40    
41 rtoy 1.2.4.3 (defstruct efx
42     (octets-to-code #'%efni :type function :read-only t)
43     (code-to-octets #'%efni :type function :read-only t)
44     (cache nil :type (or null simple-vector)))
45    
46 rtoy 1.1 (defstruct (external-format
47     (:conc-name ef-)
48     (:print-function %print-external-format)
49 rtoy 1.2.4.3 (:constructor make-external-format (name efx composingp
50     &optional slots slotd)))
51 rtoy 1.2.4.1 (name (ext:required-argument) :type (or keyword cons) :read-only t)
52 rtoy 1.2.4.3 (efx (ext:required-argument) :type efx :read-only t)
53 rtoy 1.2.4.1 (composingp (ext:required-argument) :type boolean :read-only t)
54 rtoy 1.1 (slots #() :type simple-vector :read-only t)
55 rtoy 1.2.4.3 (slotd nil :type list :read-only t))
56 rtoy 1.1
57     (defun %print-external-format (ef stream depth)
58     (declare (ignore depth))
59     (print-unreadable-object (ef stream :type t :identity t)
60     (princ (ef-name ef) stream)))
61    
62 rtoy 1.2.4.1 (defun %whatsit (ef)
63     (setf (gethash (ef-name ef) *external-formats*) ef))
64    
65 rtoy 1.2.4.3 (declaim (inline ef-octets-to-code ef-code-to-octets ef-cache))
66    
67     (defun ef-octets-to-code (ef)
68     (efx-octets-to-code (ef-efx ef)))
69    
70     (defun ef-code-to-octets (ef)
71     (efx-code-to-octets (ef-efx ef)))
72    
73     (defun ef-cache (ef)
74     (efx-cache (ef-efx ef)))
75    
76 rtoy 1.1 (defmacro define-external-format (name octets-to-code code-to-octets)
77 rtoy 1.2.4.1 (let ((tmp1 (gensym)) (tmp2 (gensym)))
78     `(macrolet ((octets-to-code ((state input unput &rest vars) body)
79     `(lambda (,',tmp1 ,state ,input ,unput)
80     (declare (ignore ,',tmp1)
81     (ignorable ,state ,input ,unput)
82     (optimize (ext:inhibit-warnings 3)))
83     (let ((,input `(the (or (unsigned-byte 8) null) ,,input))
84     ,@(loop for var in vars collect `(,var (gensym))))
85     ,body)))
86     (code-to-octets ((code state output &rest vars) body)
87     `(lambda (,',tmp1 ,',tmp2 ,state ,output)
88     (declare (ignore ,',tmp1)
89 rtoy 1.1 (ignorable ,state ,output)
90 rtoy 1.2.4.1 (optimize (ext:inhibit-warnings 3)))
91     (let ((,code ',code)
92     ,@(loop for var in vars collect `(,var (gensym))))
93     `(let ((,',code (the (unsigned-byte 31) ,,',tmp2)))
94     (declare (ignorable ,',code))
95     ,,body)))))
96 rtoy 1.2.4.3 (%whatsit (make-external-format ,name
97     (make-efx :octets-to-code ,octets-to-code
98     :code-to-octets ,code-to-octets
99     :cache (make-array +ef-max+ :initial-element nil))
100     nil
101     #() '())))))
102 rtoy 1.2.4.1
103     (defmacro define-composing-external-format (name input output)
104     (let ((tmp1 (gensym)) (tmp2 (gensym)))
105     `(macrolet ((input ((state input unput &rest vars) body)
106     `(lambda (,',tmp1 ,state ,input ,unput)
107     (declare (ignore ,',tmp1)
108     (ignorable ,state ,input ,unput)
109     (optimize (ext:inhibit-warnings 3)))
110     (let ((,input `(the (values (or (unsigned-byte 31) null)
111     lisp::index)
112     ,,input))
113     ,@(loop for var in vars collect `(,var (gensym))))
114     ,body)))
115     (output ((code state output &rest vars) body)
116     `(lambda (,',tmp1 ,',tmp2 ,state ,output)
117     (declare (ignore ,',tmp1)
118     (ignorable ,state ,output)
119     (optimize (ext:inhibit-warnings 3)))
120     (let ((,code ',code)
121     ,@(loop for var in vars collect `(,var (gensym))))
122     `(let ((,',code (the (unsigned-byte 31) ,,',tmp2)))
123     (declare (ignorable ,',code))
124     ,,body)))))
125 rtoy 1.2.4.3 (%whatsit (make-external-format ,name
126     (make-efx :octets-to-code ,input
127     :code-to-octets ,output)
128     t
129     #() '())))))
130 rtoy 1.1
131     (defun load-external-format-aliases ()
132     (let ((*package* (find-package "KEYWORD")))
133 rtoy 1.2.4.2 (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil)
134 rtoy 1.1 (when stm
135     (do ((alias (read stm nil stm) (read stm nil stm))
136     (value (read stm nil stm) (read stm nil stm)))
137     ((or (eq alias stm) (eq value stm))
138     (unless (eq alias stm)
139     (warn "External-format aliases file ends early.")))
140     (if (and (keywordp alias) (keywordp value))
141     (setf (gethash alias *external-format-aliases*) value)
142     (warn "Bad entry in external-format aliases file: ~S => ~S."
143     alias value)))))))
144    
145 rtoy 1.2.4.1 (defun %find-external-format (name)
146 rtoy 1.1 (when (zerop (hash-table-count *external-format-aliases*))
147     (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
148     (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
149     (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
150     (load-external-format-aliases))
151    
152     (do ((tmp (gethash name *external-format-aliases*)
153     (gethash tmp *external-format-aliases*))
154     (cnt 0 (1+ cnt)))
155     ((or (null tmp) (= cnt 50))
156     (unless (null tmp)
157     (error "External-format aliasing depth exceeded.")))
158     (setq name tmp))
159    
160     (or (gethash name *external-formats*)
161 rtoy 1.2 (and (let ((*package* (find-package "STREAM"))
162     (lisp::*enable-package-locked-errors* nil))
163 rtoy 1.2.4.2 (load (format nil "ext-formats:~(~A~)" name)
164 rtoy 1.2 :if-does-not-exist nil))
165 rtoy 1.2.4.1 (gethash name *external-formats*))))
166    
167     (defun %composed-ef-name (a b)
168     (if (consp a) (append a (list b)) (list a b)))
169    
170     (defun %compose-external-formats (a b &optional name)
171     (when (ef-composingp a)
172     (error "~S is a Composing-External-Format." (ef-name a)))
173     (unless (ef-composingp b)
174     (error "~S is not a Composing-External-Format." (ef-name b)))
175 rtoy 1.2.4.3 (when name
176     (setf (getf name *external-format-aliases*)
177     (%composed-ef-name (ef-name a) (ef-name b))))
178     (make-external-format
179     (%composed-ef-name (ef-name a) (ef-name b))
180     (make-efx
181     :octets-to-code (lambda (tmp state input unput)
182     (declare (ignore tmp))
183     (funcall (ef-octets-to-code b) (ef-slots b)
184     state
185     (funcall (ef-octets-to-code a) (ef-slots a)
186     state
187     input
188     unput)
189     unput))
190     :code-to-octets (lambda (tmp code state output)
191     (declare (ignore tmp))
192     (funcall (ef-code-to-octets b) (ef-slots b)
193     code
194     state
195     `(lambda (x)
196     ,(funcall (ef-code-to-octets a)
197     (ef-slots a)
198     'x state output))))
199     :cache (make-array +ef-max+ :initial-element nil))
200     nil #() '()))
201 rtoy 1.2.4.1
202     (defun find-external-format (name &optional (error-p t))
203     (when (external-format-p name)
204     (return-from find-external-format name))
205    
206     (or (if (consp name) (every #'keywordp name) (keywordp name))
207     (error "~S is not a valid external format name." name))
208 rtoy 1.1
209 rtoy 1.2.4.1 (when (eq name :default)
210     (setq name *default-external-format*))
211    
212     (when (and (consp name) (not (cdr name)))
213     (setq name (car name)))
214    
215     (if (consp name)
216     (let ((efs (mapcar #'%find-external-format name)))
217     (if (member nil efs)
218     (if error-p (error "External format ~S not found." name) nil)
219     (let ((name (reduce #'%composed-ef-name (mapcar #'ef-name efs))))
220     (or (gethash name *external-formats*)
221     (%whatsit (reduce #'%compose-external-formats efs))))))
222     (or (%find-external-format name)
223     (if error-p (error "External format ~S not found." name) nil))))
224 rtoy 1.1
225     (define-condition void-external-format (error)
226     ()
227     (:report
228     (lambda (condition stream)
229     (declare (ignore condition))
230     (format stream "Attempting I/O through void external-format."))))
231    
232     (define-external-format :void
233     (octets-to-code (state input unput)
234 rtoy 1.2.4.1 `(error 'void-external-format))
235 rtoy 1.1 (code-to-octets (code state output)
236 rtoy 1.2.4.1 `(error 'void-external-format)))
237 rtoy 1.1
238     (define-external-format :iso8859-1
239     (octets-to-code (state input unput)
240 rtoy 1.2.4.1 `(values ,input 1))
241 rtoy 1.1 (code-to-octets (code state output)
242 rtoy 1.2.4.1 `(,output (if (> ,code 255) #x3F ,code))))
243 rtoy 1.1
244     (defmacro octets-to-codepoint (external-format state count input unput)
245 rtoy 1.2.4.1 (let ((tmp1 (gensym)) (tmp2 (gensym)))
246 rtoy 1.2.4.3 `(let ((body (funcall (ef-octets-to-code ,external-format)
247     (ef-slots ,external-format)
248 rtoy 1.2.4.1 ',state ',input ',unput)))
249     `(multiple-value-bind (,',tmp1 ,',tmp2) ,body
250     (setf ,',count (the lisp::index ,',tmp2))
251     (the (or (unsigned-byte 31) null) ,',tmp1)))))
252 rtoy 1.1
253     (defmacro codepoint-to-octets (external-format code state output)
254 rtoy 1.2.4.3 `(funcall (ef-code-to-octets ,external-format) (ef-slots ,external-format)
255 rtoy 1.2.4.1 ',code ',state ',output))
256    
257    
258    
259     (defvar *ef-base* +ef-max+)
260     (defvar *ef-extensions* '())
261    
262     (defun ensure-cache (ef id reqd)
263     (let ((base (or (getf *ef-extensions* id)
264     (setf (getf *ef-extensions* id)
265     (prog1 *ef-base* (incf *ef-base* reqd))))))
266     (when (< (length (ef-cache ef)) (+ base reqd))
267 rtoy 1.2.4.3 (setf (efx-cache (ef-efx ef))
268 rtoy 1.2.4.1 (adjust-array (ef-cache ef) (+ base reqd) :initial-element nil)))
269     base))
270    
271     (defmacro def-ef-macro (name (ef id reqd idx) body)
272     (let ((tmp (gensym)))
273     `(defun ,name (,ef)
274     (let ((,tmp ,(if (eq id 'lisp::lisp)
275     idx
276     `(+ (ensure-cache ,ef ',id ,reqd) ,idx))))
277     (or (aref (ef-cache ,ef) ,tmp)
278     (setf (aref (ef-cache ,ef) ,tmp)
279     (let ((*compile-print* nil)) (compile nil ,body))))))))
280 rtoy 1.1
281    
282    
283     (defmacro octets-to-char (external-format state count input unput)
284 rtoy 1.2.4.1 `(let ((body (octets-to-codepoint ,external-format
285 rtoy 1.1 ,state ,count ,input ,unput)))
286 rtoy 1.2.4.1 `(let ((code ,body))
287     (declare (type (unsigned-byte 31) code))
288 rtoy 1.2.4.3 (if (< code char-code-limit) (code-char code) #\?))))
289 rtoy 1.1
290     (defmacro char-to-octets (external-format char state output)
291     `(codepoint-to-octets ,external-format (char-code ,char) ,state ,output))
292    
293 rtoy 1.2.4.1 (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
294     `(lambda (string start end buffer &aux (ptr 0) (state nil))
295     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
296     (type simple-string string)
297     (type lisp::index start end ptr)
298     (type (simple-array (unsigned-byte 8) (*)) buffer)
299     (ignorable state))
300     (dotimes (i (- end start) (values buffer ptr))
301     (declare (type lisp::index i))
302     ,(char-to-octets extfmt (schar string (+ start i)) state
303     (lambda (b)
304     (when (= ptr (length buffer))
305     (setq buffer (adjust-array buffer (* 2 ptr))))
306     (setf (aref buffer (1- (incf ptr))) b))))))
307    
308 rtoy 1.2 (defun string-to-octets (string &key (start 0) end (external-format :default)
309     (buffer nil bufferp))
310 rtoy 1.1 (declare (type string string)
311 rtoy 1.2.4.1 (type lisp::index start)
312     (type (or lisp::index null) end)
313     (type (or (simple-array (unsigned-byte 8) (*)) null) buffer))
314     (multiple-value-bind (buffer ptr)
315     (lisp::with-array-data ((string string) (start start) (end end))
316     (funcall (ef-string-to-octets (find-external-format external-format))
317     string start end
318     (or buffer (make-array (length string)
319     :element-type '(unsigned-byte 8)))))
320     (values (if bufferp buffer (lisp::shrink-vector buffer ptr)) ptr)))
321    
322     (def-ef-macro ef-octets-to-string (extfmt lisp::lisp +ef-max+ +ef-os+)
323     `(lambda (octets ptr end string &aux (pos -1) (count 0) (state nil))
324     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
325     (type (simple-array (unsigned-byte 8) (*)) octets)
326     (type lisp::index end count)
327     (type (integer -1 (#.array-dimension-limit)) ptr pos)
328     (type simple-string string)
329     (ignorable state))
330     (loop until (>= ptr end)
331     do (when (= pos (length string))
332     (setq string (adjust-array string (* 2 pos))))
333     (setf (schar string (incf pos))
334     ,(octets-to-char extfmt state count
335     (aref octets (incf ptr)) ;;@@ EOF??
336     (lambda (n) (decf ptr n))))
337 rtoy 1.2.4.3 finally (return (values string (1+ pos))))))
338 rtoy 1.1
339 rtoy 1.2 (defun octets-to-string (octets &key (start 0) end (external-format :default)
340     (string nil stringp))
341 rtoy 1.1 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
342 rtoy 1.2.4.1 (type lisp::index start)
343     (type (or lisp::index null) end)
344     (type (or simple-string null) string))
345     (multiple-value-bind (string pos)
346     (funcall (ef-octets-to-string (find-external-format external-format))
347     octets (1- start) (1- (or end (length octets)))
348     (or string (make-string (length octets))))
349 rtoy 1.2 (values (if stringp string (lisp::shrink-vector string pos)) pos)))
350 rtoy 1.1
351    
352    
353 rtoy 1.2.4.1 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
354     `(lambda (string start end result &aux (ptr 0) (state nil))
355     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
356     (type simple-string string)
357     (type lisp::index start end ptr)
358     (type simple-base-string result)
359     (ignorable state))
360     (dotimes (i (- end start) (values result ptr))
361     (declare (type lisp::index i))
362     ,(char-to-octets extfmt (schar string (+ start i)) state
363     (lambda (b)
364     (when (= ptr (length result))
365     (setq result (adjust-array result (* 2 ptr))))
366     (setf (aref result (1- (incf ptr)))
367     (code-char b)))))))
368    
369     (defun string-encode (string external-format &optional (start 0) end)
370     (multiple-value-bind (result ptr)
371     (lisp::with-array-data ((string string) (start start) (end end))
372     (funcall (ef-encode (find-external-format external-format))
373     string start end
374     (make-string (length string) :element-type 'base-char)))
375     (lisp::shrink-vector result ptr)))
376    
377     (def-ef-macro ef-decode (extfmt lisp::lisp +ef-max+ +ef-de+)
378     `(lambda (string ptr end result &aux (pos -1) (count 0) (state nil))
379     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
380     (type simple-string string)
381     (type lisp::index end count)
382     (type (integer -1 (#.array-dimension-limit)) ptr pos)
383     (type simple-string result)
384     (ignorable state))
385     (loop until (>= ptr end)
386     ;; increasing size of result shouldn't ever be necessary, unless
387     ;; someone implements an encoding smaller than the source string...
388     do (setf (schar result (incf pos))
389     ,(octets-to-char extfmt state count
390     ;; note the need to return NIL for EOF
391     (if (= (1+ ptr) (length string))
392     nil
393     (char-code (char string (incf ptr))))
394     (lambda (n) (decf ptr n))))
395 rtoy 1.2.4.3 finally (return (values result (1+ pos))))))
396 rtoy 1.2.4.1
397     (defun string-decode (string external-format &optional (start 0) end)
398     (multiple-value-bind (result pos)
399     (lisp::with-array-data ((string string) (start start) (end end))
400     (funcall (ef-decode (find-external-format external-format))
401     string (1- start) (1- end) (make-string (length string))))
402 rtoy 1.2.4.3 (lisp::shrink-vector result pos)))

  ViewVC Help
Powered by ViewVC 1.1.5