/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.15.2.1 - (hide annotations)
Wed Aug 26 20:41:12 2009 UTC (4 years, 7 months ago) by rtoy
Branch: RELEASE-20A-BRANCH
Changes since 1.15: +25 -4 lines
Fix issue with file-string-length where computing the length changed
the state of the external format when it shouldn't.

code/extfmts.lisp:
o Add new slot to hold function to copy the external-format state.
o Update DEFINE-EXTERNAL-FORMAT to allow COPY-STATE function.
o Add macro to run the copy-state function.

code/fd-stream.lisp:
o In ef-strlen, save the fd-stream co state before computing the
  length and restore the state afterwards.

pcl/simple-streams/external-formats/utf-16.lisp:
pcl/simple-streams/external-formats/utf-32.lisp:
o Add copy-state function to copy the state.  (I think these are the
  only formats that have a state for output.)
1 rtoy 1.1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2     ;;;
3     ;;; **********************************************************************
4     ;;; This code was written by Paul Foley and has been placed in the public
5     ;;; domain.
6     ;;;
7     (ext:file-comment
8 rtoy 1.15.2.1 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.15.2.1 2009/08/26 20:41:12 rtoy Exp $")
9 rtoy 1.1 ;;;
10     ;;; **********************************************************************
11     ;;;
12     ;;; Implementation of external-formats
13    
14     (in-package "STREAM")
15    
16     (export '(string-to-octets octets-to-string *default-external-format*
17 rtoy 1.15 string-encode string-decode set-system-external-format
18     +replacement-character-code+))
19 rtoy 1.1
20     (defvar *default-external-format* :iso8859-1)
21    
22 rtoy 1.3 (defvar *external-formats* (make-hash-table :test 'equal))
23 rtoy 1.1 (defvar *external-format-aliases* (make-hash-table))
24    
25 rtoy 1.15 (vm::defenum (:prefix "+EF-" :suffix "+" :start 1)
26     str ; string length
27     cin ; input a character
28     cout ; output a character
29     sin ; input string
30     sout ; output string
31     os ; octets to string
32     so ; string to octets
33     en ; encode
34     de ; decode
35     flush ; flush state
36     max)
37 rtoy 1.6
38     ;; Unicode replacement character U+FFFD
39     (defconstant +replacement-character-code+ #xFFFD)
40 rtoy 1.3
41 rtoy 1.1 (define-condition external-format-not-implemented (error)
42     ()
43     (:report
44     (lambda (condition stream)
45     (declare (ignore condition))
46     (format stream "Attempting unimplemented external-format I/O."))))
47    
48     (defun %efni (a b c d)
49     (declare (ignore a b c d))
50     (error 'external-format-not-implemented))
51    
52 rtoy 1.5 (defstruct efx
53 rtoy 1.15 ;;
54     ;; Function to read a sequence of octets from a stream and convert
55     ;; them a code point.
56 rtoy 1.5 (octets-to-code #'%efni :type function :read-only t)
57 rtoy 1.15 ;;
58     ;; Function to convert a codepoint to a sequence of octets and write
59     ;; them to an output stream.
60 rtoy 1.5 (code-to-octets #'%efni :type function :read-only t)
61 rtoy 1.15 ;;
62     ;; Function (or NIL) to force any state in the external format to be
63     ;; flushed to the output stream. A NIL value means the external
64     ;; format does not need to do anything special.
65     (flush-state nil :type (or null function) :read-only t)
66 rtoy 1.15.2.1 ;;
67     ;; Function to copy the state of the external-format. If NIL, then
68     ;; there is no state to be copied.
69     (copy-state nil :type (or null function) :read-only t)
70 rtoy 1.6 (cache nil :type (or null simple-vector))
71 rtoy 1.15 ;;
72     ;; Minimum number of octets needed to form a codepoint
73 rtoy 1.6 (min 1 :type kernel:index :read-only t)
74 rtoy 1.15 ;;
75     ;; Maximum number of octest needed to form a codepoint.
76 rtoy 1.6 (max 1 :type kernel:index :read-only t))
77 rtoy 1.5
78 rtoy 1.1 (defstruct (external-format
79     (:conc-name ef-)
80     (:print-function %print-external-format)
81 rtoy 1.5 (:constructor make-external-format (name efx composingp
82     &optional slots slotd)))
83 rtoy 1.3 (name (ext:required-argument) :type (or keyword cons) :read-only t)
84 rtoy 1.5 (efx (ext:required-argument) :type efx :read-only t)
85 rtoy 1.3 (composingp (ext:required-argument) :type boolean :read-only t)
86 rtoy 1.1 (slots #() :type simple-vector :read-only t)
87 rtoy 1.5 (slotd nil :type list :read-only t))
88 rtoy 1.1
89     (defun %print-external-format (ef stream depth)
90     (declare (ignore depth))
91     (print-unreadable-object (ef stream :type t :identity t)
92     (princ (ef-name ef) stream)))
93    
94 rtoy 1.6 (defun %intern-ef (ef)
95 rtoy 1.3 (setf (gethash (ef-name ef) *external-formats*) ef))
96    
97 rtoy 1.15.2.1 (declaim (inline ef-octets-to-code ef-code-to-octets ef-flush-state ef-copy-state
98 rtoy 1.15 ef-cache ef-min-octets ef-max-octets))
99 rtoy 1.5
100     (defun ef-octets-to-code (ef)
101     (efx-octets-to-code (ef-efx ef)))
102    
103     (defun ef-code-to-octets (ef)
104     (efx-code-to-octets (ef-efx ef)))
105    
106 rtoy 1.15 (defun ef-flush-state (ef)
107     (efx-flush-state (ef-efx ef)))
108    
109 rtoy 1.15.2.1 (defun ef-copy-state (ef)
110     (efx-copy-state (ef-efx ef)))
111    
112 rtoy 1.5 (defun ef-cache (ef)
113     (efx-cache (ef-efx ef)))
114    
115 rtoy 1.6 (defun ef-min-octets (ef)
116     (efx-min (ef-efx ef)))
117    
118     (defun ef-max-octets (ef)
119     (efx-max (ef-efx ef)))
120    
121     (eval-when (:compile-toplevel :load-toplevel :execute)
122     (defun %merge-slots (old new)
123     (let* ((pos (length old))
124     (tmp (mapcar (lambda (x)
125     (let* ((name (if (consp x) (first x) x))
126     (init (if (consp x) (second x) nil))
127     (list (if (consp x) (nthcdr 2 x) nil))
128     (prev (assoc name old))
129     (posn (if prev (second prev) (1- (incf pos)))))
130     (list name posn init (getf list :type t))))
131     new)))
132     (delete-duplicates (stable-sort (append old tmp) #'< :key #'second)
133     :key #'second))))
134    
135     ;;; DEFINE-EXTERNAL-FORMAT -- Public
136     ;;;
137 rtoy 1.15 ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets flush-state
138 rtoy 1.6 ;;; Define a new external format. Min/Max/Size are the minimum and
139     ;;; maximum number of octets that make up a character (:size N is just
140     ;;; shorthand for :min N :max N). Slots is a list of slot descriptions
141     ;;; similar to defstruct.
142     ;;;
143     ;;; name (base) (&rest slots)
144     ;;; Define an external format based on a previously-defined external
145     ;;; format, Base. The slot names used in Slots must match those in Base.
146     ;;;
147     ;;; octets-to-code (state input unput &rest vars)
148     ;;; Defines a form to be used by the external format to convert
149     ;;; octets to a code point. State is a form that can be used by the
150     ;;; body to access the state variable of the stream. Input is a
151     ;;; form that can be used to read one more octets from the input
152     ;;; strema. Similarly, Unput is a form to put back one octet to the
153     ;;; input stream. Vars is a list of vars that need to be defined
154     ;;; for any symbols used within the form.
155     ;;;
156     ;;; This should return two values: the code and the number of octets
157     ;;; read to form the code.
158     ;;;
159     ;;; code-to-octets (code state output &rest vars)
160     ;;; Defines a form to be used by the external format to convert a
161     ;;; code point to octets for output. Code is the code point to be
162     ;;; converted. State is a form to access the current value of the
163     ;;; stream's state variable. Output is a form that writes one octet
164     ;;; to the output stream.
165     ;;;
166 rtoy 1.15 ;;; flush-state (state output &rest vars)
167     ;;; Defines a form to be used by the external format to flush out
168     ;;; any state when an output stream is closed. Similar to
169     ;;; CODE-TO-OCTETS, but there is no code.
170     ;;;
171 rtoy 1.6 ;;; Note: external-formats work on code-points, not
172     ;;; characters, so that the entire 31 bit ISO-10646 range can be
173     ;;; used internally regardless of the size of a character recognized
174     ;;; by Lisp and external formats can be useful to people who want to
175     ;;; process characters outside the Lisp range (see
176     ;;; CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)
177     ;;;
178     (defmacro define-external-format (name (&rest args) (&rest slots)
179 rtoy 1.15.2.1 &optional octets-to-code code-to-octets
180     flush-state copy-state)
181 rtoy 1.6 (when (and (oddp (length args)) (not (= (length args) 1)))
182     (warn "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT." args))
183     (let* ((tmp (gensym))
184     (min (if (evenp (length args))
185     (or (getf args :min) (getf args :size) 1)
186     1))
187     (max (if (evenp (length args))
188     (or (getf args :max) (getf args :size) 6)
189     6))
190     (base (if (= (length args) 1)
191     (find-external-format (first args))
192     nil))
193     (bslotd (if base (ef-slotd base) nil))
194     (slotd (%merge-slots bslotd slots))
195     (slotb (loop for slot in slotd
196     collect `(,(first slot)
197     `(the ,',(fourth slot)
198     ;; IDENTITY is here to protect against SETF
199     (identity (svref %slots% ,',(second slot))))))))
200 rtoy 1.3 `(macrolet ((octets-to-code ((state input unput &rest vars) body)
201 rtoy 1.6 `(lambda (,state ,input ,unput)
202     (declare (ignorable ,state ,input ,unput)
203 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
204 rtoy 1.6 (let (,@',slotb
205     (,input `(the (or (unsigned-byte 8) null) ,,input))
206 rtoy 1.3 ,@(loop for var in vars collect `(,var (gensym))))
207     ,body)))
208     (code-to-octets ((code state output &rest vars) body)
209 rtoy 1.6 `(lambda (,',tmp ,state ,output)
210     (declare (ignorable ,state ,output)
211 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
212 rtoy 1.6 (let (,@',slotb
213     (,code ',code)
214 rtoy 1.3 ,@(loop for var in vars collect `(,var (gensym))))
215 rtoy 1.6 `(let ((,',code (the (unsigned-byte 21) ,,',tmp)))
216 rtoy 1.3 (declare (ignorable ,',code))
217 rtoy 1.15 ,,body))))
218     (flush-state ((state output &rest vars) body)
219     `(lambda (,state ,output)
220     (declare (ignorable ,state ,output))
221     (let (,@',slotb
222     ,@(loop for var in vars collect `(,var (gensym))))
223 rtoy 1.15.2.1 ,body)))
224     (copy-state ((state &rest vars) body)
225     `(lambda (,state)
226     (declare (ignorable ,state))
227     (let (,@',slotb
228     ,@(loop for var in vars collect `(,var (gensym))))
229 rtoy 1.15 ,body))))
230 rtoy 1.6 (%intern-ef (make-external-format ,name
231     ,(if base
232     `(ef-efx (find-external-format ,(ef-name base)))
233     `(make-efx :octets-to-code ,octets-to-code
234     :code-to-octets ,code-to-octets
235 rtoy 1.15.2.1 :flush-state ,flush-state
236     :copy-state ,copy-state
237 rtoy 1.6 :cache (make-array +ef-max+
238     :initial-element nil)
239     :min ,(min min max) :max ,(max min max)))
240     nil
241     (let* ,(loop for x in slotd
242     collect (list (first x) (third x)))
243     (vector ,@(mapcar #'first slotd)))
244     ',slotd)))))
245 rtoy 1.3
246 rtoy 1.6 ;;; DEFINE-COMPOSING-EXTERNAL-FORMAT -- Public
247     ;;;
248     ;;; A composing-external-format differs from an (ordinary) external-format
249     ;;; in that it translates characters (really codepoints, of course) into
250     ;;; other characters, rather than translating between characters and binary
251     ;;; octets. They have to be composed with a non-composing external-format
252     ;;; to be of any use.
253     ;;;
254     (defmacro define-composing-external-format (name (&key min max size)
255     input output)
256     (let ((tmp (gensym))
257     (min (or min size 1))
258     (max (or max size 1)))
259 rtoy 1.3 `(macrolet ((input ((state input unput &rest vars) body)
260 rtoy 1.6 `(lambda (,state ,input ,unput)
261     (declare (ignorable ,state ,input ,unput)
262 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
263 rtoy 1.6 (let ((,input `(the (values (or (unsigned-byte 21) null)
264     kernel:index)
265 rtoy 1.3 ,,input))
266     ,@(loop for var in vars collect `(,var (gensym))))
267     ,body)))
268     (output ((code state output &rest vars) body)
269 rtoy 1.6 `(lambda (,',tmp ,state ,output)
270     (declare (ignorable ,state ,output)
271 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
272     (let ((,code ',code)
273     ,@(loop for var in vars collect `(,var (gensym))))
274 rtoy 1.6 `(let ((,',code (the (unsigned-byte 21) ,,',tmp)))
275 rtoy 1.3 (declare (ignorable ,',code))
276     ,,body)))))
277 rtoy 1.6 (%intern-ef (make-external-format ,name
278     (make-efx :octets-to-code ,input
279     :code-to-octets ,output
280     :min ,(min min max) :max ,(max min max))
281     t
282     #() '())))))
283 rtoy 1.1
284     (defun load-external-format-aliases ()
285 rtoy 1.6 (let ((*package* (find-package "KEYWORD"))
286     (unix::*filename-encoding* :iso8859-1))
287 rtoy 1.11 (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil
288     :external-format :iso8859-1)
289 rtoy 1.1 (when stm
290 rtoy 1.6 (do ((alias (read stm nil stm) (read stm nil stm))
291     (value (read stm nil stm) (read stm nil stm)))
292     ((or (eq alias stm) (eq value stm))
293     (unless (eq alias stm)
294     (warn "External-format aliases file ends early.")))
295 rtoy 1.11 (if (and (keywordp alias) (or (keywordp value)
296     (and (consp value)
297     (every #'keywordp value))))
298 rtoy 1.6 (setf (gethash alias *external-format-aliases*) value)
299     (warn "Bad entry in external-format aliases file: ~S => ~S."
300     alias value)))))))
301 rtoy 1.1
302 rtoy 1.3 (defun %find-external-format (name)
303 rtoy 1.6 ;; avoid loading files, etc., early in the boot sequence
304     (when (or (eq name :iso8859-1)
305     (and (eq name :default) (eq *default-external-format* :iso8859-1)))
306     (return-from %find-external-format
307     (gethash :iso8859-1 *external-formats*)))
308    
309 rtoy 1.1 (when (zerop (hash-table-count *external-format-aliases*))
310     (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
311     (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
312     (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
313     (load-external-format-aliases))
314    
315     (do ((tmp (gethash name *external-format-aliases*)
316     (gethash tmp *external-format-aliases*))
317     (cnt 0 (1+ cnt)))
318     ((or (null tmp) (= cnt 50))
319     (unless (null tmp)
320     (error "External-format aliasing depth exceeded.")))
321     (setq name tmp))
322    
323     (or (gethash name *external-formats*)
324 rtoy 1.11 (and (consp name) (find-external-format name))
325 rtoy 1.2 (and (let ((*package* (find-package "STREAM"))
326 rtoy 1.12 (lisp::*enable-package-locked-errors* nil)
327     (s (open (format nil "ext-formats:~(~A~).lisp" name)
328     :if-does-not-exist nil :external-format :iso8859-1)))
329     (when s
330     (null (nth-value 1 (ext:compile-from-stream s)))))
331 rtoy 1.3 (gethash name *external-formats*))))
332    
333     (defun %composed-ef-name (a b)
334     (if (consp a) (append a (list b)) (list a b)))
335    
336 rtoy 1.6 (defun %compose-external-formats (a b)
337 rtoy 1.3 (when (ef-composingp a)
338     (error "~S is a Composing-External-Format." (ef-name a)))
339     (unless (ef-composingp b)
340     (error "~S is not a Composing-External-Format." (ef-name b)))
341 rtoy 1.5 (make-external-format
342     (%composed-ef-name (ef-name a) (ef-name b))
343     (make-efx
344 rtoy 1.6 :octets-to-code (lambda (state input unput)
345 rtoy 1.11 (let ((nstate (gensym "STATE-")))
346     `(let ((,nstate ,state))
347     (when (null ,nstate)
348     (setq ,nstate (setf ,state (cons nil nil))))
349     ,(funcall (ef-octets-to-code b) `(car ,nstate)
350     (funcall (ef-octets-to-code a)
351     `(cdr ,nstate) input unput)
352     unput))))
353 rtoy 1.6 :code-to-octets (lambda (code state output)
354 rtoy 1.11 (let ((nstate (gensym "STATE-")))
355     `(let ((,nstate ,state))
356     (when (null ,nstate)
357     (setq ,nstate (setf ,state (cons nil nil))))
358     ,(funcall (ef-code-to-octets b) code `(car ,nstate)
359     `(lambda (x)
360     ,(funcall (ef-code-to-octets a)
361     'x `(cdr ,nstate) output))))))
362 rtoy 1.6 :cache (make-array +ef-max+ :initial-element nil)
363     :min (* (ef-min-octets a) (ef-min-octets b))
364     :max (* (ef-max-octets a) (ef-max-octets b)))
365 rtoy 1.5 nil #() '()))
366 rtoy 1.3
367     (defun find-external-format (name &optional (error-p t))
368     (when (external-format-p name)
369     (return-from find-external-format name))
370    
371     (or (if (consp name) (every #'keywordp name) (keywordp name))
372     (error "~S is not a valid external format name." name))
373 rtoy 1.1
374 rtoy 1.3 (when (eq name :default)
375     (setq name *default-external-format*))
376    
377     (when (and (consp name) (not (cdr name)))
378     (setq name (car name)))
379    
380 rtoy 1.6 (flet ((not-found ()
381     (when (equal *default-external-format* name)
382     (setq *default-external-format* :iso8859-1))
383     (if error-p (error "External format ~S not found." name) nil)))
384     (if (consp name)
385     (let ((efs (mapcar #'%find-external-format name)))
386     (if (member nil efs)
387     (not-found)
388     (let ((name (reduce #'%composed-ef-name (mapcar #'ef-name efs))))
389     (or (gethash name *external-formats*)
390     (%intern-ef (reduce #'%compose-external-formats efs))))))
391     (or (%find-external-format name) (not-found)))))
392    
393     (defun flush-external-formats ()
394     (maphash (lambda (name ef)
395     (declare (ignore name))
396     (fill (ef-cache ef) nil))
397     *external-formats*))
398    
399     (defvar *.table-inverse.* (make-hash-table :test 'eq :size 7))
400    
401     (defun invert-table (table)
402     (declare (type (or (simple-array (unsigned-byte 31) *)
403     (simple-array (unsigned-byte 16) *))
404     table)
405     (optimize (speed 3) (space 0) (safety 0) (debug 0)
406     (ext:inhibit-warnings 3)))
407     (or (gethash table *.table-inverse.*)
408     (let* ((mbits (if (= (array-total-size table) 128) 7 8))
409     (lbits (cond ((> (array-total-size table) 256) 3)
410     ((< (array-total-size table) 100) 6)
411     (t 5)))
412     (hvec (make-array (1+ (ash #x110000 (- 0 mbits lbits)))
413     :element-type '(unsigned-byte 16)
414     :initial-element #xFFFF))
415     (mvec (make-array 0 :element-type '(unsigned-byte 16)))
416     (lvec (make-array 0 :element-type '(unsigned-byte 16)))
417     (width (array-dimension table 0))
418     (power (1- (array-rank table)))
419     (base (if (= width 94) 1 0))
420     hx mx lx)
421     (assert (and (< power 2) (<= width 256)))
422     (dotimes (i (array-total-size table))
423     (declare (type (integer 0 (#.array-dimension-limit)) i))
424     (let ((tmp i) (val (row-major-aref table i)) (z 0))
425     (declare (type (integer 0 (#.array-dimension-limit)) tmp)
426     (type (unsigned-byte 16) z))
427     (unless (= val #xFFFE)
428     (when (plusp power)
429     (multiple-value-bind (x y) (floor tmp width)
430     (setq tmp x)
431     (setq z (logior z (ash (the (integer 0 255) (+ y base))
432     (the (integer 0 24)
433     (* 8 power)))))))
434     (setq hx (ash val (- 0 mbits lbits)))
435     (when (= (aref hvec hx) #xFFFF)
436     (setf (aref hvec hx) (length mvec))
437     (let ((tmp (make-array (+ (length mvec) (ash 1 mbits))
438     :element-type '(unsigned-byte 16)
439     :initial-element #xFFFF)))
440     (replace tmp mvec)
441     (setq mvec tmp)))
442     (setq mx (logand (ash val (- lbits)) (lognot (ash -1 mbits))))
443 rtoy 1.7 (when (= (aref mvec (+ (aref hvec hx) mx)) #xFFFF)
444     (setf (aref mvec (+ (aref hvec hx) mx)) (length lvec))
445 rtoy 1.6 (let ((tmp (make-array (+ (length lvec) (ash 1 lbits))
446     :element-type '(unsigned-byte 16)
447     :initial-element #xFFFF)))
448     (replace tmp lvec)
449     (setq lvec tmp)))
450     (setq lx (logand val (lognot (ash -1 lbits))))
451 rtoy 1.7 (setf (aref lvec (+ (aref mvec (+ (aref hvec hx) mx)) lx))
452 rtoy 1.6 (logior z (+ tmp base))))))
453     (setf (gethash table *.table-inverse.*)
454     (lisp::make-ntrie16 :split (logior (ash (1- mbits) 4) (1- lbits))
455     :hvec hvec :mvec mvec :lvec lvec)))))
456    
457     (declaim (inline get-inverse))
458     (defun get-inverse (ntrie code)
459 rtoy 1.11 (declare (type lisp::ntrie16 ntrie) (type (integer 0 #x10FFFF) code))
460 rtoy 1.6 (let ((n (lisp::qref ntrie code)))
461     (and n (let ((m (aref (lisp::ntrie16-lvec ntrie) n)))
462     (if (= m #xFFFF) nil m)))))
463    
464 rtoy 1.1
465     (define-condition void-external-format (error)
466     ()
467     (:report
468     (lambda (condition stream)
469     (declare (ignore condition))
470     (format stream "Attempting I/O through void external-format."))))
471    
472 rtoy 1.6 (define-external-format :void (:size 0) ()
473 rtoy 1.1 (octets-to-code (state input unput)
474 rtoy 1.3 `(error 'void-external-format))
475 rtoy 1.1 (code-to-octets (code state output)
476 rtoy 1.3 `(error 'void-external-format)))
477 rtoy 1.1
478 rtoy 1.6 (define-external-format :iso8859-1 (:size 1) ()
479 rtoy 1.1 (octets-to-code (state input unput)
480 rtoy 1.3 `(values ,input 1))
481 rtoy 1.1 (code-to-octets (code state output)
482 rtoy 1.3 `(,output (if (> ,code 255) #x3F ,code))))
483 rtoy 1.1
484 rtoy 1.6 ;;; OCTETS-TO-CODEPOINT, CODEPOINT-TO-OCTETS -- Semi-Public
485     ;;;
486     ;;; Normally you'd want to use OCTETS-TO-CHAR and CHAR-TO-OCTETS instead of
487     ;;; these, but that limits you to Lisp's idea of a character - either Latin-1
488     ;;; in 8 bit Lisp images, or the Unicode BMP in 16 bit images. If you want
489     ;;; to read or write texts containing characters not supported by your Lisp,
490     ;;; these macros can be used instead.
491 rtoy 1.1 (defmacro octets-to-codepoint (external-format state count input unput)
492 rtoy 1.6 (let ((tmp1 (gensym)) (tmp2 (gensym))
493     (ef (find-external-format external-format)))
494     `(multiple-value-bind (,tmp1 ,tmp2)
495     ,(funcall (ef-octets-to-code ef) state input unput)
496     (setf ,count (the kernel:index ,tmp2))
497     (the (or (unsigned-byte 21) null) ,tmp1))))
498 rtoy 1.1
499     (defmacro codepoint-to-octets (external-format code state output)
500 rtoy 1.6 (let ((ef (find-external-format external-format)))
501     (funcall (ef-code-to-octets ef) code state output)))
502 rtoy 1.3
503    
504    
505     (defvar *ef-base* +ef-max+)
506     (defvar *ef-extensions* '())
507    
508     (defun ensure-cache (ef id reqd)
509     (let ((base (or (getf *ef-extensions* id)
510     (setf (getf *ef-extensions* id)
511     (prog1 *ef-base* (incf *ef-base* reqd))))))
512     (when (< (length (ef-cache ef)) (+ base reqd))
513 rtoy 1.5 (setf (efx-cache (ef-efx ef))
514 rtoy 1.3 (adjust-array (ef-cache ef) (+ base reqd) :initial-element nil)))
515     base))
516    
517 rtoy 1.6 ;;; DEF-EF-MACRO -- Public
518     ;;;
519     ;;;
520 rtoy 1.3 (defmacro def-ef-macro (name (ef id reqd idx) body)
521 rtoy 1.11 (let* ((tmp1 (gensym))
522     (tmp2 (gensym))
523     (blknm (nth-value 1 (lisp::valid-function-name-p name)))
524     (%name (intern (format nil "%~A" name) #|(symbol-package blknm)|#)))
525 rtoy 1.6 `(progn
526     (defun ,%name (,ef)
527     (let* ((,tmp1 (find-external-format ,ef))
528     (,tmp2 ,(if (eq id 'lisp::lisp)
529     idx
530     `(+ (ensure-cache ,tmp1 ',id ,reqd) ,idx))))
531     (funcall (or (aref (ef-cache ,tmp1) ,tmp2)
532     (setf (aref (ef-cache ,tmp1) ,tmp2)
533     (let ((*compile-print* nil)
534     ;; Set default format when we compile so we
535     ;; can see compiler messages. If we don't,
536     ;; we run into a problem that we might be
537     ;; changing the default format while we're
538     ;; compiling, and we don't know how to output
539     ;; the compiler messages.
540 rtoy 1.11 #|(*default-external-format* :iso8859-1)|#)
541 rtoy 1.6 (compile nil `(lambda (%slots%)
542     (declare (ignorable %slots%))
543 rtoy 1.11 (block ,',blknm
544     ,,body))))))
545 rtoy 1.6 (ef-slots ,tmp1))))
546     (declaim (inline ,name))
547     (defun ,name (,tmp1)
548     (let ((,tmp2 (load-time-value (cons nil nil))))
549     (when (eq ,tmp1 :default)
550     (setq ,tmp1 *default-external-format*))
551     (if (eq ,tmp1 (car ,tmp2))
552     (cdr ,tmp2)
553     (setf (car ,tmp2) ,tmp1
554     (cdr ,tmp2) (,%name ,tmp1))))))))
555 rtoy 1.1
556    
557    
558 rtoy 1.6 ;;; OCTETS-TO-CHAR, CHAR-TO-OCTETS -- Public
559     ;;;
560     ;;; Read and write one character through an external-format
561     ;;;
562 rtoy 1.1 (defmacro octets-to-char (external-format state count input unput)
563 rtoy 1.11 (let ((nstate (gensym)))
564     `(let ((,nstate ,state))
565     (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
566     (if (car ,nstate)
567 rtoy 1.13 ;; Return the trailing surrgate. Must set count to 0 to
568     ;; tell the stream code we didn't consume any octets!
569 rtoy 1.11 (prog1 (the character (car ,nstate))
570     (setf (car ,nstate) nil ,count 0))
571     (let ((code (octets-to-codepoint ,external-format
572     (cdr ,nstate) ,count ,input ,unput)))
573     (declare (type (unsigned-byte 21) code))
574     ;;@@ on non-Unicode builds, limit to 8-bit chars
575     ;;@@ if unicode-bootstrap, can't use #\u+fffd
576 rtoy 1.13 (cond ((or (<= #xD800 code #xDFFF) (> code #x10FFFF))
577     #-(and unicode (not unicode-bootstrap)) #\?
578     #+(and unicode (not unicode-bootstrap)) #\U+FFFD)
579     #+unicode
580 rtoy 1.11 ((> code #xFFFF)
581     (multiple-value-bind (hi lo) (surrogates code)
582     (setf (car ,nstate) lo)
583 rtoy 1.6 hi))
584 rtoy 1.11 (t (code-char code))))))))
585 rtoy 1.6
586 rtoy 1.1 (defmacro char-to-octets (external-format char state output)
587 rtoy 1.11 (let ((nchar (gensym))
588     (nstate (gensym))
589     (wryte (gensym))
590     (ch (gensym)))
591     `(let ((,nchar ,char)
592     (,nstate ,state))
593     (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
594 rtoy 1.14 (if (lisp::surrogatep (char-code ,nchar) :high)
595 rtoy 1.11 (setf (car ,nstate) ,nchar)
596     (flet ((,wryte (,ch)
597     (codepoint-to-octets ,external-format ,ch (cdr ,nstate)
598     ,output)))
599     (declare (dynamic-extent #',wryte))
600     (if (car ,nstate)
601 rtoy 1.14 (prog1
602     ;; Invalid surrogate sequences get replaced with
603     ;; the replacement character.
604     (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
605     (surrogates-to-codepoint (car ,nstate) ,nchar)
606     +replacement-character-code+))
607 rtoy 1.11 (setf (car ,nstate) nil))
608 rtoy 1.14 ;; A lone trailing (low) surrogate gets replaced with
609     ;; the replacement character.
610     (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
611     +replacement-character-code+
612     (char-code ,nchar)))))))))
613 rtoy 1.6
614 rtoy 1.15 (defmacro flush-state (external-format state output)
615     (let* ((ef (find-external-format external-format))
616     (f (ef-flush-state ef)))
617     (when f
618     (funcall f state output))))
619    
620 rtoy 1.15.2.1 (defmacro copy-state (external-format state)
621     (let* ((ef (find-external-format external-format))
622     (f (ef-copy-state ef)))
623     (when f
624     (funcall f state))))
625    
626 rtoy 1.3 (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
627 rtoy 1.11 `(lambda (string start end buffer &aux (ptr 0) (state nil))
628 rtoy 1.3 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
629     (type simple-string string)
630 rtoy 1.6 (type kernel:index start end ptr)
631 rtoy 1.3 (type (simple-array (unsigned-byte 8) (*)) buffer)
632     (ignorable state))
633 rtoy 1.11 (dotimes (i (- end start) (values buffer ptr))
634     (declare (type kernel:index i))
635     (char-to-octets ,extfmt (schar string (+ start i)) state
636     (lambda (b)
637     (when (= ptr (length buffer))
638     (setq buffer (adjust-array buffer (* 2 ptr))))
639     (setf (aref buffer (1- (incf ptr))) b))))))
640 rtoy 1.3
641 rtoy 1.2 (defun string-to-octets (string &key (start 0) end (external-format :default)
642     (buffer nil bufferp))
643 rtoy 1.1 (declare (type string string)
644 rtoy 1.6 (type kernel:index start)
645     (type (or kernel:index null) end)
646 rtoy 1.3 (type (or (simple-array (unsigned-byte 8) (*)) null) buffer))
647 rtoy 1.6 (let* ((buffer (or buffer (make-array (length string)
648 rtoy 1.3 :element-type '(unsigned-byte 8)))))
649 rtoy 1.6 (multiple-value-bind (buffer ptr)
650     (lisp::with-array-data ((string string) (start start) (end end))
651     (funcall (ef-string-to-octets external-format)
652     string start end buffer))
653     (values (if bufferp buffer (lisp::shrink-vector buffer ptr)) ptr))))
654 rtoy 1.3
655     (def-ef-macro ef-octets-to-string (extfmt lisp::lisp +ef-max+ +ef-os+)
656 rtoy 1.11 `(lambda (octets ptr end string &aux (pos -1) (count 0) (state nil))
657 rtoy 1.3 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
658     (type (simple-array (unsigned-byte 8) (*)) octets)
659 rtoy 1.6 (type kernel:index end count)
660 rtoy 1.3 (type (integer -1 (#.array-dimension-limit)) ptr pos)
661     (type simple-string string)
662     (ignorable state))
663     (loop until (>= ptr end)
664     do (when (= pos (length string))
665     (setq string (adjust-array string (* 2 pos))))
666 rtoy 1.11 (setf (schar string (incf pos))
667     (octets-to-char ,extfmt state count
668 rtoy 1.6 (aref octets (incf ptr)) ;;@@ EOF??
669     (lambda (n) (decf ptr n))))
670 rtoy 1.5 finally (return (values string (1+ pos))))))
671 rtoy 1.1
672 rtoy 1.2 (defun octets-to-string (octets &key (start 0) end (external-format :default)
673     (string nil stringp))
674 rtoy 1.1 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
675 rtoy 1.6 (type kernel:index start)
676     (type (or kernel:index null) end)
677 rtoy 1.3 (type (or simple-string null) string))
678     (multiple-value-bind (string pos)
679 rtoy 1.6 (funcall (ef-octets-to-string external-format)
680 rtoy 1.3 octets (1- start) (1- (or end (length octets)))
681     (or string (make-string (length octets))))
682 rtoy 1.2 (values (if stringp string (lisp::shrink-vector string pos)) pos)))
683 rtoy 1.1
684    
685    
686 rtoy 1.3 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
687     `(lambda (string start end result &aux (ptr 0) (state nil))
688     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
689     (type simple-string string)
690 rtoy 1.6 (type kernel:index start end ptr)
691 rtoy 1.3 (type simple-base-string result)
692     (ignorable state))
693 rtoy 1.11 (dotimes (i (- end start) (values result ptr))
694     (declare (type kernel:index i))
695     (char-to-octets ,extfmt (schar string (+ start i)) state
696     (lambda (b)
697     (when (= ptr (length result))
698     (setq result (adjust-array result (* 2 ptr))))
699     (setf (aref result (1- (incf ptr)))
700     (code-char b)))))))
701 rtoy 1.3
702     (defun string-encode (string external-format &optional (start 0) end)
703 rtoy 1.6 "Encode the given String using External-Format and return a new
704     string. The characters of the new string are the octets of the
705     encoded result, with each octet converted to a character via
706     code-char. This is the inverse to String-Decode"
707     (when (zerop (length string))
708     (return-from string-encode string))
709 rtoy 1.3 (multiple-value-bind (result ptr)
710     (lisp::with-array-data ((string string) (start start) (end end))
711 rtoy 1.6 (funcall (ef-encode external-format) string start end
712 rtoy 1.3 (make-string (length string) :element-type 'base-char)))
713     (lisp::shrink-vector result ptr)))
714    
715     (def-ef-macro ef-decode (extfmt lisp::lisp +ef-max+ +ef-de+)
716     `(lambda (string ptr end result &aux (pos -1) (count 0) (state nil))
717     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
718     (type simple-string string)
719 rtoy 1.6 (type kernel:index end count)
720 rtoy 1.3 (type (integer -1 (#.array-dimension-limit)) ptr pos)
721     (type simple-string result)
722     (ignorable state))
723     (loop until (>= ptr end)
724     ;; increasing size of result shouldn't ever be necessary, unless
725     ;; someone implements an encoding smaller than the source string...
726 rtoy 1.11 do (setf (schar result (incf pos))
727     (octets-to-char ,extfmt state count
728     ;; note the need to return NIL for EOF
729     (if (= (1+ ptr) (length string))
730     nil
731     (char-code (char string (incf ptr))))
732     (lambda (n) (decf ptr n))))
733 rtoy 1.5 finally (return (values result (1+ pos))))))
734 rtoy 1.3
735     (defun string-decode (string external-format &optional (start 0) end)
736 rtoy 1.6 "Decode String using the given External-Format and return the new
737     string. The input string is treated as if it were an array of
738     octets, where the char-code of each character is the octet. This is
739     the inverse of String-Encode."
740     (when (zerop (length string))
741     (return-from string-decode string))
742 rtoy 1.3 (multiple-value-bind (result pos)
743     (lisp::with-array-data ((string string) (start start) (end end))
744 rtoy 1.6 (funcall (ef-decode external-format)
745 rtoy 1.3 string (1- start) (1- end) (make-string (length string))))
746 rtoy 1.5 (lisp::shrink-vector result pos)))
747 rtoy 1.11
748    
749     (defun set-system-external-format (terminal &optional filenames)
750     (unless (find-external-format terminal)
751     (error "Can't find external-format ~S." terminal))
752     (setf (stream-external-format sys:*stdin*) terminal
753     (stream-external-format sys:*stdout*) terminal
754     (stream-external-format sys:*stderr*) terminal)
755     (when (lisp::fd-stream-p sys:*tty*)
756     (setf (stream-external-format sys:*tty*) terminal))
757     (when filenames
758     (unless (find-external-format filenames)
759     (error "Can't find external-format ~S." filenames))
760     (when (and unix::*filename-encoding*
761     (not (eq unix::*filename-encoding* filenames)))
762     (cerror "Change it anyway."
763     "The external-format for encoding filenames is already set.")
764     (setq unix::*filename-encoding* filenames)))
765     t)
766    
767    
768     ;; Despite its name, this doesn't actually compile anything at all. What it
769     ;; does is expand into a lambda expression that can be compiled by the file
770     ;; compiler.
771     (defmacro precompile-ef-slot (ef slot)
772     (let* ((ef (find-external-format ef)))
773     ;; if there's no lambda expression available, flush it and regenerate
774     (unless (and (aref (ef-cache ef) slot)
775     (function-lambda-expression (aref (ef-cache ef) slot)))
776     (setf (aref (ef-cache ef) slot) nil)
777     (ecase slot
778     (#.+ef-cin+ (lisp::%ef-cin ef))
779     (#.+ef-cout+ (lisp::%ef-cout ef))
780     (#.+ef-sout+ (lisp::%ef-sout ef))
781     (#.+ef-os+ (%ef-octets-to-string ef))
782     (#.+ef-so+ (%ef-string-to-octets ef))
783     (#.+ef-en+ (%ef-encode ef))
784     (#.+ef-de+ (%ef-decode ef))))
785     `(setf (aref (ef-cache (find-external-format ,(ef-name ef))) ,slot)
786     ,(subst (ef-name ef) ef
787     (function-lambda-expression (aref (ef-cache ef) slot))))))

  ViewVC Help
Powered by ViewVC 1.1.5