/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.21 - (hide annotations)
Mon Mar 8 20:43:20 2010 UTC (4 years, 1 month ago) by rtoy
Branch: MAIN
CVS Tags: snapshot-2010-03
Changes since 1.20: +5 -1 lines
Bind *readtable* to the standard table when compiling the external
format in case the user has set a non-standard readtable that can't
process the external format.
1 rtoy 1.1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2     ;;;
3     ;;; **********************************************************************
4     ;;; This code was written by Paul Foley and has been placed in the public
5     ;;; domain.
6     ;;;
7     (ext:file-comment
8 rtoy 1.21 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.21 2010/03/08 20:43:20 rtoy Exp $")
9 rtoy 1.1 ;;;
10     ;;; **********************************************************************
11     ;;;
12     ;;; Implementation of external-formats
13    
14     (in-package "STREAM")
15    
16     (export '(string-to-octets octets-to-string *default-external-format*
17 rtoy 1.15 string-encode string-decode set-system-external-format
18     +replacement-character-code+))
19 rtoy 1.1
20     (defvar *default-external-format* :iso8859-1)
21    
22 rtoy 1.3 (defvar *external-formats* (make-hash-table :test 'equal))
23 rtoy 1.1 (defvar *external-format-aliases* (make-hash-table))
24    
25 rtoy 1.15 (vm::defenum (:prefix "+EF-" :suffix "+" :start 1)
26     str ; string length
27     cin ; input a character
28     cout ; output a character
29     sin ; input string
30     sout ; output string
31     os ; octets to string
32     so ; string to octets
33     en ; encode
34     de ; decode
35     flush ; flush state
36 rtoy 1.20 copy-state ; copy state
37 rtoy 1.15 max)
38 rtoy 1.6
39     ;; Unicode replacement character U+FFFD
40     (defconstant +replacement-character-code+ #xFFFD)
41 rtoy 1.3
42 rtoy 1.1 (define-condition external-format-not-implemented (error)
43     ()
44     (:report
45     (lambda (condition stream)
46     (declare (ignore condition))
47     (format stream "Attempting unimplemented external-format I/O."))))
48    
49     (defun %efni (a b c d)
50     (declare (ignore a b c d))
51     (error 'external-format-not-implemented))
52    
53 rtoy 1.5 (defstruct efx
54 rtoy 1.15 ;;
55     ;; Function to read a sequence of octets from a stream and convert
56     ;; them a code point.
57 rtoy 1.5 (octets-to-code #'%efni :type function :read-only t)
58 rtoy 1.15 ;;
59     ;; Function to convert a codepoint to a sequence of octets and write
60     ;; them to an output stream.
61 rtoy 1.5 (code-to-octets #'%efni :type function :read-only t)
62 rtoy 1.15 ;;
63     ;; Function (or NIL) to force any state in the external format to be
64     ;; flushed to the output stream. A NIL value means the external
65     ;; format does not need to do anything special.
66     (flush-state nil :type (or null function) :read-only t)
67 rtoy 1.16 ;;
68     ;; Function to copy the state of the external-format. If NIL, then
69     ;; there is no state to be copied.
70     (copy-state nil :type (or null function) :read-only t)
71 rtoy 1.6 (cache nil :type (or null simple-vector))
72 rtoy 1.15 ;;
73     ;; Minimum number of octets needed to form a codepoint
74 rtoy 1.6 (min 1 :type kernel:index :read-only t)
75 rtoy 1.15 ;;
76 rtoy 1.17 ;; Maximum number of octets needed to form a codepoint.
77 rtoy 1.6 (max 1 :type kernel:index :read-only t))
78 rtoy 1.5
79 rtoy 1.1 (defstruct (external-format
80     (:conc-name ef-)
81     (:print-function %print-external-format)
82 rtoy 1.5 (:constructor make-external-format (name efx composingp
83     &optional slots slotd)))
84 rtoy 1.3 (name (ext:required-argument) :type (or keyword cons) :read-only t)
85 rtoy 1.5 (efx (ext:required-argument) :type efx :read-only t)
86 rtoy 1.3 (composingp (ext:required-argument) :type boolean :read-only t)
87 rtoy 1.1 (slots #() :type simple-vector :read-only t)
88 rtoy 1.5 (slotd nil :type list :read-only t))
89 rtoy 1.1
90     (defun %print-external-format (ef stream depth)
91     (declare (ignore depth))
92     (print-unreadable-object (ef stream :type t :identity t)
93     (princ (ef-name ef) stream)))
94    
95 rtoy 1.6 (defun %intern-ef (ef)
96 rtoy 1.3 (setf (gethash (ef-name ef) *external-formats*) ef))
97    
98 rtoy 1.16 (declaim (inline ef-octets-to-code ef-code-to-octets ef-flush-state ef-copy-state
99 rtoy 1.15 ef-cache ef-min-octets ef-max-octets))
100 rtoy 1.5
101     (defun ef-octets-to-code (ef)
102     (efx-octets-to-code (ef-efx ef)))
103    
104     (defun ef-code-to-octets (ef)
105     (efx-code-to-octets (ef-efx ef)))
106    
107 rtoy 1.15 (defun ef-flush-state (ef)
108     (efx-flush-state (ef-efx ef)))
109    
110 rtoy 1.16 (defun ef-copy-state (ef)
111     (efx-copy-state (ef-efx ef)))
112    
113 rtoy 1.5 (defun ef-cache (ef)
114     (efx-cache (ef-efx ef)))
115    
116 rtoy 1.6 (defun ef-min-octets (ef)
117     (efx-min (ef-efx ef)))
118    
119     (defun ef-max-octets (ef)
120     (efx-max (ef-efx ef)))
121    
122     (eval-when (:compile-toplevel :load-toplevel :execute)
123     (defun %merge-slots (old new)
124     (let* ((pos (length old))
125     (tmp (mapcar (lambda (x)
126     (let* ((name (if (consp x) (first x) x))
127     (init (if (consp x) (second x) nil))
128     (list (if (consp x) (nthcdr 2 x) nil))
129     (prev (assoc name old))
130     (posn (if prev (second prev) (1- (incf pos)))))
131     (list name posn init (getf list :type t))))
132     new)))
133     (delete-duplicates (stable-sort (append old tmp) #'< :key #'second)
134     :key #'second))))
135    
136     ;;; DEFINE-EXTERNAL-FORMAT -- Public
137     ;;;
138 rtoy 1.16 ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets
139     ;;; flush-state copy-state
140     ;;;
141 rtoy 1.6 ;;; Define a new external format. Min/Max/Size are the minimum and
142     ;;; maximum number of octets that make up a character (:size N is just
143     ;;; shorthand for :min N :max N). Slots is a list of slot descriptions
144     ;;; similar to defstruct.
145     ;;;
146     ;;; name (base) (&rest slots)
147     ;;; Define an external format based on a previously-defined external
148     ;;; format, Base. The slot names used in Slots must match those in Base.
149     ;;;
150     ;;; octets-to-code (state input unput &rest vars)
151     ;;; Defines a form to be used by the external format to convert
152     ;;; octets to a code point. State is a form that can be used by the
153 rtoy 1.16 ;;; body to access the state of the stream. Input is a form that
154     ;;; can be used to read one octet from the input stream. (It can be
155     ;;; called as many times as needed.) Similarly, Unput is a form to
156     ;;; put back one octet to the input stream. Vars is a list of vars
157     ;;; that need to be defined for any symbols used within the form.
158 rtoy 1.6 ;;;
159     ;;; This should return two values: the code and the number of octets
160     ;;; read to form the code.
161     ;;;
162     ;;; code-to-octets (code state output &rest vars)
163     ;;; Defines a form to be used by the external format to convert a
164     ;;; code point to octets for output. Code is the code point to be
165     ;;; converted. State is a form to access the current value of the
166     ;;; stream's state variable. Output is a form that writes one octet
167     ;;; to the output stream.
168     ;;;
169 rtoy 1.15 ;;; flush-state (state output &rest vars)
170     ;;; Defines a form to be used by the external format to flush out
171     ;;; any state when an output stream is closed. Similar to
172     ;;; CODE-TO-OCTETS, but there is no code.
173     ;;;
174 rtoy 1.16 ;;; copy-state (state &rest vars)
175     ;;; Defines a form to copy any state needed by the external format.
176     ;;; This should probably be a deep copy so that if the original
177     ;;; state is modified, the copy is not.
178     ;;;
179 rtoy 1.6 ;;; Note: external-formats work on code-points, not
180     ;;; characters, so that the entire 31 bit ISO-10646 range can be
181     ;;; used internally regardless of the size of a character recognized
182     ;;; by Lisp and external formats can be useful to people who want to
183     ;;; process characters outside the Lisp range (see
184     ;;; CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)
185     ;;;
186     (defmacro define-external-format (name (&rest args) (&rest slots)
187 rtoy 1.16 &optional octets-to-code code-to-octets
188     flush-state copy-state)
189 rtoy 1.6 (when (and (oddp (length args)) (not (= (length args) 1)))
190     (warn "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT." args))
191     (let* ((tmp (gensym))
192     (min (if (evenp (length args))
193     (or (getf args :min) (getf args :size) 1)
194     1))
195     (max (if (evenp (length args))
196     (or (getf args :max) (getf args :size) 6)
197     6))
198     (base (if (= (length args) 1)
199     (find-external-format (first args))
200     nil))
201     (bslotd (if base (ef-slotd base) nil))
202     (slotd (%merge-slots bslotd slots))
203     (slotb (loop for slot in slotd
204     collect `(,(first slot)
205     `(the ,',(fourth slot)
206     ;; IDENTITY is here to protect against SETF
207     (identity (svref %slots% ,',(second slot))))))))
208 rtoy 1.3 `(macrolet ((octets-to-code ((state input unput &rest vars) body)
209 rtoy 1.6 `(lambda (,state ,input ,unput)
210     (declare (ignorable ,state ,input ,unput)
211 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
212 rtoy 1.6 (let (,@',slotb
213     (,input `(the (or (unsigned-byte 8) null) ,,input))
214 rtoy 1.3 ,@(loop for var in vars collect `(,var (gensym))))
215     ,body)))
216     (code-to-octets ((code state output &rest vars) body)
217 rtoy 1.6 `(lambda (,',tmp ,state ,output)
218     (declare (ignorable ,state ,output)
219 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
220 rtoy 1.6 (let (,@',slotb
221     (,code ',code)
222 rtoy 1.3 ,@(loop for var in vars collect `(,var (gensym))))
223 rtoy 1.16 `(let ((,',code (the lisp:codepoint ,,',tmp)))
224 rtoy 1.3 (declare (ignorable ,',code))
225 rtoy 1.15 ,,body))))
226     (flush-state ((state output &rest vars) body)
227     `(lambda (,state ,output)
228     (declare (ignorable ,state ,output))
229     (let (,@',slotb
230     ,@(loop for var in vars collect `(,var (gensym))))
231 rtoy 1.16 ,body)))
232     (copy-state ((state &rest vars) body)
233     `(lambda (,state)
234     (declare (ignorable ,state))
235     (let (,@',slotb
236     ,@(loop for var in vars collect `(,var (gensym))))
237 rtoy 1.15 ,body))))
238 rtoy 1.6 (%intern-ef (make-external-format ,name
239     ,(if base
240     `(ef-efx (find-external-format ,(ef-name base)))
241     `(make-efx :octets-to-code ,octets-to-code
242     :code-to-octets ,code-to-octets
243 rtoy 1.16 :flush-state ,flush-state
244     :copy-state ,copy-state
245 rtoy 1.6 :cache (make-array +ef-max+
246     :initial-element nil)
247     :min ,(min min max) :max ,(max min max)))
248     nil
249     (let* ,(loop for x in slotd
250     collect (list (first x) (third x)))
251     (vector ,@(mapcar #'first slotd)))
252     ',slotd)))))
253 rtoy 1.3
254 rtoy 1.6 ;;; DEFINE-COMPOSING-EXTERNAL-FORMAT -- Public
255     ;;;
256     ;;; A composing-external-format differs from an (ordinary) external-format
257     ;;; in that it translates characters (really codepoints, of course) into
258     ;;; other characters, rather than translating between characters and binary
259     ;;; octets. They have to be composed with a non-composing external-format
260     ;;; to be of any use.
261     ;;;
262 rtoy 1.16 ;;;
263     ;;; name (&key min max size) input output
264     ;;; Defines a new composing external format. The parameters Min,
265     ;;; Max, and Size are the same as for defining an external format.
266     ;;; The parameters input and output are forms to handle input and
267     ;;; output.
268     ;;;
269     ;;; input (state input unput &rest vars)
270     ;;; Defines a form to be used by the composing external format when
271     ;;; reading input to transform a codepoint (or sequence of
272     ;;; codepoints) to another. State is a form that can be used by the
273     ;;; body to access the state of the external format. Input is a
274     ;;; form that can be used to read one code point from the input
275     ;;; stream. (Input returns two values, the codepoint and the number
276     ;;; of octets read.) It may be called as many times as needed.
277     ;;; This returns two values: the codepoint of the character (or NIL)
278     ;;; and the number of octets read. Similarly, Unput is a form to
279     ;;; put back one octet to the input stream. Vars is a list of vars
280     ;;; that need to be defined for any symbols used within the form.
281     ;;;
282     ;;; This should return two values: the code and the number of octets
283     ;;; read to form the code.
284     ;;;
285     ;;; output (code state output &rest vars)
286     ;;; Defines a form to be used by the composing external format to
287     ;;; convert a code point to octets for output. Code is the code
288     ;;; point to be converted. State is a form to access the current
289     ;;; value of the stream's state variable. Output is a form that
290     ;;; writes one octet to the output stream.
291    
292 rtoy 1.6 (defmacro define-composing-external-format (name (&key min max size)
293     input output)
294     (let ((tmp (gensym))
295     (min (or min size 1))
296     (max (or max size 1)))
297 rtoy 1.3 `(macrolet ((input ((state input unput &rest vars) body)
298 rtoy 1.6 `(lambda (,state ,input ,unput)
299     (declare (ignorable ,state ,input ,unput)
300 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
301 rtoy 1.16 (let ((,input `(the (values (or lisp:codepoint null)
302 rtoy 1.6 kernel:index)
303 rtoy 1.3 ,,input))
304     ,@(loop for var in vars collect `(,var (gensym))))
305     ,body)))
306     (output ((code state output &rest vars) body)
307 rtoy 1.6 `(lambda (,',tmp ,state ,output)
308     (declare (ignorable ,state ,output)
309 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
310     (let ((,code ',code)
311     ,@(loop for var in vars collect `(,var (gensym))))
312 rtoy 1.16 `(let ((,',code (the lisp:codepoint ,,',tmp)))
313 rtoy 1.3 (declare (ignorable ,',code))
314     ,,body)))))
315 rtoy 1.6 (%intern-ef (make-external-format ,name
316     (make-efx :octets-to-code ,input
317     :code-to-octets ,output
318     :min ,(min min max) :max ,(max min max))
319     t
320     #() '())))))
321 rtoy 1.1
322     (defun load-external-format-aliases ()
323 rtoy 1.6 (let ((*package* (find-package "KEYWORD"))
324     (unix::*filename-encoding* :iso8859-1))
325 rtoy 1.11 (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil
326     :external-format :iso8859-1)
327 rtoy 1.1 (when stm
328 rtoy 1.6 (do ((alias (read stm nil stm) (read stm nil stm))
329     (value (read stm nil stm) (read stm nil stm)))
330     ((or (eq alias stm) (eq value stm))
331     (unless (eq alias stm)
332     (warn "External-format aliases file ends early.")))
333 rtoy 1.11 (if (and (keywordp alias) (or (keywordp value)
334     (and (consp value)
335     (every #'keywordp value))))
336 rtoy 1.6 (setf (gethash alias *external-format-aliases*) value)
337     (warn "Bad entry in external-format aliases file: ~S => ~S."
338     alias value)))))))
339 rtoy 1.1
340 rtoy 1.3 (defun %find-external-format (name)
341 rtoy 1.6 ;; avoid loading files, etc., early in the boot sequence
342     (when (or (eq name :iso8859-1)
343     (and (eq name :default) (eq *default-external-format* :iso8859-1)))
344     (return-from %find-external-format
345     (gethash :iso8859-1 *external-formats*)))
346    
347 rtoy 1.1 (when (zerop (hash-table-count *external-format-aliases*))
348     (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
349     (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
350     (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
351     (load-external-format-aliases))
352    
353     (do ((tmp (gethash name *external-format-aliases*)
354     (gethash tmp *external-format-aliases*))
355     (cnt 0 (1+ cnt)))
356     ((or (null tmp) (= cnt 50))
357     (unless (null tmp)
358     (error "External-format aliasing depth exceeded.")))
359     (setq name tmp))
360    
361     (or (gethash name *external-formats*)
362 rtoy 1.11 (and (consp name) (find-external-format name))
363 rtoy 1.2 (and (let ((*package* (find-package "STREAM"))
364 rtoy 1.12 (lisp::*enable-package-locked-errors* nil)
365 rtoy 1.21 ;; The standard readtable is what we want to use when
366     ;; compiling the external format, in case the user
367     ;; has changed *readtable* in some way.
368     (*readtable* (copy-readtable nil))
369 rtoy 1.12 (s (open (format nil "ext-formats:~(~A~).lisp" name)
370     :if-does-not-exist nil :external-format :iso8859-1)))
371     (when s
372     (null (nth-value 1 (ext:compile-from-stream s)))))
373 rtoy 1.3 (gethash name *external-formats*))))
374    
375     (defun %composed-ef-name (a b)
376     (if (consp a) (append a (list b)) (list a b)))
377    
378 rtoy 1.6 (defun %compose-external-formats (a b)
379 rtoy 1.3 (when (ef-composingp a)
380     (error "~S is a Composing-External-Format." (ef-name a)))
381     (unless (ef-composingp b)
382     (error "~S is not a Composing-External-Format." (ef-name b)))
383 rtoy 1.5 (make-external-format
384     (%composed-ef-name (ef-name a) (ef-name b))
385     (make-efx
386 rtoy 1.6 :octets-to-code (lambda (state input unput)
387 rtoy 1.11 (let ((nstate (gensym "STATE-")))
388     `(let ((,nstate ,state))
389     (when (null ,nstate)
390     (setq ,nstate (setf ,state (cons nil nil))))
391     ,(funcall (ef-octets-to-code b) `(car ,nstate)
392     (funcall (ef-octets-to-code a)
393     `(cdr ,nstate) input unput)
394     unput))))
395 rtoy 1.6 :code-to-octets (lambda (code state output)
396 rtoy 1.11 (let ((nstate (gensym "STATE-")))
397     `(let ((,nstate ,state))
398     (when (null ,nstate)
399     (setq ,nstate (setf ,state (cons nil nil))))
400     ,(funcall (ef-code-to-octets b) code `(car ,nstate)
401     `(lambda (x)
402     ,(funcall (ef-code-to-octets a)
403     'x `(cdr ,nstate) output))))))
404 rtoy 1.6 :cache (make-array +ef-max+ :initial-element nil)
405     :min (* (ef-min-octets a) (ef-min-octets b))
406     :max (* (ef-max-octets a) (ef-max-octets b)))
407 rtoy 1.5 nil #() '()))
408 rtoy 1.3
409     (defun find-external-format (name &optional (error-p t))
410     (when (external-format-p name)
411     (return-from find-external-format name))
412    
413     (or (if (consp name) (every #'keywordp name) (keywordp name))
414     (error "~S is not a valid external format name." name))
415 rtoy 1.1
416 rtoy 1.3 (when (eq name :default)
417     (setq name *default-external-format*))
418    
419     (when (and (consp name) (not (cdr name)))
420     (setq name (car name)))
421    
422 rtoy 1.6 (flet ((not-found ()
423     (when (equal *default-external-format* name)
424     (setq *default-external-format* :iso8859-1))
425     (if error-p (error "External format ~S not found." name) nil)))
426     (if (consp name)
427     (let ((efs (mapcar #'%find-external-format name)))
428     (if (member nil efs)
429     (not-found)
430     (let ((name (reduce #'%composed-ef-name (mapcar #'ef-name efs))))
431     (or (gethash name *external-formats*)
432     (%intern-ef (reduce #'%compose-external-formats efs))))))
433     (or (%find-external-format name) (not-found)))))
434    
435     (defun flush-external-formats ()
436     (maphash (lambda (name ef)
437     (declare (ignore name))
438     (fill (ef-cache ef) nil))
439     *external-formats*))
440    
441     (defvar *.table-inverse.* (make-hash-table :test 'eq :size 7))
442    
443     (defun invert-table (table)
444     (declare (type (or (simple-array (unsigned-byte 31) *)
445     (simple-array (unsigned-byte 16) *))
446     table)
447     (optimize (speed 3) (space 0) (safety 0) (debug 0)
448     (ext:inhibit-warnings 3)))
449     (or (gethash table *.table-inverse.*)
450     (let* ((mbits (if (= (array-total-size table) 128) 7 8))
451     (lbits (cond ((> (array-total-size table) 256) 3)
452     ((< (array-total-size table) 100) 6)
453     (t 5)))
454     (hvec (make-array (1+ (ash #x110000 (- 0 mbits lbits)))
455     :element-type '(unsigned-byte 16)
456     :initial-element #xFFFF))
457     (mvec (make-array 0 :element-type '(unsigned-byte 16)))
458     (lvec (make-array 0 :element-type '(unsigned-byte 16)))
459     (width (array-dimension table 0))
460     (power (1- (array-rank table)))
461     (base (if (= width 94) 1 0))
462     hx mx lx)
463     (assert (and (< power 2) (<= width 256)))
464     (dotimes (i (array-total-size table))
465     (declare (type (integer 0 (#.array-dimension-limit)) i))
466     (let ((tmp i) (val (row-major-aref table i)) (z 0))
467     (declare (type (integer 0 (#.array-dimension-limit)) tmp)
468     (type (unsigned-byte 16) z))
469     (unless (= val #xFFFE)
470     (when (plusp power)
471     (multiple-value-bind (x y) (floor tmp width)
472     (setq tmp x)
473     (setq z (logior z (ash (the (integer 0 255) (+ y base))
474     (the (integer 0 24)
475     (* 8 power)))))))
476     (setq hx (ash val (- 0 mbits lbits)))
477     (when (= (aref hvec hx) #xFFFF)
478     (setf (aref hvec hx) (length mvec))
479     (let ((tmp (make-array (+ (length mvec) (ash 1 mbits))
480     :element-type '(unsigned-byte 16)
481     :initial-element #xFFFF)))
482     (replace tmp mvec)
483     (setq mvec tmp)))
484     (setq mx (logand (ash val (- lbits)) (lognot (ash -1 mbits))))
485 rtoy 1.7 (when (= (aref mvec (+ (aref hvec hx) mx)) #xFFFF)
486     (setf (aref mvec (+ (aref hvec hx) mx)) (length lvec))
487 rtoy 1.6 (let ((tmp (make-array (+ (length lvec) (ash 1 lbits))
488     :element-type '(unsigned-byte 16)
489     :initial-element #xFFFF)))
490     (replace tmp lvec)
491     (setq lvec tmp)))
492     (setq lx (logand val (lognot (ash -1 lbits))))
493 rtoy 1.7 (setf (aref lvec (+ (aref mvec (+ (aref hvec hx) mx)) lx))
494 rtoy 1.6 (logior z (+ tmp base))))))
495     (setf (gethash table *.table-inverse.*)
496     (lisp::make-ntrie16 :split (logior (ash (1- mbits) 4) (1- lbits))
497     :hvec hvec :mvec mvec :lvec lvec)))))
498    
499     (declaim (inline get-inverse))
500     (defun get-inverse (ntrie code)
501 rtoy 1.11 (declare (type lisp::ntrie16 ntrie) (type (integer 0 #x10FFFF) code))
502 rtoy 1.6 (let ((n (lisp::qref ntrie code)))
503     (and n (let ((m (aref (lisp::ntrie16-lvec ntrie) n)))
504     (if (= m #xFFFF) nil m)))))
505    
506 rtoy 1.1
507     (define-condition void-external-format (error)
508     ()
509     (:report
510     (lambda (condition stream)
511     (declare (ignore condition))
512     (format stream "Attempting I/O through void external-format."))))
513    
514 rtoy 1.6 (define-external-format :void (:size 0) ()
515 rtoy 1.1 (octets-to-code (state input unput)
516 rtoy 1.3 `(error 'void-external-format))
517 rtoy 1.1 (code-to-octets (code state output)
518 rtoy 1.3 `(error 'void-external-format)))
519 rtoy 1.1
520 rtoy 1.6 (define-external-format :iso8859-1 (:size 1) ()
521 rtoy 1.1 (octets-to-code (state input unput)
522 rtoy 1.3 `(values ,input 1))
523 rtoy 1.1 (code-to-octets (code state output)
524 rtoy 1.3 `(,output (if (> ,code 255) #x3F ,code))))
525 rtoy 1.1
526 rtoy 1.6 ;;; OCTETS-TO-CODEPOINT, CODEPOINT-TO-OCTETS -- Semi-Public
527     ;;;
528     ;;; Normally you'd want to use OCTETS-TO-CHAR and CHAR-TO-OCTETS instead of
529     ;;; these, but that limits you to Lisp's idea of a character - either Latin-1
530     ;;; in 8 bit Lisp images, or the Unicode BMP in 16 bit images. If you want
531     ;;; to read or write texts containing characters not supported by your Lisp,
532     ;;; these macros can be used instead.
533 rtoy 1.1 (defmacro octets-to-codepoint (external-format state count input unput)
534 rtoy 1.6 (let ((tmp1 (gensym)) (tmp2 (gensym))
535     (ef (find-external-format external-format)))
536     `(multiple-value-bind (,tmp1 ,tmp2)
537     ,(funcall (ef-octets-to-code ef) state input unput)
538     (setf ,count (the kernel:index ,tmp2))
539 rtoy 1.16 (the (or lisp:codepoint null) ,tmp1))))
540 rtoy 1.1
541     (defmacro codepoint-to-octets (external-format code state output)
542 rtoy 1.6 (let ((ef (find-external-format external-format)))
543     (funcall (ef-code-to-octets ef) code state output)))
544 rtoy 1.3
545    
546    
547     (defvar *ef-base* +ef-max+)
548     (defvar *ef-extensions* '())
549    
550     (defun ensure-cache (ef id reqd)
551     (let ((base (or (getf *ef-extensions* id)
552     (setf (getf *ef-extensions* id)
553     (prog1 *ef-base* (incf *ef-base* reqd))))))
554     (when (< (length (ef-cache ef)) (+ base reqd))
555 rtoy 1.5 (setf (efx-cache (ef-efx ef))
556 rtoy 1.3 (adjust-array (ef-cache ef) (+ base reqd) :initial-element nil)))
557     base))
558    
559 rtoy 1.6 ;;; DEF-EF-MACRO -- Public
560     ;;;
561     ;;;
562 rtoy 1.3 (defmacro def-ef-macro (name (ef id reqd idx) body)
563 rtoy 1.11 (let* ((tmp1 (gensym))
564     (tmp2 (gensym))
565     (blknm (nth-value 1 (lisp::valid-function-name-p name)))
566     (%name (intern (format nil "%~A" name) #|(symbol-package blknm)|#)))
567 rtoy 1.6 `(progn
568     (defun ,%name (,ef)
569     (let* ((,tmp1 (find-external-format ,ef))
570     (,tmp2 ,(if (eq id 'lisp::lisp)
571     idx
572     `(+ (ensure-cache ,tmp1 ',id ,reqd) ,idx))))
573     (funcall (or (aref (ef-cache ,tmp1) ,tmp2)
574     (setf (aref (ef-cache ,tmp1) ,tmp2)
575 rtoy 1.20 (let ((*compile-print* nil)
576     ;; Set default format when we compile so we
577     ;; can see compiler messages. If we don't,
578     ;; we run into a problem that we might be
579     ;; changing the default format while we're
580     ;; compiling, and we don't know how to output
581     ;; the compiler messages.
582     #|(*default-external-format* :iso8859-1)|#)
583     (compile nil `(lambda (%slots%)
584     (declare (ignorable %slots%))
585     (block ,',blknm
586     ,,body))))))
587 rtoy 1.6 (ef-slots ,tmp1))))
588     (declaim (inline ,name))
589     (defun ,name (,tmp1)
590     (let ((,tmp2 (load-time-value (cons nil nil))))
591     (when (eq ,tmp1 :default)
592     (setq ,tmp1 *default-external-format*))
593     (if (eq ,tmp1 (car ,tmp2))
594     (cdr ,tmp2)
595     (setf (car ,tmp2) ,tmp1
596     (cdr ,tmp2) (,%name ,tmp1))))))))
597 rtoy 1.1
598    
599    
600 rtoy 1.6 ;;; OCTETS-TO-CHAR, CHAR-TO-OCTETS -- Public
601     ;;;
602     ;;; Read and write one character through an external-format
603     ;;;
604 rtoy 1.1 (defmacro octets-to-char (external-format state count input unput)
605 rtoy 1.11 (let ((nstate (gensym)))
606     `(let ((,nstate ,state))
607     (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
608     (if (car ,nstate)
609 rtoy 1.13 ;; Return the trailing surrgate. Must set count to 0 to
610     ;; tell the stream code we didn't consume any octets!
611 rtoy 1.11 (prog1 (the character (car ,nstate))
612     (setf (car ,nstate) nil ,count 0))
613     (let ((code (octets-to-codepoint ,external-format
614     (cdr ,nstate) ,count ,input ,unput)))
615 rtoy 1.16 (declare (type lisp:codepoint code))
616 rtoy 1.11 ;;@@ on non-Unicode builds, limit to 8-bit chars
617     ;;@@ if unicode-bootstrap, can't use #\u+fffd
618 rtoy 1.16 (cond ((or (lisp::surrogatep code) (> code #x10FFFF))
619 rtoy 1.13 #-(and unicode (not unicode-bootstrap)) #\?
620     #+(and unicode (not unicode-bootstrap)) #\U+FFFD)
621     #+unicode
622 rtoy 1.11 ((> code #xFFFF)
623     (multiple-value-bind (hi lo) (surrogates code)
624     (setf (car ,nstate) lo)
625 rtoy 1.6 hi))
626 rtoy 1.11 (t (code-char code))))))))
627 rtoy 1.6
628 rtoy 1.1 (defmacro char-to-octets (external-format char state output)
629 rtoy 1.11 (let ((nchar (gensym))
630     (nstate (gensym))
631     (wryte (gensym))
632     (ch (gensym)))
633     `(let ((,nchar ,char)
634     (,nstate ,state))
635     (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
636 rtoy 1.14 (if (lisp::surrogatep (char-code ,nchar) :high)
637 rtoy 1.11 (setf (car ,nstate) ,nchar)
638     (flet ((,wryte (,ch)
639     (codepoint-to-octets ,external-format ,ch (cdr ,nstate)
640     ,output)))
641     (declare (dynamic-extent #',wryte))
642     (if (car ,nstate)
643 rtoy 1.14 (prog1
644     ;; Invalid surrogate sequences get replaced with
645     ;; the replacement character.
646     (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
647     (surrogates-to-codepoint (car ,nstate) ,nchar)
648     +replacement-character-code+))
649 rtoy 1.11 (setf (car ,nstate) nil))
650 rtoy 1.14 ;; A lone trailing (low) surrogate gets replaced with
651     ;; the replacement character.
652     (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
653     +replacement-character-code+
654     (char-code ,nchar)))))))))
655 rtoy 1.6
656 rtoy 1.15 (defmacro flush-state (external-format state output)
657     (let* ((ef (find-external-format external-format))
658     (f (ef-flush-state ef)))
659     (when f
660     (funcall f state output))))
661    
662 rtoy 1.16 (defmacro copy-state (external-format state)
663     (let* ((ef (find-external-format external-format))
664     (f (ef-copy-state ef)))
665     (when f
666     (funcall f state))))
667    
668 rtoy 1.3 (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
669 rtoy 1.11 `(lambda (string start end buffer &aux (ptr 0) (state nil))
670 rtoy 1.3 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
671     (type simple-string string)
672 rtoy 1.6 (type kernel:index start end ptr)
673 rtoy 1.3 (type (simple-array (unsigned-byte 8) (*)) buffer)
674     (ignorable state))
675 rtoy 1.11 (dotimes (i (- end start) (values buffer ptr))
676     (declare (type kernel:index i))
677     (char-to-octets ,extfmt (schar string (+ start i)) state
678     (lambda (b)
679     (when (= ptr (length buffer))
680     (setq buffer (adjust-array buffer (* 2 ptr))))
681     (setf (aref buffer (1- (incf ptr))) b))))))
682 rtoy 1.3
683 rtoy 1.2 (defun string-to-octets (string &key (start 0) end (external-format :default)
684     (buffer nil bufferp))
685 rtoy 1.16 "Convert String to octets using the specified External-format. The
686     string is bounded by Start (defaulting to 0) and End (defaulting to
687     the end of the string. If Buffer is given, the octets are stored
688     there. If not, a new buffer is created."
689 rtoy 1.1 (declare (type string string)
690 rtoy 1.6 (type kernel:index start)
691     (type (or kernel:index null) end)
692 rtoy 1.3 (type (or (simple-array (unsigned-byte 8) (*)) null) buffer))
693 rtoy 1.6 (let* ((buffer (or buffer (make-array (length string)
694 rtoy 1.3 :element-type '(unsigned-byte 8)))))
695 rtoy 1.6 (multiple-value-bind (buffer ptr)
696     (lisp::with-array-data ((string string) (start start) (end end))
697     (funcall (ef-string-to-octets external-format)
698     string start end buffer))
699     (values (if bufferp buffer (lisp::shrink-vector buffer ptr)) ptr))))
700 rtoy 1.3
701     (def-ef-macro ef-octets-to-string (extfmt lisp::lisp +ef-max+ +ef-os+)
702 rtoy 1.20 `(lambda (octets ptr end state string s-start s-end &aux (pos s-start) (count 0) (last-octet 0))
703     (declare (optimize (speed 3) (safety 0) #|(space 0) (debug 0)|#)
704 rtoy 1.3 (type (simple-array (unsigned-byte 8) (*)) octets)
705 rtoy 1.20 (type kernel:index pos end count last-octet s-start s-end)
706 rtoy 1.18 (type (integer -1 (#.array-dimension-limit)) ptr)
707 rtoy 1.3 (type simple-string string)
708     (ignorable state))
709 rtoy 1.18 (catch 'end-of-octets
710 rtoy 1.20 (loop while (< pos s-end)
711     do (setf (schar string pos)
712     (octets-to-char ,extfmt state count
713     (if (>= ptr end)
714     (throw 'end-of-octets nil)
715     (aref octets (incf ptr)))
716     (lambda (n) (decf ptr n))))
717 rtoy 1.18 (incf pos)
718     (incf last-octet count)))
719 rtoy 1.20 (values string pos last-octet state)))
720 rtoy 1.1
721 rtoy 1.2 (defun octets-to-string (octets &key (start 0) end (external-format :default)
722 rtoy 1.20 (string nil stringp)
723     (s-start 0) (s-end nil s-end-p)
724     (state nil))
725 rtoy 1.16 "Octets-to-string converts an array of octets in Octets to a string
726     according to the specified External-format. The array of octets is
727     bounded by Start (defaulting ot 0) and End (defaulting to the end of
728 rtoy 1.20 the array. If String is not given, a new string is created. If
729     String is given, the converted octets are stored in String, starting
730     at S-Start (defaulting to the 0) and ending at S-End (defaulting to
731     the length of String). If the string is not large enough to hold
732     all of characters, then some octets will not be converted. A State
733     may also be specified; this is used as the state of the external
734     format.
735    
736     Four values are returned: the string, the number of characters read,
737     the number of octets actually consumed and the new state of the
738     external format."
739 rtoy 1.1 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
740 rtoy 1.20 (type kernel:index start s-start)
741 rtoy 1.6 (type (or kernel:index null) end)
742 rtoy 1.3 (type (or simple-string null) string))
743 rtoy 1.20 (let ((s-end (if s-end-p
744     s-end
745     (if stringp
746     (length string)
747     (length octets)))))
748     (multiple-value-bind (string pos last-octet new-state)
749     (funcall (ef-octets-to-string external-format)
750     octets (1- start) (1- (or end (length octets)))
751     state
752     (or string (make-string (length octets)))
753     s-start s-end)
754     (values (if stringp string (lisp::shrink-vector string pos)) pos last-octet new-state))))
755 rtoy 1.1
756    
757    
758 rtoy 1.3 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
759     `(lambda (string start end result &aux (ptr 0) (state nil))
760     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
761     (type simple-string string)
762 rtoy 1.6 (type kernel:index start end ptr)
763 rtoy 1.3 (type simple-base-string result)
764     (ignorable state))
765 rtoy 1.11 (dotimes (i (- end start) (values result ptr))
766     (declare (type kernel:index i))
767     (char-to-octets ,extfmt (schar string (+ start i)) state
768     (lambda (b)
769     (when (= ptr (length result))
770     (setq result (adjust-array result (* 2 ptr))))
771     (setf (aref result (1- (incf ptr)))
772     (code-char b)))))))
773 rtoy 1.3
774     (defun string-encode (string external-format &optional (start 0) end)
775 rtoy 1.6 "Encode the given String using External-Format and return a new
776     string. The characters of the new string are the octets of the
777     encoded result, with each octet converted to a character via
778     code-char. This is the inverse to String-Decode"
779     (when (zerop (length string))
780     (return-from string-encode string))
781 rtoy 1.3 (multiple-value-bind (result ptr)
782     (lisp::with-array-data ((string string) (start start) (end end))
783 rtoy 1.6 (funcall (ef-encode external-format) string start end
784 rtoy 1.3 (make-string (length string) :element-type 'base-char)))
785     (lisp::shrink-vector result ptr)))
786    
787     (def-ef-macro ef-decode (extfmt lisp::lisp +ef-max+ +ef-de+)
788     `(lambda (string ptr end result &aux (pos -1) (count 0) (state nil))
789     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
790     (type simple-string string)
791 rtoy 1.6 (type kernel:index end count)
792 rtoy 1.3 (type (integer -1 (#.array-dimension-limit)) ptr pos)
793     (type simple-string result)
794     (ignorable state))
795     (loop until (>= ptr end)
796     ;; increasing size of result shouldn't ever be necessary, unless
797     ;; someone implements an encoding smaller than the source string...
798 rtoy 1.11 do (setf (schar result (incf pos))
799     (octets-to-char ,extfmt state count
800     ;; note the need to return NIL for EOF
801     (if (= (1+ ptr) (length string))
802     nil
803     (char-code (char string (incf ptr))))
804     (lambda (n) (decf ptr n))))
805 rtoy 1.5 finally (return (values result (1+ pos))))))
806 rtoy 1.3
807     (defun string-decode (string external-format &optional (start 0) end)
808 rtoy 1.6 "Decode String using the given External-Format and return the new
809     string. The input string is treated as if it were an array of
810     octets, where the char-code of each character is the octet. This is
811     the inverse of String-Encode."
812     (when (zerop (length string))
813     (return-from string-decode string))
814 rtoy 1.3 (multiple-value-bind (result pos)
815     (lisp::with-array-data ((string string) (start start) (end end))
816 rtoy 1.6 (funcall (ef-decode external-format)
817 rtoy 1.3 string (1- start) (1- end) (make-string (length string))))
818 rtoy 1.5 (lisp::shrink-vector result pos)))
819 rtoy 1.11
820    
821     (defun set-system-external-format (terminal &optional filenames)
822 rtoy 1.19 "Change the external format of the standard streams to Terminal.
823     The standard streams are sys::*stdin*, sys::*stdout*, and
824     sys::*stderr*, which are normally the input and/or output streams
825     for *standard-input* and *standard-output*. Also sets sys::*tty*
826     (normally *terminal-io* to the given external format. If the
827     optional argument Filenames is gvien, then the filename encoding is
828     set to the specified format."
829 rtoy 1.11 (unless (find-external-format terminal)
830     (error "Can't find external-format ~S." terminal))
831     (setf (stream-external-format sys:*stdin*) terminal
832     (stream-external-format sys:*stdout*) terminal
833     (stream-external-format sys:*stderr*) terminal)
834     (when (lisp::fd-stream-p sys:*tty*)
835     (setf (stream-external-format sys:*tty*) terminal))
836     (when filenames
837     (unless (find-external-format filenames)
838     (error "Can't find external-format ~S." filenames))
839     (when (and unix::*filename-encoding*
840     (not (eq unix::*filename-encoding* filenames)))
841     (cerror "Change it anyway."
842     "The external-format for encoding filenames is already set.")
843     (setq unix::*filename-encoding* filenames)))
844     t)
845    
846    
847     ;; Despite its name, this doesn't actually compile anything at all. What it
848     ;; does is expand into a lambda expression that can be compiled by the file
849     ;; compiler.
850     (defmacro precompile-ef-slot (ef slot)
851     (let* ((ef (find-external-format ef)))
852     ;; if there's no lambda expression available, flush it and regenerate
853     (unless (and (aref (ef-cache ef) slot)
854     (function-lambda-expression (aref (ef-cache ef) slot)))
855     (setf (aref (ef-cache ef) slot) nil)
856     (ecase slot
857     (#.+ef-cin+ (lisp::%ef-cin ef))
858     (#.+ef-cout+ (lisp::%ef-cout ef))
859     (#.+ef-sout+ (lisp::%ef-sout ef))
860     (#.+ef-os+ (%ef-octets-to-string ef))
861     (#.+ef-so+ (%ef-string-to-octets ef))
862     (#.+ef-en+ (%ef-encode ef))
863     (#.+ef-de+ (%ef-decode ef))))
864     `(setf (aref (ef-cache (find-external-format ,(ef-name ef))) ,slot)
865     ,(subst (ef-name ef) ef
866     (function-lambda-expression (aref (ef-cache ef) slot))))))

  ViewVC Help
Powered by ViewVC 1.1.5