/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.20.4.1 - (hide annotations)
Mon Feb 8 17:15:47 2010 UTC (4 years, 2 months ago) by rtoy
Branch: intl-branch
Changes since 1.20: +3 -1 lines
Add (intl:textdomain "cmucl") to the files to set the textdomain.
1 rtoy 1.1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2     ;;;
3     ;;; **********************************************************************
4     ;;; This code was written by Paul Foley and has been placed in the public
5     ;;; domain.
6     ;;;
7     (ext:file-comment
8 rtoy 1.20.4.1 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.20.4.1 2010/02/08 17:15:47 rtoy Exp $")
9 rtoy 1.1 ;;;
10     ;;; **********************************************************************
11     ;;;
12     ;;; Implementation of external-formats
13    
14     (in-package "STREAM")
15    
16 rtoy 1.20.4.1 (intl:textdomain "cmucl")
17    
18 rtoy 1.1 (export '(string-to-octets octets-to-string *default-external-format*
19 rtoy 1.15 string-encode string-decode set-system-external-format
20     +replacement-character-code+))
21 rtoy 1.1
22     (defvar *default-external-format* :iso8859-1)
23    
24 rtoy 1.3 (defvar *external-formats* (make-hash-table :test 'equal))
25 rtoy 1.1 (defvar *external-format-aliases* (make-hash-table))
26    
27 rtoy 1.15 (vm::defenum (:prefix "+EF-" :suffix "+" :start 1)
28     str ; string length
29     cin ; input a character
30     cout ; output a character
31     sin ; input string
32     sout ; output string
33     os ; octets to string
34     so ; string to octets
35     en ; encode
36     de ; decode
37     flush ; flush state
38 rtoy 1.20 copy-state ; copy state
39 rtoy 1.15 max)
40 rtoy 1.6
41     ;; Unicode replacement character U+FFFD
42     (defconstant +replacement-character-code+ #xFFFD)
43 rtoy 1.3
44 rtoy 1.1 (define-condition external-format-not-implemented (error)
45     ()
46     (:report
47     (lambda (condition stream)
48     (declare (ignore condition))
49     (format stream "Attempting unimplemented external-format I/O."))))
50    
51     (defun %efni (a b c d)
52     (declare (ignore a b c d))
53     (error 'external-format-not-implemented))
54    
55 rtoy 1.5 (defstruct efx
56 rtoy 1.15 ;;
57     ;; Function to read a sequence of octets from a stream and convert
58     ;; them a code point.
59 rtoy 1.5 (octets-to-code #'%efni :type function :read-only t)
60 rtoy 1.15 ;;
61     ;; Function to convert a codepoint to a sequence of octets and write
62     ;; them to an output stream.
63 rtoy 1.5 (code-to-octets #'%efni :type function :read-only t)
64 rtoy 1.15 ;;
65     ;; Function (or NIL) to force any state in the external format to be
66     ;; flushed to the output stream. A NIL value means the external
67     ;; format does not need to do anything special.
68     (flush-state nil :type (or null function) :read-only t)
69 rtoy 1.16 ;;
70     ;; Function to copy the state of the external-format. If NIL, then
71     ;; there is no state to be copied.
72     (copy-state nil :type (or null function) :read-only t)
73 rtoy 1.6 (cache nil :type (or null simple-vector))
74 rtoy 1.15 ;;
75     ;; Minimum number of octets needed to form a codepoint
76 rtoy 1.6 (min 1 :type kernel:index :read-only t)
77 rtoy 1.15 ;;
78 rtoy 1.17 ;; Maximum number of octets needed to form a codepoint.
79 rtoy 1.6 (max 1 :type kernel:index :read-only t))
80 rtoy 1.5
81 rtoy 1.1 (defstruct (external-format
82     (:conc-name ef-)
83     (:print-function %print-external-format)
84 rtoy 1.5 (:constructor make-external-format (name efx composingp
85     &optional slots slotd)))
86 rtoy 1.3 (name (ext:required-argument) :type (or keyword cons) :read-only t)
87 rtoy 1.5 (efx (ext:required-argument) :type efx :read-only t)
88 rtoy 1.3 (composingp (ext:required-argument) :type boolean :read-only t)
89 rtoy 1.1 (slots #() :type simple-vector :read-only t)
90 rtoy 1.5 (slotd nil :type list :read-only t))
91 rtoy 1.1
92     (defun %print-external-format (ef stream depth)
93     (declare (ignore depth))
94     (print-unreadable-object (ef stream :type t :identity t)
95     (princ (ef-name ef) stream)))
96    
97 rtoy 1.6 (defun %intern-ef (ef)
98 rtoy 1.3 (setf (gethash (ef-name ef) *external-formats*) ef))
99    
100 rtoy 1.16 (declaim (inline ef-octets-to-code ef-code-to-octets ef-flush-state ef-copy-state
101 rtoy 1.15 ef-cache ef-min-octets ef-max-octets))
102 rtoy 1.5
103     (defun ef-octets-to-code (ef)
104     (efx-octets-to-code (ef-efx ef)))
105    
106     (defun ef-code-to-octets (ef)
107     (efx-code-to-octets (ef-efx ef)))
108    
109 rtoy 1.15 (defun ef-flush-state (ef)
110     (efx-flush-state (ef-efx ef)))
111    
112 rtoy 1.16 (defun ef-copy-state (ef)
113     (efx-copy-state (ef-efx ef)))
114    
115 rtoy 1.5 (defun ef-cache (ef)
116     (efx-cache (ef-efx ef)))
117    
118 rtoy 1.6 (defun ef-min-octets (ef)
119     (efx-min (ef-efx ef)))
120    
121     (defun ef-max-octets (ef)
122     (efx-max (ef-efx ef)))
123    
124     (eval-when (:compile-toplevel :load-toplevel :execute)
125     (defun %merge-slots (old new)
126     (let* ((pos (length old))
127     (tmp (mapcar (lambda (x)
128     (let* ((name (if (consp x) (first x) x))
129     (init (if (consp x) (second x) nil))
130     (list (if (consp x) (nthcdr 2 x) nil))
131     (prev (assoc name old))
132     (posn (if prev (second prev) (1- (incf pos)))))
133     (list name posn init (getf list :type t))))
134     new)))
135     (delete-duplicates (stable-sort (append old tmp) #'< :key #'second)
136     :key #'second))))
137    
138     ;;; DEFINE-EXTERNAL-FORMAT -- Public
139     ;;;
140 rtoy 1.16 ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets
141     ;;; flush-state copy-state
142     ;;;
143 rtoy 1.6 ;;; Define a new external format. Min/Max/Size are the minimum and
144     ;;; maximum number of octets that make up a character (:size N is just
145     ;;; shorthand for :min N :max N). Slots is a list of slot descriptions
146     ;;; similar to defstruct.
147     ;;;
148     ;;; name (base) (&rest slots)
149     ;;; Define an external format based on a previously-defined external
150     ;;; format, Base. The slot names used in Slots must match those in Base.
151     ;;;
152     ;;; octets-to-code (state input unput &rest vars)
153     ;;; Defines a form to be used by the external format to convert
154     ;;; octets to a code point. State is a form that can be used by the
155 rtoy 1.16 ;;; body to access the state of the stream. Input is a form that
156     ;;; can be used to read one octet from the input stream. (It can be
157     ;;; called as many times as needed.) Similarly, Unput is a form to
158     ;;; put back one octet to the input stream. Vars is a list of vars
159     ;;; that need to be defined for any symbols used within the form.
160 rtoy 1.6 ;;;
161     ;;; This should return two values: the code and the number of octets
162     ;;; read to form the code.
163     ;;;
164     ;;; code-to-octets (code state output &rest vars)
165     ;;; Defines a form to be used by the external format to convert a
166     ;;; code point to octets for output. Code is the code point to be
167     ;;; converted. State is a form to access the current value of the
168     ;;; stream's state variable. Output is a form that writes one octet
169     ;;; to the output stream.
170     ;;;
171 rtoy 1.15 ;;; flush-state (state output &rest vars)
172     ;;; Defines a form to be used by the external format to flush out
173     ;;; any state when an output stream is closed. Similar to
174     ;;; CODE-TO-OCTETS, but there is no code.
175     ;;;
176 rtoy 1.16 ;;; copy-state (state &rest vars)
177     ;;; Defines a form to copy any state needed by the external format.
178     ;;; This should probably be a deep copy so that if the original
179     ;;; state is modified, the copy is not.
180     ;;;
181 rtoy 1.6 ;;; Note: external-formats work on code-points, not
182     ;;; characters, so that the entire 31 bit ISO-10646 range can be
183     ;;; used internally regardless of the size of a character recognized
184     ;;; by Lisp and external formats can be useful to people who want to
185     ;;; process characters outside the Lisp range (see
186     ;;; CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)
187     ;;;
188     (defmacro define-external-format (name (&rest args) (&rest slots)
189 rtoy 1.16 &optional octets-to-code code-to-octets
190     flush-state copy-state)
191 rtoy 1.6 (when (and (oddp (length args)) (not (= (length args) 1)))
192     (warn "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT." args))
193     (let* ((tmp (gensym))
194     (min (if (evenp (length args))
195     (or (getf args :min) (getf args :size) 1)
196     1))
197     (max (if (evenp (length args))
198     (or (getf args :max) (getf args :size) 6)
199     6))
200     (base (if (= (length args) 1)
201     (find-external-format (first args))
202     nil))
203     (bslotd (if base (ef-slotd base) nil))
204     (slotd (%merge-slots bslotd slots))
205     (slotb (loop for slot in slotd
206     collect `(,(first slot)
207     `(the ,',(fourth slot)
208     ;; IDENTITY is here to protect against SETF
209     (identity (svref %slots% ,',(second slot))))))))
210 rtoy 1.3 `(macrolet ((octets-to-code ((state input unput &rest vars) body)
211 rtoy 1.6 `(lambda (,state ,input ,unput)
212     (declare (ignorable ,state ,input ,unput)
213 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
214 rtoy 1.6 (let (,@',slotb
215     (,input `(the (or (unsigned-byte 8) null) ,,input))
216 rtoy 1.3 ,@(loop for var in vars collect `(,var (gensym))))
217     ,body)))
218     (code-to-octets ((code state output &rest vars) body)
219 rtoy 1.6 `(lambda (,',tmp ,state ,output)
220     (declare (ignorable ,state ,output)
221 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
222 rtoy 1.6 (let (,@',slotb
223     (,code ',code)
224 rtoy 1.3 ,@(loop for var in vars collect `(,var (gensym))))
225 rtoy 1.16 `(let ((,',code (the lisp:codepoint ,,',tmp)))
226 rtoy 1.3 (declare (ignorable ,',code))
227 rtoy 1.15 ,,body))))
228     (flush-state ((state output &rest vars) body)
229     `(lambda (,state ,output)
230     (declare (ignorable ,state ,output))
231     (let (,@',slotb
232     ,@(loop for var in vars collect `(,var (gensym))))
233 rtoy 1.16 ,body)))
234     (copy-state ((state &rest vars) body)
235     `(lambda (,state)
236     (declare (ignorable ,state))
237     (let (,@',slotb
238     ,@(loop for var in vars collect `(,var (gensym))))
239 rtoy 1.15 ,body))))
240 rtoy 1.6 (%intern-ef (make-external-format ,name
241     ,(if base
242     `(ef-efx (find-external-format ,(ef-name base)))
243     `(make-efx :octets-to-code ,octets-to-code
244     :code-to-octets ,code-to-octets
245 rtoy 1.16 :flush-state ,flush-state
246     :copy-state ,copy-state
247 rtoy 1.6 :cache (make-array +ef-max+
248     :initial-element nil)
249     :min ,(min min max) :max ,(max min max)))
250     nil
251     (let* ,(loop for x in slotd
252     collect (list (first x) (third x)))
253     (vector ,@(mapcar #'first slotd)))
254     ',slotd)))))
255 rtoy 1.3
256 rtoy 1.6 ;;; DEFINE-COMPOSING-EXTERNAL-FORMAT -- Public
257     ;;;
258     ;;; A composing-external-format differs from an (ordinary) external-format
259     ;;; in that it translates characters (really codepoints, of course) into
260     ;;; other characters, rather than translating between characters and binary
261     ;;; octets. They have to be composed with a non-composing external-format
262     ;;; to be of any use.
263     ;;;
264 rtoy 1.16 ;;;
265     ;;; name (&key min max size) input output
266     ;;; Defines a new composing external format. The parameters Min,
267     ;;; Max, and Size are the same as for defining an external format.
268     ;;; The parameters input and output are forms to handle input and
269     ;;; output.
270     ;;;
271     ;;; input (state input unput &rest vars)
272     ;;; Defines a form to be used by the composing external format when
273     ;;; reading input to transform a codepoint (or sequence of
274     ;;; codepoints) to another. State is a form that can be used by the
275     ;;; body to access the state of the external format. Input is a
276     ;;; form that can be used to read one code point from the input
277     ;;; stream. (Input returns two values, the codepoint and the number
278     ;;; of octets read.) It may be called as many times as needed.
279     ;;; This returns two values: the codepoint of the character (or NIL)
280     ;;; and the number of octets read. Similarly, Unput is a form to
281     ;;; put back one octet to the input stream. Vars is a list of vars
282     ;;; that need to be defined for any symbols used within the form.
283     ;;;
284     ;;; This should return two values: the code and the number of octets
285     ;;; read to form the code.
286     ;;;
287     ;;; output (code state output &rest vars)
288     ;;; Defines a form to be used by the composing external format to
289     ;;; convert a code point to octets for output. Code is the code
290     ;;; point to be converted. State is a form to access the current
291     ;;; value of the stream's state variable. Output is a form that
292     ;;; writes one octet to the output stream.
293    
294 rtoy 1.6 (defmacro define-composing-external-format (name (&key min max size)
295     input output)
296     (let ((tmp (gensym))
297     (min (or min size 1))
298     (max (or max size 1)))
299 rtoy 1.3 `(macrolet ((input ((state input unput &rest vars) body)
300 rtoy 1.6 `(lambda (,state ,input ,unput)
301     (declare (ignorable ,state ,input ,unput)
302 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
303 rtoy 1.16 (let ((,input `(the (values (or lisp:codepoint null)
304 rtoy 1.6 kernel:index)
305 rtoy 1.3 ,,input))
306     ,@(loop for var in vars collect `(,var (gensym))))
307     ,body)))
308     (output ((code state output &rest vars) body)
309 rtoy 1.6 `(lambda (,',tmp ,state ,output)
310     (declare (ignorable ,state ,output)
311 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
312     (let ((,code ',code)
313     ,@(loop for var in vars collect `(,var (gensym))))
314 rtoy 1.16 `(let ((,',code (the lisp:codepoint ,,',tmp)))
315 rtoy 1.3 (declare (ignorable ,',code))
316     ,,body)))))
317 rtoy 1.6 (%intern-ef (make-external-format ,name
318     (make-efx :octets-to-code ,input
319     :code-to-octets ,output
320     :min ,(min min max) :max ,(max min max))
321     t
322     #() '())))))
323 rtoy 1.1
324     (defun load-external-format-aliases ()
325 rtoy 1.6 (let ((*package* (find-package "KEYWORD"))
326     (unix::*filename-encoding* :iso8859-1))
327 rtoy 1.11 (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil
328     :external-format :iso8859-1)
329 rtoy 1.1 (when stm
330 rtoy 1.6 (do ((alias (read stm nil stm) (read stm nil stm))
331     (value (read stm nil stm) (read stm nil stm)))
332     ((or (eq alias stm) (eq value stm))
333     (unless (eq alias stm)
334     (warn "External-format aliases file ends early.")))
335 rtoy 1.11 (if (and (keywordp alias) (or (keywordp value)
336     (and (consp value)
337     (every #'keywordp value))))
338 rtoy 1.6 (setf (gethash alias *external-format-aliases*) value)
339     (warn "Bad entry in external-format aliases file: ~S => ~S."
340     alias value)))))))
341 rtoy 1.1
342 rtoy 1.3 (defun %find-external-format (name)
343 rtoy 1.6 ;; avoid loading files, etc., early in the boot sequence
344     (when (or (eq name :iso8859-1)
345     (and (eq name :default) (eq *default-external-format* :iso8859-1)))
346     (return-from %find-external-format
347     (gethash :iso8859-1 *external-formats*)))
348    
349 rtoy 1.1 (when (zerop (hash-table-count *external-format-aliases*))
350     (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
351     (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
352     (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
353     (load-external-format-aliases))
354    
355     (do ((tmp (gethash name *external-format-aliases*)
356     (gethash tmp *external-format-aliases*))
357     (cnt 0 (1+ cnt)))
358     ((or (null tmp) (= cnt 50))
359     (unless (null tmp)
360     (error "External-format aliasing depth exceeded.")))
361     (setq name tmp))
362    
363     (or (gethash name *external-formats*)
364 rtoy 1.11 (and (consp name) (find-external-format name))
365 rtoy 1.2 (and (let ((*package* (find-package "STREAM"))
366 rtoy 1.12 (lisp::*enable-package-locked-errors* nil)
367     (s (open (format nil "ext-formats:~(~A~).lisp" name)
368     :if-does-not-exist nil :external-format :iso8859-1)))
369     (when s
370     (null (nth-value 1 (ext:compile-from-stream s)))))
371 rtoy 1.3 (gethash name *external-formats*))))
372    
373     (defun %composed-ef-name (a b)
374     (if (consp a) (append a (list b)) (list a b)))
375    
376 rtoy 1.6 (defun %compose-external-formats (a b)
377 rtoy 1.3 (when (ef-composingp a)
378     (error "~S is a Composing-External-Format." (ef-name a)))
379     (unless (ef-composingp b)
380     (error "~S is not a Composing-External-Format." (ef-name b)))
381 rtoy 1.5 (make-external-format
382     (%composed-ef-name (ef-name a) (ef-name b))
383     (make-efx
384 rtoy 1.6 :octets-to-code (lambda (state input unput)
385 rtoy 1.11 (let ((nstate (gensym "STATE-")))
386     `(let ((,nstate ,state))
387     (when (null ,nstate)
388     (setq ,nstate (setf ,state (cons nil nil))))
389     ,(funcall (ef-octets-to-code b) `(car ,nstate)
390     (funcall (ef-octets-to-code a)
391     `(cdr ,nstate) input unput)
392     unput))))
393 rtoy 1.6 :code-to-octets (lambda (code state output)
394 rtoy 1.11 (let ((nstate (gensym "STATE-")))
395     `(let ((,nstate ,state))
396     (when (null ,nstate)
397     (setq ,nstate (setf ,state (cons nil nil))))
398     ,(funcall (ef-code-to-octets b) code `(car ,nstate)
399     `(lambda (x)
400     ,(funcall (ef-code-to-octets a)
401     'x `(cdr ,nstate) output))))))
402 rtoy 1.6 :cache (make-array +ef-max+ :initial-element nil)
403     :min (* (ef-min-octets a) (ef-min-octets b))
404     :max (* (ef-max-octets a) (ef-max-octets b)))
405 rtoy 1.5 nil #() '()))
406 rtoy 1.3
407     (defun find-external-format (name &optional (error-p t))
408     (when (external-format-p name)
409     (return-from find-external-format name))
410    
411     (or (if (consp name) (every #'keywordp name) (keywordp name))
412     (error "~S is not a valid external format name." name))
413 rtoy 1.1
414 rtoy 1.3 (when (eq name :default)
415     (setq name *default-external-format*))
416    
417     (when (and (consp name) (not (cdr name)))
418     (setq name (car name)))
419    
420 rtoy 1.6 (flet ((not-found ()
421     (when (equal *default-external-format* name)
422     (setq *default-external-format* :iso8859-1))
423     (if error-p (error "External format ~S not found." name) nil)))
424     (if (consp name)
425     (let ((efs (mapcar #'%find-external-format name)))
426     (if (member nil efs)
427     (not-found)
428     (let ((name (reduce #'%composed-ef-name (mapcar #'ef-name efs))))
429     (or (gethash name *external-formats*)
430     (%intern-ef (reduce #'%compose-external-formats efs))))))
431     (or (%find-external-format name) (not-found)))))
432    
433     (defun flush-external-formats ()
434     (maphash (lambda (name ef)
435     (declare (ignore name))
436     (fill (ef-cache ef) nil))
437     *external-formats*))
438    
439     (defvar *.table-inverse.* (make-hash-table :test 'eq :size 7))
440    
441     (defun invert-table (table)
442     (declare (type (or (simple-array (unsigned-byte 31) *)
443     (simple-array (unsigned-byte 16) *))
444     table)
445     (optimize (speed 3) (space 0) (safety 0) (debug 0)
446     (ext:inhibit-warnings 3)))
447     (or (gethash table *.table-inverse.*)
448     (let* ((mbits (if (= (array-total-size table) 128) 7 8))
449     (lbits (cond ((> (array-total-size table) 256) 3)
450     ((< (array-total-size table) 100) 6)
451     (t 5)))
452     (hvec (make-array (1+ (ash #x110000 (- 0 mbits lbits)))
453     :element-type '(unsigned-byte 16)
454     :initial-element #xFFFF))
455     (mvec (make-array 0 :element-type '(unsigned-byte 16)))
456     (lvec (make-array 0 :element-type '(unsigned-byte 16)))
457     (width (array-dimension table 0))
458     (power (1- (array-rank table)))
459     (base (if (= width 94) 1 0))
460     hx mx lx)
461     (assert (and (< power 2) (<= width 256)))
462     (dotimes (i (array-total-size table))
463     (declare (type (integer 0 (#.array-dimension-limit)) i))
464     (let ((tmp i) (val (row-major-aref table i)) (z 0))
465     (declare (type (integer 0 (#.array-dimension-limit)) tmp)
466     (type (unsigned-byte 16) z))
467     (unless (= val #xFFFE)
468     (when (plusp power)
469     (multiple-value-bind (x y) (floor tmp width)
470     (setq tmp x)
471     (setq z (logior z (ash (the (integer 0 255) (+ y base))
472     (the (integer 0 24)
473     (* 8 power)))))))
474     (setq hx (ash val (- 0 mbits lbits)))
475     (when (= (aref hvec hx) #xFFFF)
476     (setf (aref hvec hx) (length mvec))
477     (let ((tmp (make-array (+ (length mvec) (ash 1 mbits))
478     :element-type '(unsigned-byte 16)
479     :initial-element #xFFFF)))
480     (replace tmp mvec)
481     (setq mvec tmp)))
482     (setq mx (logand (ash val (- lbits)) (lognot (ash -1 mbits))))
483 rtoy 1.7 (when (= (aref mvec (+ (aref hvec hx) mx)) #xFFFF)
484     (setf (aref mvec (+ (aref hvec hx) mx)) (length lvec))
485 rtoy 1.6 (let ((tmp (make-array (+ (length lvec) (ash 1 lbits))
486     :element-type '(unsigned-byte 16)
487     :initial-element #xFFFF)))
488     (replace tmp lvec)
489     (setq lvec tmp)))
490     (setq lx (logand val (lognot (ash -1 lbits))))
491 rtoy 1.7 (setf (aref lvec (+ (aref mvec (+ (aref hvec hx) mx)) lx))
492 rtoy 1.6 (logior z (+ tmp base))))))
493     (setf (gethash table *.table-inverse.*)
494     (lisp::make-ntrie16 :split (logior (ash (1- mbits) 4) (1- lbits))
495     :hvec hvec :mvec mvec :lvec lvec)))))
496    
497     (declaim (inline get-inverse))
498     (defun get-inverse (ntrie code)
499 rtoy 1.11 (declare (type lisp::ntrie16 ntrie) (type (integer 0 #x10FFFF) code))
500 rtoy 1.6 (let ((n (lisp::qref ntrie code)))
501     (and n (let ((m (aref (lisp::ntrie16-lvec ntrie) n)))
502     (if (= m #xFFFF) nil m)))))
503    
504 rtoy 1.1
505     (define-condition void-external-format (error)
506     ()
507     (:report
508     (lambda (condition stream)
509     (declare (ignore condition))
510     (format stream "Attempting I/O through void external-format."))))
511    
512 rtoy 1.6 (define-external-format :void (:size 0) ()
513 rtoy 1.1 (octets-to-code (state input unput)
514 rtoy 1.3 `(error 'void-external-format))
515 rtoy 1.1 (code-to-octets (code state output)
516 rtoy 1.3 `(error 'void-external-format)))
517 rtoy 1.1
518 rtoy 1.6 (define-external-format :iso8859-1 (:size 1) ()
519 rtoy 1.1 (octets-to-code (state input unput)
520 rtoy 1.3 `(values ,input 1))
521 rtoy 1.1 (code-to-octets (code state output)
522 rtoy 1.3 `(,output (if (> ,code 255) #x3F ,code))))
523 rtoy 1.1
524 rtoy 1.6 ;;; OCTETS-TO-CODEPOINT, CODEPOINT-TO-OCTETS -- Semi-Public
525     ;;;
526     ;;; Normally you'd want to use OCTETS-TO-CHAR and CHAR-TO-OCTETS instead of
527     ;;; these, but that limits you to Lisp's idea of a character - either Latin-1
528     ;;; in 8 bit Lisp images, or the Unicode BMP in 16 bit images. If you want
529     ;;; to read or write texts containing characters not supported by your Lisp,
530     ;;; these macros can be used instead.
531 rtoy 1.1 (defmacro octets-to-codepoint (external-format state count input unput)
532 rtoy 1.6 (let ((tmp1 (gensym)) (tmp2 (gensym))
533     (ef (find-external-format external-format)))
534     `(multiple-value-bind (,tmp1 ,tmp2)
535     ,(funcall (ef-octets-to-code ef) state input unput)
536     (setf ,count (the kernel:index ,tmp2))
537 rtoy 1.16 (the (or lisp:codepoint null) ,tmp1))))
538 rtoy 1.1
539     (defmacro codepoint-to-octets (external-format code state output)
540 rtoy 1.6 (let ((ef (find-external-format external-format)))
541     (funcall (ef-code-to-octets ef) code state output)))
542 rtoy 1.3
543    
544    
545     (defvar *ef-base* +ef-max+)
546     (defvar *ef-extensions* '())
547    
548     (defun ensure-cache (ef id reqd)
549     (let ((base (or (getf *ef-extensions* id)
550     (setf (getf *ef-extensions* id)
551     (prog1 *ef-base* (incf *ef-base* reqd))))))
552     (when (< (length (ef-cache ef)) (+ base reqd))
553 rtoy 1.5 (setf (efx-cache (ef-efx ef))
554 rtoy 1.3 (adjust-array (ef-cache ef) (+ base reqd) :initial-element nil)))
555     base))
556    
557 rtoy 1.6 ;;; DEF-EF-MACRO -- Public
558     ;;;
559     ;;;
560 rtoy 1.3 (defmacro def-ef-macro (name (ef id reqd idx) body)
561 rtoy 1.11 (let* ((tmp1 (gensym))
562     (tmp2 (gensym))
563     (blknm (nth-value 1 (lisp::valid-function-name-p name)))
564     (%name (intern (format nil "%~A" name) #|(symbol-package blknm)|#)))
565 rtoy 1.6 `(progn
566     (defun ,%name (,ef)
567     (let* ((,tmp1 (find-external-format ,ef))
568     (,tmp2 ,(if (eq id 'lisp::lisp)
569     idx
570     `(+ (ensure-cache ,tmp1 ',id ,reqd) ,idx))))
571     (funcall (or (aref (ef-cache ,tmp1) ,tmp2)
572     (setf (aref (ef-cache ,tmp1) ,tmp2)
573 rtoy 1.20 (let ((*compile-print* nil)
574     ;; Set default format when we compile so we
575     ;; can see compiler messages. If we don't,
576     ;; we run into a problem that we might be
577     ;; changing the default format while we're
578     ;; compiling, and we don't know how to output
579     ;; the compiler messages.
580     #|(*default-external-format* :iso8859-1)|#)
581     (compile nil `(lambda (%slots%)
582     (declare (ignorable %slots%))
583     (block ,',blknm
584     ,,body))))))
585 rtoy 1.6 (ef-slots ,tmp1))))
586     (declaim (inline ,name))
587     (defun ,name (,tmp1)
588     (let ((,tmp2 (load-time-value (cons nil nil))))
589     (when (eq ,tmp1 :default)
590     (setq ,tmp1 *default-external-format*))
591     (if (eq ,tmp1 (car ,tmp2))
592     (cdr ,tmp2)
593     (setf (car ,tmp2) ,tmp1
594     (cdr ,tmp2) (,%name ,tmp1))))))))
595 rtoy 1.1
596    
597    
598 rtoy 1.6 ;;; OCTETS-TO-CHAR, CHAR-TO-OCTETS -- Public
599     ;;;
600     ;;; Read and write one character through an external-format
601     ;;;
602 rtoy 1.1 (defmacro octets-to-char (external-format state count input unput)
603 rtoy 1.11 (let ((nstate (gensym)))
604     `(let ((,nstate ,state))
605     (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
606     (if (car ,nstate)
607 rtoy 1.13 ;; Return the trailing surrgate. Must set count to 0 to
608     ;; tell the stream code we didn't consume any octets!
609 rtoy 1.11 (prog1 (the character (car ,nstate))
610     (setf (car ,nstate) nil ,count 0))
611     (let ((code (octets-to-codepoint ,external-format
612     (cdr ,nstate) ,count ,input ,unput)))
613 rtoy 1.16 (declare (type lisp:codepoint code))
614 rtoy 1.11 ;;@@ on non-Unicode builds, limit to 8-bit chars
615     ;;@@ if unicode-bootstrap, can't use #\u+fffd
616 rtoy 1.16 (cond ((or (lisp::surrogatep code) (> code #x10FFFF))
617 rtoy 1.13 #-(and unicode (not unicode-bootstrap)) #\?
618     #+(and unicode (not unicode-bootstrap)) #\U+FFFD)
619     #+unicode
620 rtoy 1.11 ((> code #xFFFF)
621     (multiple-value-bind (hi lo) (surrogates code)
622     (setf (car ,nstate) lo)
623 rtoy 1.6 hi))
624 rtoy 1.11 (t (code-char code))))))))
625 rtoy 1.6
626 rtoy 1.1 (defmacro char-to-octets (external-format char state output)
627 rtoy 1.11 (let ((nchar (gensym))
628     (nstate (gensym))
629     (wryte (gensym))
630     (ch (gensym)))
631     `(let ((,nchar ,char)
632     (,nstate ,state))
633     (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
634 rtoy 1.14 (if (lisp::surrogatep (char-code ,nchar) :high)
635 rtoy 1.11 (setf (car ,nstate) ,nchar)
636     (flet ((,wryte (,ch)
637     (codepoint-to-octets ,external-format ,ch (cdr ,nstate)
638     ,output)))
639     (declare (dynamic-extent #',wryte))
640     (if (car ,nstate)
641 rtoy 1.14 (prog1
642     ;; Invalid surrogate sequences get replaced with
643     ;; the replacement character.
644     (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
645     (surrogates-to-codepoint (car ,nstate) ,nchar)
646     +replacement-character-code+))
647 rtoy 1.11 (setf (car ,nstate) nil))
648 rtoy 1.14 ;; A lone trailing (low) surrogate gets replaced with
649     ;; the replacement character.
650     (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
651     +replacement-character-code+
652     (char-code ,nchar)))))))))
653 rtoy 1.6
654 rtoy 1.15 (defmacro flush-state (external-format state output)
655     (let* ((ef (find-external-format external-format))
656     (f (ef-flush-state ef)))
657     (when f
658     (funcall f state output))))
659    
660 rtoy 1.16 (defmacro copy-state (external-format state)
661     (let* ((ef (find-external-format external-format))
662     (f (ef-copy-state ef)))
663     (when f
664     (funcall f state))))
665    
666 rtoy 1.3 (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
667 rtoy 1.11 `(lambda (string start end buffer &aux (ptr 0) (state nil))
668 rtoy 1.3 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
669     (type simple-string string)
670 rtoy 1.6 (type kernel:index start end ptr)
671 rtoy 1.3 (type (simple-array (unsigned-byte 8) (*)) buffer)
672     (ignorable state))
673 rtoy 1.11 (dotimes (i (- end start) (values buffer ptr))
674     (declare (type kernel:index i))
675     (char-to-octets ,extfmt (schar string (+ start i)) state
676     (lambda (b)
677     (when (= ptr (length buffer))
678     (setq buffer (adjust-array buffer (* 2 ptr))))
679     (setf (aref buffer (1- (incf ptr))) b))))))
680 rtoy 1.3
681 rtoy 1.2 (defun string-to-octets (string &key (start 0) end (external-format :default)
682     (buffer nil bufferp))
683 rtoy 1.16 "Convert String to octets using the specified External-format. The
684     string is bounded by Start (defaulting to 0) and End (defaulting to
685     the end of the string. If Buffer is given, the octets are stored
686     there. If not, a new buffer is created."
687 rtoy 1.1 (declare (type string string)
688 rtoy 1.6 (type kernel:index start)
689     (type (or kernel:index null) end)
690 rtoy 1.3 (type (or (simple-array (unsigned-byte 8) (*)) null) buffer))
691 rtoy 1.6 (let* ((buffer (or buffer (make-array (length string)
692 rtoy 1.3 :element-type '(unsigned-byte 8)))))
693 rtoy 1.6 (multiple-value-bind (buffer ptr)
694     (lisp::with-array-data ((string string) (start start) (end end))
695     (funcall (ef-string-to-octets external-format)
696     string start end buffer))
697     (values (if bufferp buffer (lisp::shrink-vector buffer ptr)) ptr))))
698 rtoy 1.3
699     (def-ef-macro ef-octets-to-string (extfmt lisp::lisp +ef-max+ +ef-os+)
700 rtoy 1.20 `(lambda (octets ptr end state string s-start s-end &aux (pos s-start) (count 0) (last-octet 0))
701     (declare (optimize (speed 3) (safety 0) #|(space 0) (debug 0)|#)
702 rtoy 1.3 (type (simple-array (unsigned-byte 8) (*)) octets)
703 rtoy 1.20 (type kernel:index pos end count last-octet s-start s-end)
704 rtoy 1.18 (type (integer -1 (#.array-dimension-limit)) ptr)
705 rtoy 1.3 (type simple-string string)
706     (ignorable state))
707 rtoy 1.18 (catch 'end-of-octets
708 rtoy 1.20 (loop while (< pos s-end)
709     do (setf (schar string pos)
710     (octets-to-char ,extfmt state count
711     (if (>= ptr end)
712     (throw 'end-of-octets nil)
713     (aref octets (incf ptr)))
714     (lambda (n) (decf ptr n))))
715 rtoy 1.18 (incf pos)
716     (incf last-octet count)))
717 rtoy 1.20 (values string pos last-octet state)))
718 rtoy 1.1
719 rtoy 1.2 (defun octets-to-string (octets &key (start 0) end (external-format :default)
720 rtoy 1.20 (string nil stringp)
721     (s-start 0) (s-end nil s-end-p)
722     (state nil))
723 rtoy 1.16 "Octets-to-string converts an array of octets in Octets to a string
724     according to the specified External-format. The array of octets is
725     bounded by Start (defaulting ot 0) and End (defaulting to the end of
726 rtoy 1.20 the array. If String is not given, a new string is created. If
727     String is given, the converted octets are stored in String, starting
728     at S-Start (defaulting to the 0) and ending at S-End (defaulting to
729     the length of String). If the string is not large enough to hold
730     all of characters, then some octets will not be converted. A State
731     may also be specified; this is used as the state of the external
732     format.
733    
734     Four values are returned: the string, the number of characters read,
735     the number of octets actually consumed and the new state of the
736     external format."
737 rtoy 1.1 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
738 rtoy 1.20 (type kernel:index start s-start)
739 rtoy 1.6 (type (or kernel:index null) end)
740 rtoy 1.3 (type (or simple-string null) string))
741 rtoy 1.20 (let ((s-end (if s-end-p
742     s-end
743     (if stringp
744     (length string)
745     (length octets)))))
746     (multiple-value-bind (string pos last-octet new-state)
747     (funcall (ef-octets-to-string external-format)
748     octets (1- start) (1- (or end (length octets)))
749     state
750     (or string (make-string (length octets)))
751     s-start s-end)
752     (values (if stringp string (lisp::shrink-vector string pos)) pos last-octet new-state))))
753 rtoy 1.1
754    
755    
756 rtoy 1.3 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
757     `(lambda (string start end result &aux (ptr 0) (state nil))
758     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
759     (type simple-string string)
760 rtoy 1.6 (type kernel:index start end ptr)
761 rtoy 1.3 (type simple-base-string result)
762     (ignorable state))
763 rtoy 1.11 (dotimes (i (- end start) (values result ptr))
764     (declare (type kernel:index i))
765     (char-to-octets ,extfmt (schar string (+ start i)) state
766     (lambda (b)
767     (when (= ptr (length result))
768     (setq result (adjust-array result (* 2 ptr))))
769     (setf (aref result (1- (incf ptr)))
770     (code-char b)))))))
771 rtoy 1.3
772     (defun string-encode (string external-format &optional (start 0) end)
773 rtoy 1.6 "Encode the given String using External-Format and return a new
774     string. The characters of the new string are the octets of the
775     encoded result, with each octet converted to a character via
776     code-char. This is the inverse to String-Decode"
777     (when (zerop (length string))
778     (return-from string-encode string))
779 rtoy 1.3 (multiple-value-bind (result ptr)
780     (lisp::with-array-data ((string string) (start start) (end end))
781 rtoy 1.6 (funcall (ef-encode external-format) string start end
782 rtoy 1.3 (make-string (length string) :element-type 'base-char)))
783     (lisp::shrink-vector result ptr)))
784    
785     (def-ef-macro ef-decode (extfmt lisp::lisp +ef-max+ +ef-de+)
786     `(lambda (string ptr end result &aux (pos -1) (count 0) (state nil))
787     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
788     (type simple-string string)
789 rtoy 1.6 (type kernel:index end count)
790 rtoy 1.3 (type (integer -1 (#.array-dimension-limit)) ptr pos)
791     (type simple-string result)
792     (ignorable state))
793     (loop until (>= ptr end)
794     ;; increasing size of result shouldn't ever be necessary, unless
795     ;; someone implements an encoding smaller than the source string...
796 rtoy 1.11 do (setf (schar result (incf pos))
797     (octets-to-char ,extfmt state count
798     ;; note the need to return NIL for EOF
799     (if (= (1+ ptr) (length string))
800     nil
801     (char-code (char string (incf ptr))))
802     (lambda (n) (decf ptr n))))
803 rtoy 1.5 finally (return (values result (1+ pos))))))
804 rtoy 1.3
805     (defun string-decode (string external-format &optional (start 0) end)
806 rtoy 1.6 "Decode String using the given External-Format and return the new
807     string. The input string is treated as if it were an array of
808     octets, where the char-code of each character is the octet. This is
809     the inverse of String-Encode."
810     (when (zerop (length string))
811     (return-from string-decode string))
812 rtoy 1.3 (multiple-value-bind (result pos)
813     (lisp::with-array-data ((string string) (start start) (end end))
814 rtoy 1.6 (funcall (ef-decode external-format)
815 rtoy 1.3 string (1- start) (1- end) (make-string (length string))))
816 rtoy 1.5 (lisp::shrink-vector result pos)))
817 rtoy 1.11
818    
819     (defun set-system-external-format (terminal &optional filenames)
820 rtoy 1.19 "Change the external format of the standard streams to Terminal.
821     The standard streams are sys::*stdin*, sys::*stdout*, and
822     sys::*stderr*, which are normally the input and/or output streams
823     for *standard-input* and *standard-output*. Also sets sys::*tty*
824     (normally *terminal-io* to the given external format. If the
825     optional argument Filenames is gvien, then the filename encoding is
826     set to the specified format."
827 rtoy 1.11 (unless (find-external-format terminal)
828     (error "Can't find external-format ~S." terminal))
829     (setf (stream-external-format sys:*stdin*) terminal
830     (stream-external-format sys:*stdout*) terminal
831     (stream-external-format sys:*stderr*) terminal)
832     (when (lisp::fd-stream-p sys:*tty*)
833     (setf (stream-external-format sys:*tty*) terminal))
834     (when filenames
835     (unless (find-external-format filenames)
836     (error "Can't find external-format ~S." filenames))
837     (when (and unix::*filename-encoding*
838     (not (eq unix::*filename-encoding* filenames)))
839     (cerror "Change it anyway."
840     "The external-format for encoding filenames is already set.")
841     (setq unix::*filename-encoding* filenames)))
842     t)
843    
844    
845     ;; Despite its name, this doesn't actually compile anything at all. What it
846     ;; does is expand into a lambda expression that can be compiled by the file
847     ;; compiler.
848     (defmacro precompile-ef-slot (ef slot)
849     (let* ((ef (find-external-format ef)))
850     ;; if there's no lambda expression available, flush it and regenerate
851     (unless (and (aref (ef-cache ef) slot)
852     (function-lambda-expression (aref (ef-cache ef) slot)))
853     (setf (aref (ef-cache ef) slot) nil)
854     (ecase slot
855     (#.+ef-cin+ (lisp::%ef-cin ef))
856     (#.+ef-cout+ (lisp::%ef-cout ef))
857     (#.+ef-sout+ (lisp::%ef-sout ef))
858     (#.+ef-os+ (%ef-octets-to-string ef))
859     (#.+ef-so+ (%ef-string-to-octets ef))
860     (#.+ef-en+ (%ef-encode ef))
861     (#.+ef-de+ (%ef-decode ef))))
862     `(setf (aref (ef-cache (find-external-format ,(ef-name ef))) ,slot)
863     ,(subst (ef-name ef) ef
864     (function-lambda-expression (aref (ef-cache ef) slot))))))

  ViewVC Help
Powered by ViewVC 1.1.5