/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.30 - (show annotations)
Sat Jul 3 13:39:19 2010 UTC (3 years, 9 months ago) by rtoy
Branch: MAIN
Changes since 1.29: +7 -7 lines
code/extfmts.lisp:
o Add error parameter to flush-state in external format definition so
  we can handle errors when flushing the state to a stream.
o Add optional error parameter to flush-state macro.

code/fd-stream.lisp:
o For the case where an external format has flush method, EF-FLUSH was
  not calling it correctly.  Update so the output function actuall
  works.
o Add error handler to call to flush-state.
o For the case where an external format does not have a flush method,
  output the state value instead of a replacement character so the
  external format can handle any errors.
1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2 ;;;
3 ;;; **********************************************************************
4 ;;; This code was written by Paul Foley and has been placed in the public
5 ;;; domain.
6 ;;;
7 (ext:file-comment
8 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.30 2010/07/03 13:39:19 rtoy Exp $")
9 ;;;
10 ;;; **********************************************************************
11 ;;;
12 ;;; Implementation of external-formats
13
14 (in-package "STREAM")
15
16 (intl:textdomain "cmucl")
17
18 (export '(string-to-octets octets-to-string *default-external-format*
19 string-encode string-decode set-system-external-format
20 +replacement-character-code+))
21
22 (defvar *default-external-format* :iso8859-1)
23
24 (defvar *external-formats* (make-hash-table :test 'equal))
25 (defvar *external-format-aliases* (make-hash-table))
26
27 (vm::defenum (:prefix "+EF-" :suffix "+" :start 1)
28 str ; string length
29 cin ; input a character
30 cout ; output a character
31 sin ; input string
32 sout ; output string
33 os ; octets to string
34 so ; string to octets
35 en ; encode
36 de ; decode
37 flush ; flush state
38 copy-state ; copy state
39 max)
40
41 ;; Unicode replacement character U+FFFD
42 (defconstant +replacement-character-code+ #xFFFD)
43
44 (define-condition external-format-not-implemented (error)
45 ()
46 (:report
47 (lambda (condition stream)
48 (declare (ignore condition))
49 (format stream (intl:gettext "Attempting unimplemented external-format I/O.")))))
50
51 (defun %efni (a b c d)
52 (declare (ignore a b c d))
53 (error 'external-format-not-implemented))
54
55 (defstruct efx
56 ;;
57 ;; Function to read a sequence of octets from a stream and convert
58 ;; them a code point.
59 (octets-to-code #'%efni :type function :read-only t)
60 ;;
61 ;; Function to convert a codepoint to a sequence of octets and write
62 ;; them to an output stream.
63 (code-to-octets #'%efni :type function :read-only t)
64 ;;
65 ;; Function (or NIL) to force any state in the external format to be
66 ;; flushed to the output stream. A NIL value means the external
67 ;; format does not need to do anything special.
68 (flush-state nil :type (or null function) :read-only t)
69 ;;
70 ;; Function to copy the state of the external-format. If NIL, then
71 ;; there is no state to be copied.
72 (copy-state nil :type (or null function) :read-only t)
73 (cache nil :type (or null simple-vector))
74 ;;
75 ;; Minimum number of octets needed to form a codepoint
76 (min 1 :type kernel:index :read-only t)
77 ;;
78 ;; Maximum number of octets needed to form a codepoint.
79 (max 1 :type kernel:index :read-only t))
80
81 (defstruct (external-format
82 (:conc-name ef-)
83 (:print-function %print-external-format)
84 (:constructor make-external-format (name efx composingp
85 &optional slots slotd)))
86 (name (ext:required-argument) :type (or keyword cons) :read-only t)
87 (efx (ext:required-argument) :type efx :read-only t)
88 (composingp (ext:required-argument) :type boolean :read-only t)
89 (slots #() :type simple-vector :read-only t)
90 (slotd nil :type list :read-only t))
91
92 (defun %print-external-format (ef stream depth)
93 (declare (ignore depth))
94 (print-unreadable-object (ef stream :type t :identity t)
95 (princ (ef-name ef) stream)))
96
97 (defun %intern-ef (ef)
98 (setf (gethash (ef-name ef) *external-formats*) ef))
99
100 (declaim (inline ef-octets-to-code ef-code-to-octets ef-flush-state ef-copy-state
101 ef-cache ef-min-octets ef-max-octets))
102
103 (defun ef-octets-to-code (ef)
104 (efx-octets-to-code (ef-efx ef)))
105
106 (defun ef-code-to-octets (ef)
107 (efx-code-to-octets (ef-efx ef)))
108
109 (defun ef-flush-state (ef)
110 (efx-flush-state (ef-efx ef)))
111
112 (defun ef-copy-state (ef)
113 (efx-copy-state (ef-efx ef)))
114
115 (defun ef-cache (ef)
116 (efx-cache (ef-efx ef)))
117
118 (defun ef-min-octets (ef)
119 (efx-min (ef-efx ef)))
120
121 (defun ef-max-octets (ef)
122 (efx-max (ef-efx ef)))
123
124 (eval-when (:compile-toplevel :load-toplevel :execute)
125 (defun %merge-slots (old new)
126 (let* ((pos (length old))
127 (tmp (mapcar (lambda (x)
128 (let* ((name (if (consp x) (first x) x))
129 (init (if (consp x) (second x) nil))
130 (list (if (consp x) (nthcdr 2 x) nil))
131 (prev (assoc name old))
132 (posn (if prev (second prev) (1- (incf pos)))))
133 (list name posn init (getf list :type t))))
134 new)))
135 (delete-duplicates (stable-sort (append old tmp) #'< :key #'second)
136 :key #'second))))
137
138 ;;; DEFINE-EXTERNAL-FORMAT -- Public
139 ;;;
140 ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets
141 ;;; flush-state copy-state
142 ;;;
143 ;;; Define a new external format. Min/Max/Size are the minimum and
144 ;;; maximum number of octets that make up a character (:size N is just
145 ;;; shorthand for :min N :max N). Slots is a list of slot descriptions
146 ;;; similar to defstruct.
147 ;;;
148 ;;; name (base) (&rest slots)
149 ;;; Define an external format based on a previously-defined external
150 ;;; format, Base. The slot names used in Slots must match those in Base.
151 ;;;
152 ;;; octets-to-code (state input unput &rest vars)
153 ;;; Defines a form to be used by the external format to convert
154 ;;; octets to a code point. State is a form that can be used by the
155 ;;; body to access the state of the stream. Input is a form that
156 ;;; can be used to read one octet from the input stream. (It can be
157 ;;; called as many times as needed.) Similarly, Unput is a form to
158 ;;; put back one octet to the input stream. Vars is a list of vars
159 ;;; that need to be defined for any symbols used within the form.
160 ;;;
161 ;;; This should return two values: the code and the number of octets
162 ;;; read to form the code.
163 ;;;
164 ;;; code-to-octets (code state output &rest vars)
165 ;;; Defines a form to be used by the external format to convert a
166 ;;; code point to octets for output. Code is the code point to be
167 ;;; converted. State is a form to access the current value of the
168 ;;; stream's state variable. Output is a form that writes one octet
169 ;;; to the output stream.
170 ;;;
171 ;;; flush-state (state output error &rest vars)
172 ;;; Defines a form to be used by the external format to flush out
173 ;;; any state when an output stream is closed. Similar to
174 ;;; CODE-TO-OCTETS, but there is no code.
175 ;;;
176 ;;; copy-state (state &rest vars)
177 ;;; Defines a form to copy any state needed by the external format.
178 ;;; This should probably be a deep copy so that if the original
179 ;;; state is modified, the copy is not.
180 ;;;
181 ;;; Note: external-formats work on code-points, not
182 ;;; characters, so that the entire 31 bit ISO-10646 range can be
183 ;;; used internally regardless of the size of a character recognized
184 ;;; by Lisp and external formats can be useful to people who want to
185 ;;; process characters outside the Lisp range (see
186 ;;; CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)
187 ;;;
188 (defmacro define-external-format (name (&rest args) (&rest slots)
189 &optional octets-to-code code-to-octets
190 flush-state copy-state)
191 (when (and (oddp (length args)) (not (= (length args) 1)))
192 (warn (intl:gettext "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT.") args))
193 (let* ((tmp (gensym))
194 (min (if (evenp (length args))
195 (or (getf args :min) (getf args :size) 1)
196 1))
197 (max (if (evenp (length args))
198 (or (getf args :max) (getf args :size) 6)
199 6))
200 (base (if (= (length args) 1)
201 (find-external-format (first args))
202 nil))
203 (bslotd (if base (ef-slotd base) nil))
204 (slotd (%merge-slots bslotd slots))
205 (slotb (loop for slot in slotd
206 collect `(,(first slot)
207 `(the ,',(fourth slot)
208 ;; IDENTITY is here to protect against SETF
209 (identity (svref %slots% ,',(second slot))))))))
210 `(macrolet ((octets-to-code ((state input unput error &rest vars) body)
211 `(lambda (,state ,input ,unput ,error)
212 (declare (ignorable ,state ,input ,unput ,error)
213 (optimize (ext:inhibit-warnings 3)))
214 (let (,@',slotb
215 (,input `(the (or (unsigned-byte 8) null) ,,input))
216 ,@(loop for var in vars collect `(,var (gensym))))
217 ,body)))
218 (code-to-octets ((code state output error &rest vars) body)
219 `(lambda (,',tmp ,state ,output ,error)
220 (declare (ignorable ,state ,output ,error)
221 (optimize (ext:inhibit-warnings 3)))
222 (let (,@',slotb
223 (,code ',code)
224 ,@(loop for var in vars collect `(,var (gensym))))
225 `(let ((,',code (the lisp:codepoint ,,',tmp)))
226 (declare (ignorable ,',code))
227 ,,body))))
228 (flush-state ((state output error &rest vars) body)
229 `(lambda (,state ,output ,error)
230 (declare (ignorable ,state ,output ,error))
231 (let (,@',slotb
232 ,@(loop for var in vars collect `(,var (gensym))))
233 ,body)))
234 (copy-state ((state &rest vars) body)
235 `(lambda (,state)
236 (declare (ignorable ,state))
237 (let (,@',slotb
238 ,@(loop for var in vars collect `(,var (gensym))))
239 ,body))))
240 (%intern-ef (make-external-format ,name
241 ,(if base
242 `(ef-efx (find-external-format ,(ef-name base)))
243 `(make-efx :octets-to-code ,octets-to-code
244 :code-to-octets ,code-to-octets
245 :flush-state ,flush-state
246 :copy-state ,copy-state
247 :cache (make-array +ef-max+
248 :initial-element nil)
249 :min ,(min min max) :max ,(max min max)))
250 nil
251 (let* ,(loop for x in slotd
252 collect (list (first x) (third x)))
253 (vector ,@(mapcar #'first slotd)))
254 ',slotd)))))
255
256 ;;; DEFINE-COMPOSING-EXTERNAL-FORMAT -- Public
257 ;;;
258 ;;; A composing-external-format differs from an (ordinary) external-format
259 ;;; in that it translates characters (really codepoints, of course) into
260 ;;; other characters, rather than translating between characters and binary
261 ;;; octets. They have to be composed with a non-composing external-format
262 ;;; to be of any use.
263 ;;;
264 ;;;
265 ;;; name (&key min max size) input output
266 ;;; Defines a new composing external format. The parameters Min,
267 ;;; Max, and Size are the same as for defining an external format.
268 ;;; The parameters input and output are forms to handle input and
269 ;;; output.
270 ;;;
271 ;;; input (state input unput &rest vars)
272 ;;; Defines a form to be used by the composing external format when
273 ;;; reading input to transform a codepoint (or sequence of
274 ;;; codepoints) to another. State is a form that can be used by the
275 ;;; body to access the state of the external format. Input is a
276 ;;; form that can be used to read one code point from the input
277 ;;; stream. (Input returns two values, the codepoint and the number
278 ;;; of octets read.) It may be called as many times as needed.
279 ;;; This returns two values: the codepoint of the character (or NIL)
280 ;;; and the number of octets read. Similarly, Unput is a form to
281 ;;; put back one octet to the input stream. Vars is a list of vars
282 ;;; that need to be defined for any symbols used within the form.
283 ;;;
284 ;;; This should return two values: the code and the number of octets
285 ;;; read to form the code.
286 ;;;
287 ;;; output (code state output &rest vars)
288 ;;; Defines a form to be used by the composing external format to
289 ;;; convert a code point to octets for output. Code is the code
290 ;;; point to be converted. State is a form to access the current
291 ;;; value of the stream's state variable. Output is a form that
292 ;;; writes one octet to the output stream.
293
294 (defmacro define-composing-external-format (name (&key min max size)
295 input output)
296 (let ((tmp (gensym))
297 (min (or min size 1))
298 (max (or max size 1)))
299 `(macrolet ((input ((state input unput &rest vars) body)
300 `(lambda (,state ,input ,unput)
301 (declare (ignorable ,state ,input ,unput)
302 (optimize (ext:inhibit-warnings 3)))
303 (let ((,input `(the (values (or lisp:codepoint null)
304 kernel:index)
305 ,,input))
306 ,@(loop for var in vars collect `(,var (gensym))))
307 ,body)))
308 (output ((code state output &rest vars) body)
309 `(lambda (,',tmp ,state ,output)
310 (declare (ignorable ,state ,output)
311 (optimize (ext:inhibit-warnings 3)))
312 (let ((,code ',code)
313 ,@(loop for var in vars collect `(,var (gensym))))
314 `(let ((,',code (the lisp:codepoint ,,',tmp)))
315 (declare (ignorable ,',code))
316 ,,body)))))
317 (%intern-ef (make-external-format ,name
318 (make-efx :octets-to-code ,input
319 :code-to-octets ,output
320 :min ,(min min max) :max ,(max min max))
321 t
322 #() '())))))
323
324 (defun load-external-format-aliases ()
325 (let ((*package* (find-package "KEYWORD"))
326 (unix::*filename-encoding* :iso8859-1))
327 (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil
328 :external-format :iso8859-1)
329 (when stm
330 (do ((alias (read stm nil stm) (read stm nil stm))
331 (value (read stm nil stm) (read stm nil stm)))
332 ((or (eq alias stm) (eq value stm))
333 (unless (eq alias stm)
334 (warn (intl:gettext "External-format aliases file ends early."))))
335 (if (and (keywordp alias) (or (keywordp value)
336 (and (consp value)
337 (every #'keywordp value))))
338 (setf (gethash alias *external-format-aliases*) value)
339 (warn (intl:gettext "Bad entry in external-format aliases file: ~S => ~S.")
340 alias value)))))))
341
342 (defun %find-external-format (name)
343 ;; avoid loading files, etc., early in the boot sequence
344 (when (or (eq name :iso8859-1)
345 (and (eq name :default) (eq *default-external-format* :iso8859-1)))
346 (return-from %find-external-format
347 (gethash :iso8859-1 *external-formats*)))
348
349 (when (zerop (hash-table-count *external-format-aliases*))
350 (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
351 (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
352 (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
353 (load-external-format-aliases))
354
355 (do ((tmp (gethash name *external-format-aliases*)
356 (gethash tmp *external-format-aliases*))
357 (cnt 0 (1+ cnt)))
358 ((or (null tmp) (= cnt 50))
359 (unless (null tmp)
360 (error (intl:gettext "External-format aliasing depth exceeded."))))
361 (setq name tmp))
362
363 (or (gethash name *external-formats*)
364 (and (consp name) (find-external-format name))
365 (and (with-standard-io-syntax
366 ;; Use standard IO syntax so that changes by the user
367 ;; don't mess up compiling the external format.
368 (let ((*package* (find-package "STREAM"))
369 (lisp::*enable-package-locked-errors* nil)
370 (s (open (format nil "ext-formats:~(~A~).lisp" name)
371 :if-does-not-exist nil :external-format :iso8859-1)))
372 (when s
373 (null (nth-value 1 (ext:compile-from-stream s))))))
374 (gethash name *external-formats*))))
375
376 (defun %composed-ef-name (a b)
377 (if (consp a) (append a (list b)) (list a b)))
378
379 (defun %compose-external-formats (a b)
380 (when (ef-composingp a)
381 (error (intl:gettext "~S is a Composing-External-Format.") (ef-name a)))
382 (unless (ef-composingp b)
383 (error (intl:gettext "~S is not a Composing-External-Format.") (ef-name b)))
384 (make-external-format
385 (%composed-ef-name (ef-name a) (ef-name b))
386 (make-efx
387 :octets-to-code (lambda (state input unput error)
388 (let ((nstate (gensym "STATE-")))
389 `(let ((,nstate ,state))
390 (when (null ,nstate)
391 (setq ,nstate (setf ,state (cons nil nil))))
392 ,(funcall (ef-octets-to-code b) `(car ,nstate)
393 (funcall (ef-octets-to-code a)
394 `(cdr ,nstate) input unput error)
395 unput))))
396 :code-to-octets (lambda (code state output error)
397 (let ((nstate (gensym "STATE-")))
398 `(let ((,nstate ,state))
399 (when (null ,nstate)
400 (setq ,nstate (setf ,state (cons nil nil))))
401 ,(funcall (ef-code-to-octets b) code `(car ,nstate)
402 `(lambda (x)
403 ,(funcall (ef-code-to-octets a)
404 'x `(cdr ,nstate) output error))))))
405 :cache (make-array +ef-max+ :initial-element nil)
406 :min (* (ef-min-octets a) (ef-min-octets b))
407 :max (* (ef-max-octets a) (ef-max-octets b)))
408 nil #() '()))
409
410 (defun find-external-format (name &optional (error-p t))
411 (when (external-format-p name)
412 (return-from find-external-format name))
413
414 (or (if (consp name) (every #'keywordp name) (keywordp name))
415 (error (intl:gettext "~S is not a valid external format name.") name))
416
417 (when (eq name :default)
418 (setq name *default-external-format*))
419
420 (when (and (consp name) (not (cdr name)))
421 (setq name (car name)))
422
423 (flet ((not-found ()
424 (when (equal *default-external-format* name)
425 (setq *default-external-format* :iso8859-1))
426 (if error-p (error (intl:gettext "External format ~S not found.") name) nil)))
427 (if (consp name)
428 (let ((efs (mapcar #'%find-external-format name)))
429 (if (member nil efs)
430 (not-found)
431 (let ((name (reduce #'%composed-ef-name (mapcar #'ef-name efs))))
432 (or (gethash name *external-formats*)
433 (%intern-ef (reduce #'%compose-external-formats efs))))))
434 (or (%find-external-format name) (not-found)))))
435
436 (defun flush-external-formats ()
437 (maphash (lambda (name ef)
438 (declare (ignore name))
439 (fill (ef-cache ef) nil))
440 *external-formats*))
441
442 (defvar *.table-inverse.* (make-hash-table :test 'eq :size 7))
443
444 (defun invert-table (table)
445 (declare (type (or (simple-array (unsigned-byte 31) *)
446 (simple-array (unsigned-byte 16) *))
447 table)
448 (optimize (speed 3) (space 0) (safety 0) (debug 0)
449 (ext:inhibit-warnings 3)))
450 (or (gethash table *.table-inverse.*)
451 (let* ((mbits (if (= (array-total-size table) 128) 7 8))
452 (lbits (cond ((> (array-total-size table) 256) 3)
453 ((< (array-total-size table) 100) 6)
454 (t 5)))
455 (hvec (make-array (1+ (ash #x110000 (- 0 mbits lbits)))
456 :element-type '(unsigned-byte 16)
457 :initial-element #xFFFF))
458 (mvec (make-array 0 :element-type '(unsigned-byte 16)))
459 (lvec (make-array 0 :element-type '(unsigned-byte 16)))
460 (width (array-dimension table 0))
461 (power (1- (array-rank table)))
462 (base (if (= width 94) 1 0))
463 hx mx lx)
464 (assert (and (< power 2) (<= width 256)))
465 (dotimes (i (array-total-size table))
466 (declare (type (integer 0 (#.array-dimension-limit)) i))
467 (let ((tmp i) (val (row-major-aref table i)) (z 0))
468 (declare (type (integer 0 (#.array-dimension-limit)) tmp)
469 (type (unsigned-byte 16) z))
470 (unless (= val #xFFFE)
471 (when (plusp power)
472 (multiple-value-bind (x y) (floor tmp width)
473 (setq tmp x)
474 (setq z (logior z (ash (the (integer 0 255) (+ y base))
475 (the (integer 0 24)
476 (* 8 power)))))))
477 (setq hx (ash val (- 0 mbits lbits)))
478 (when (= (aref hvec hx) #xFFFF)
479 (setf (aref hvec hx) (length mvec))
480 (let ((tmp (make-array (+ (length mvec) (ash 1 mbits))
481 :element-type '(unsigned-byte 16)
482 :initial-element #xFFFF)))
483 (replace tmp mvec)
484 (setq mvec tmp)))
485 (setq mx (logand (ash val (- lbits)) (lognot (ash -1 mbits))))
486 (when (= (aref mvec (+ (aref hvec hx) mx)) #xFFFF)
487 (setf (aref mvec (+ (aref hvec hx) mx)) (length lvec))
488 (let ((tmp (make-array (+ (length lvec) (ash 1 lbits))
489 :element-type '(unsigned-byte 16)
490 :initial-element #xFFFF)))
491 (replace tmp lvec)
492 (setq lvec tmp)))
493 (setq lx (logand val (lognot (ash -1 lbits))))
494 (setf (aref lvec (+ (aref mvec (+ (aref hvec hx) mx)) lx))
495 (logior z (+ tmp base))))))
496 (setf (gethash table *.table-inverse.*)
497 (lisp::make-ntrie16 :split (logior (ash (1- mbits) 4) (1- lbits))
498 :hvec hvec :mvec mvec :lvec lvec)))))
499
500 (declaim (inline get-inverse))
501 (defun get-inverse (ntrie code)
502 (declare (type lisp::ntrie16 ntrie) (type (integer 0 #x10FFFF) code))
503 (let ((n (lisp::qref ntrie code)))
504 (and n (let ((m (aref (lisp::ntrie16-lvec ntrie) n)))
505 (if (= m #xFFFF) nil m)))))
506
507
508 (define-condition void-external-format (error)
509 ()
510 (:report
511 (lambda (condition stream)
512 (declare (ignore condition))
513 (format stream (intl:gettext "Attempting I/O through void external-format.")))))
514
515 (define-external-format :void (:size 0) ()
516 (octets-to-code (state input unput error)
517 `(error 'void-external-format))
518 (code-to-octets (code state output error)
519 `(error 'void-external-format)))
520
521 (define-external-format :iso8859-1 (:size 1) ()
522 (octets-to-code (state input unput error)
523 `(values ,input 1))
524 (code-to-octets (code state output error)
525 `(,output (if (> ,code 255)
526 (if ,error
527 (funcall ,error "Cannot output codepoint #x~X to ISO8859-1 stream"
528 ,code 1)
529 #x3F)
530 ,code))))
531
532 ;;; OCTETS-TO-CODEPOINT, CODEPOINT-TO-OCTETS -- Semi-Public
533 ;;;
534 ;;; Normally you'd want to use OCTETS-TO-CHAR and CHAR-TO-OCTETS instead of
535 ;;; these, but that limits you to Lisp's idea of a character - either Latin-1
536 ;;; in 8 bit Lisp images, or the Unicode BMP in 16 bit images. If you want
537 ;;; to read or write texts containing characters not supported by your Lisp,
538 ;;; these macros can be used instead.
539 (defmacro octets-to-codepoint (external-format state count input unput &optional error)
540 (let ((tmp1 (gensym)) (tmp2 (gensym))
541 (ef (find-external-format external-format)))
542 `(multiple-value-bind (,tmp1 ,tmp2)
543 ,(funcall (ef-octets-to-code ef) state input unput error)
544 (setf ,count (the kernel:index ,tmp2))
545 (the (or lisp:codepoint null) ,tmp1))))
546
547 (defmacro codepoint-to-octets (external-format code state output &optional error)
548 (let ((ef (find-external-format external-format)))
549 (funcall (ef-code-to-octets ef) code state output error)))
550
551
552
553 (defvar *ef-base* +ef-max+)
554 (defvar *ef-extensions* '())
555
556 (defun ensure-cache (ef id reqd)
557 (let ((base (or (getf *ef-extensions* id)
558 (setf (getf *ef-extensions* id)
559 (prog1 *ef-base* (incf *ef-base* reqd))))))
560 (when (< (length (ef-cache ef)) (+ base reqd))
561 (setf (efx-cache (ef-efx ef))
562 (adjust-array (ef-cache ef) (+ base reqd) :initial-element nil)))
563 base))
564
565 ;;; DEF-EF-MACRO -- Public
566 ;;;
567 ;;;
568 (defmacro def-ef-macro (name (ef id reqd idx) body)
569 (let* ((tmp1 (gensym))
570 (tmp2 (gensym))
571 (blknm (nth-value 1 (lisp::valid-function-name-p name)))
572 (%name (intern (format nil "%~A" name) #|(symbol-package blknm)|#)))
573 `(progn
574 (defun ,%name (,ef)
575 (let* ((,tmp1 (find-external-format ,ef))
576 (,tmp2 ,(if (eq id 'lisp::lisp)
577 idx
578 `(+ (ensure-cache ,tmp1 ',id ,reqd) ,idx))))
579 (funcall (or (aref (ef-cache ,tmp1) ,tmp2)
580 (setf (aref (ef-cache ,tmp1) ,tmp2)
581 (let ((*compile-print* nil)
582 ;; Set default format when we compile so we
583 ;; can see compiler messages. If we don't,
584 ;; we run into a problem that we might be
585 ;; changing the default format while we're
586 ;; compiling, and we don't know how to output
587 ;; the compiler messages.
588 #|(*default-external-format* :iso8859-1)|#)
589 (compile nil `(lambda (%slots%)
590 (declare (ignorable %slots%))
591 (block ,',blknm
592 ,,body))))))
593 (ef-slots ,tmp1))))
594 (declaim (inline ,name))
595 (defun ,name (,tmp1)
596 (let ((,tmp2 (load-time-value (cons nil nil))))
597 (when (eq ,tmp1 :default)
598 (setq ,tmp1 *default-external-format*))
599 (if (eq ,tmp1 (car ,tmp2))
600 (cdr ,tmp2)
601 (setf (car ,tmp2) ,tmp1
602 (cdr ,tmp2) (,%name ,tmp1))))))))
603
604
605
606 ;;; OCTETS-TO-CHAR, CHAR-TO-OCTETS -- Public
607 ;;;
608 ;;; Read and write one character through an external-format
609 ;;;
610 (defmacro octets-to-char (external-format state count input unput &optional error)
611 (let ((nstate (gensym)))
612 `(let ((,nstate ,state))
613 (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
614 (if (car ,nstate)
615 ;; Return the trailing surrgate. Must set count to 0 to
616 ;; tell the stream code we didn't consume any octets!
617 (prog1 (the character (car ,nstate))
618 (setf (car ,nstate) nil ,count 0))
619 (let ((code (octets-to-codepoint ,external-format
620 (cdr ,nstate) ,count ,input ,unput ,error)))
621 (declare (type lisp:codepoint code))
622 ;;@@ on non-Unicode builds, limit to 8-bit chars
623 ;;@@ if unicode-bootstrap, can't use #\u+fffd
624 (cond ((or (lisp::surrogatep code) (> code #x10FFFF))
625 (if ,error
626 (funcall ,error "Cannot output codepoint #x~X" code)
627 #-(and unicode (not unicode-bootstrap)) #\?
628 #+(and unicode (not unicode-bootstrap)) #\U+FFFD))
629 #+unicode
630 ((> code #xFFFF)
631 (multiple-value-bind (hi lo) (surrogates code)
632 (setf (car ,nstate) lo)
633 hi))
634 (t (code-char code))))))))
635
636 (defmacro char-to-octets (external-format char state output &optional error)
637 (let ((nchar (gensym))
638 (nstate (gensym))
639 (wryte (gensym))
640 (ch (gensym)))
641 `(let ((,nchar ,char)
642 (,nstate ,state))
643 (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
644 (if (lisp::surrogatep (char-code ,nchar) :high)
645 (setf (car ,nstate) ,nchar)
646 (flet ((,wryte (,ch)
647 (codepoint-to-octets ,external-format ,ch (cdr ,nstate)
648 ,output ,error)))
649 (declare (dynamic-extent #',wryte))
650 (if (car ,nstate)
651 (prog1
652 ;; Invalid surrogate sequences get replaced with
653 ;; the replacement character.
654 (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
655 (surrogates-to-codepoint (car ,nstate) ,nchar)
656 (if ,error
657 (funcall ,error "Cannot convert invalid surrogate #x~X to character"
658 ,nchar)
659 +replacement-character-code+)))
660 (setf (car ,nstate) nil))
661 ;; A lone trailing (low) surrogate gets replaced with
662 ;; the replacement character.
663 (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
664 (if ,error
665 (funcall ,error "Cannot convert lone trailing surrogate #x~X to character"
666 ,nchar)
667 +replacement-character-code+)
668 (char-code ,nchar)))))))))
669
670 (defmacro flush-state (external-format state output &optional error)
671 (let* ((ef (find-external-format external-format))
672 (f (ef-flush-state ef)))
673 (when f
674 (funcall f state output error))))
675
676 (defmacro copy-state (external-format state)
677 (let* ((ef (find-external-format external-format))
678 (f (ef-copy-state ef)))
679 (when f
680 (funcall f state))))
681
682 (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
683 `(lambda (string start end buffer error &aux (ptr 0) (state nil))
684 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
685 (type simple-string string)
686 (type kernel:index start end ptr)
687 (type (simple-array (unsigned-byte 8) (*)) buffer)
688 (ignorable state))
689 (dotimes (i (- end start) (values buffer ptr))
690 (declare (type kernel:index i))
691 (char-to-octets ,extfmt (schar string (+ start i)) state
692 (lambda (b)
693 (when (= ptr (length buffer))
694 (setq buffer (adjust-array buffer (* 2 ptr))))
695 (setf (aref buffer (1- (incf ptr))) b))
696 error))))
697
698 (defun string-to-octets (string &key (start 0) end (external-format :default)
699 (buffer nil bufferp)
700 error)
701 "Convert String to octets using the specified External-format. The
702 string is bounded by Start (defaulting to 0) and End (defaulting to
703 the end of the string. If Buffer is given, the octets are stored
704 there. If not, a new buffer is created."
705 (declare (type string string)
706 (type kernel:index start)
707 (type (or kernel:index null) end)
708 (type (or (simple-array (unsigned-byte 8) (*)) null) buffer))
709 (let* ((buffer (or buffer (make-array (length string)
710 :element-type '(unsigned-byte 8)))))
711 (multiple-value-bind (buffer ptr)
712 (lisp::with-array-data ((string string) (start start) (end end))
713 (funcall (ef-string-to-octets external-format)
714 string start end buffer error))
715 (values (if bufferp buffer (lisp::shrink-vector buffer ptr)) ptr))))
716
717 (def-ef-macro ef-octets-to-string (extfmt lisp::lisp +ef-max+ +ef-os+)
718 `(lambda (octets ptr end state string s-start s-end error
719 &aux (pos s-start) (count 0) (last-octet 0))
720 (declare (optimize (speed 3) (safety 0) #|(space 0) (debug 0)|#)
721 (type (simple-array (unsigned-byte 8) (*)) octets)
722 (type kernel:index pos end count last-octet s-start s-end)
723 (type (integer -1 (#.array-dimension-limit)) ptr)
724 (type simple-string string)
725 (ignorable state))
726 (catch 'end-of-octets
727 (loop while (< pos s-end)
728 do (setf (schar string pos)
729 (octets-to-char ,extfmt state count
730 (if (>= ptr end)
731 (throw 'end-of-octets nil)
732 (aref octets (incf ptr)))
733 (lambda (n) (decf ptr n))
734 error))
735 (incf pos)
736 (incf last-octet count)))
737 (values string pos last-octet state)))
738
739 (defun octets-to-string (octets &key (start 0) end (external-format :default)
740 (string nil stringp)
741 (s-start 0) (s-end nil s-end-p)
742 (state nil)
743 error)
744 "Octets-to-string converts an array of octets in Octets to a string
745 according to the specified External-format. The array of octets is
746 bounded by Start (defaulting ot 0) and End (defaulting to the end of
747 the array. If String is not given, a new string is created. If
748 String is given, the converted octets are stored in String, starting
749 at S-Start (defaulting to the 0) and ending at S-End (defaulting to
750 the length of String). If the string is not large enough to hold
751 all of characters, then some octets will not be converted. A State
752 may also be specified; this is used as the state of the external
753 format.
754
755 Four values are returned: the string, the number of characters read,
756 the number of octets actually consumed and the new state of the
757 external format."
758 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
759 (type kernel:index start s-start)
760 (type (or kernel:index null) end)
761 (type (or simple-string null) string))
762 (let ((s-end (if s-end-p
763 s-end
764 (if stringp
765 (length string)
766 (length octets)))))
767 (multiple-value-bind (string pos last-octet new-state)
768 (funcall (ef-octets-to-string external-format)
769 octets (1- start) (1- (or end (length octets)))
770 state
771 (or string (make-string (length octets)))
772 s-start s-end
773 error)
774 (values (if stringp string (lisp::shrink-vector string pos)) pos last-octet new-state))))
775
776
777
778 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
779 `(lambda (string start end result error &aux (ptr 0) (state nil))
780 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
781 (type simple-string string)
782 (type kernel:index start end ptr)
783 (type simple-base-string result)
784 (ignorable state))
785 (dotimes (i (- end start) (values result ptr))
786 (declare (type kernel:index i))
787 (char-to-octets ,extfmt (schar string (+ start i)) state
788 (lambda (b)
789 (when (= ptr (length result))
790 (setq result (adjust-array result (* 2 ptr))))
791 (setf (aref result (1- (incf ptr)))
792 (code-char b)))
793 error))))
794
795 (defun string-encode (string external-format &key (start 0) end error)
796 "Encode the given String using External-Format and return a new
797 string. The characters of the new string are the octets of the
798 encoded result, with each octet converted to a character via
799 code-char. This is the inverse to String-Decode"
800 (when (zerop (length string))
801 (return-from string-encode string))
802 (multiple-value-bind (result ptr)
803 (lisp::with-array-data ((string string) (start start) (end end))
804 (funcall (ef-encode external-format) string start end
805 (make-string (length string) :element-type 'base-char)
806 error))
807 (lisp::shrink-vector result ptr)))
808
809 (def-ef-macro ef-decode (extfmt lisp::lisp +ef-max+ +ef-de+)
810 `(lambda (string ptr end result error &aux (pos -1) (count 0) (state nil))
811 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
812 (type simple-string string)
813 (type kernel:index end count)
814 (type (integer -1 (#.array-dimension-limit)) ptr pos)
815 (type simple-string result)
816 (ignorable state))
817 (loop until (>= ptr end)
818 ;; increasing size of result shouldn't ever be necessary, unless
819 ;; someone implements an encoding smaller than the source string...
820 do (setf (schar result (incf pos))
821 (octets-to-char ,extfmt state count
822 ;; note the need to return NIL for EOF
823 (if (= (1+ ptr) (length string))
824 nil
825 (char-code (char string (incf ptr))))
826 (lambda (n) (decf ptr n))
827 error))
828 finally (return (values result (1+ pos))))))
829
830 (defun string-decode (string external-format &key (start 0) end error)
831 "Decode String using the given External-Format and return the new
832 string. The input string is treated as if it were an array of
833 octets, where the char-code of each character is the octet. This is
834 the inverse of String-Encode."
835 (when (zerop (length string))
836 (return-from string-decode string))
837 (multiple-value-bind (result pos)
838 (lisp::with-array-data ((string string) (start start) (end end))
839 (funcall (ef-decode external-format)
840 string (1- start) (1- end) (make-string (length string))
841 error))
842 (lisp::shrink-vector result pos)))
843
844
845 (defun set-system-external-format (terminal &optional filenames)
846 "Change the external format of the standard streams to Terminal.
847 The standard streams are sys::*stdin*, sys::*stdout*, and
848 sys::*stderr*, which are normally the input and/or output streams
849 for *standard-input* and *standard-output*. Also sets sys::*tty*
850 (normally *terminal-io* to the given external format. If the
851 optional argument Filenames is gvien, then the filename encoding is
852 set to the specified format."
853 (unless (find-external-format terminal)
854 (error (intl:gettext "Can't find external-format ~S.") terminal))
855 (setf (stream-external-format sys:*stdin*) terminal
856 (stream-external-format sys:*stdout*) terminal
857 (stream-external-format sys:*stderr*) terminal)
858 (when (lisp::fd-stream-p sys:*tty*)
859 (setf (stream-external-format sys:*tty*) terminal))
860 (when filenames
861 (unless (find-external-format filenames)
862 (error (intl:gettext "Can't find external-format ~S.") filenames))
863 (when (and unix::*filename-encoding*
864 (not (eq unix::*filename-encoding* filenames)))
865 (cerror (intl:gettext "Change it anyway.")
866 (intl:gettext "The external-format for encoding filenames is already set."))
867 (setq unix::*filename-encoding* filenames)))
868 t)
869
870
871 ;; Despite its name, this doesn't actually compile anything at all. What it
872 ;; does is expand into a lambda expression that can be compiled by the file
873 ;; compiler.
874 (defmacro precompile-ef-slot (ef slot)
875 (let* ((ef (find-external-format ef)))
876 ;; if there's no lambda expression available, flush it and regenerate
877 (unless (and (aref (ef-cache ef) slot)
878 (function-lambda-expression (aref (ef-cache ef) slot)))
879 (setf (aref (ef-cache ef) slot) nil)
880 (ecase slot
881 (#.+ef-cin+ (lisp::%ef-cin ef))
882 (#.+ef-cout+ (lisp::%ef-cout ef))
883 (#.+ef-sout+ (lisp::%ef-sout ef))
884 (#.+ef-os+ (%ef-octets-to-string ef))
885 (#.+ef-so+ (%ef-string-to-octets ef))
886 (#.+ef-en+ (%ef-encode ef))
887 (#.+ef-de+ (%ef-decode ef))))
888 `(setf (aref (ef-cache (find-external-format ,(ef-name ef))) ,slot)
889 ,(subst (ef-name ef) ef
890 (function-lambda-expression (aref (ef-cache ef) slot))))))

  ViewVC Help
Powered by ViewVC 1.1.5