/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.17 - (show annotations)
Thu Sep 17 16:15:34 2009 UTC (4 years, 7 months ago) by rtoy
Branch: MAIN
Changes since 1.16: +20 -14 lines
Merge changes from 20a branch.
1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2 ;;;
3 ;;; **********************************************************************
4 ;;; This code was written by Paul Foley and has been placed in the public
5 ;;; domain.
6 ;;;
7 (ext:file-comment
8 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.17 2009/09/17 16:15:34 rtoy Exp $")
9 ;;;
10 ;;; **********************************************************************
11 ;;;
12 ;;; Implementation of external-formats
13
14 (in-package "STREAM")
15
16 (export '(string-to-octets octets-to-string *default-external-format*
17 string-encode string-decode set-system-external-format
18 +replacement-character-code+))
19
20 (defvar *default-external-format* :iso8859-1)
21
22 (defvar *external-formats* (make-hash-table :test 'equal))
23 (defvar *external-format-aliases* (make-hash-table))
24
25 (vm::defenum (:prefix "+EF-" :suffix "+" :start 1)
26 str ; string length
27 cin ; input a character
28 cout ; output a character
29 sin ; input string
30 sout ; output string
31 os ; octets to string
32 so ; string to octets
33 en ; encode
34 de ; decode
35 flush ; flush state
36 max)
37
38 ;; Unicode replacement character U+FFFD
39 (defconstant +replacement-character-code+ #xFFFD)
40
41 (define-condition external-format-not-implemented (error)
42 ()
43 (:report
44 (lambda (condition stream)
45 (declare (ignore condition))
46 (format stream "Attempting unimplemented external-format I/O."))))
47
48 (defun %efni (a b c d)
49 (declare (ignore a b c d))
50 (error 'external-format-not-implemented))
51
52 (defstruct efx
53 ;;
54 ;; Function to read a sequence of octets from a stream and convert
55 ;; them a code point.
56 (octets-to-code #'%efni :type function :read-only t)
57 ;;
58 ;; Function to convert a codepoint to a sequence of octets and write
59 ;; them to an output stream.
60 (code-to-octets #'%efni :type function :read-only t)
61 ;;
62 ;; Function (or NIL) to force any state in the external format to be
63 ;; flushed to the output stream. A NIL value means the external
64 ;; format does not need to do anything special.
65 (flush-state nil :type (or null function) :read-only t)
66 ;;
67 ;; Function to copy the state of the external-format. If NIL, then
68 ;; there is no state to be copied.
69 (copy-state nil :type (or null function) :read-only t)
70 (cache nil :type (or null simple-vector))
71 ;;
72 ;; Minimum number of octets needed to form a codepoint
73 (min 1 :type kernel:index :read-only t)
74 ;;
75 ;; Maximum number of octets needed to form a codepoint.
76 (max 1 :type kernel:index :read-only t))
77
78 (defstruct (external-format
79 (:conc-name ef-)
80 (:print-function %print-external-format)
81 (:constructor make-external-format (name efx composingp
82 &optional slots slotd)))
83 (name (ext:required-argument) :type (or keyword cons) :read-only t)
84 (efx (ext:required-argument) :type efx :read-only t)
85 (composingp (ext:required-argument) :type boolean :read-only t)
86 (slots #() :type simple-vector :read-only t)
87 (slotd nil :type list :read-only t))
88
89 (defun %print-external-format (ef stream depth)
90 (declare (ignore depth))
91 (print-unreadable-object (ef stream :type t :identity t)
92 (princ (ef-name ef) stream)))
93
94 (defun %intern-ef (ef)
95 (setf (gethash (ef-name ef) *external-formats*) ef))
96
97 (declaim (inline ef-octets-to-code ef-code-to-octets ef-flush-state ef-copy-state
98 ef-cache ef-min-octets ef-max-octets))
99
100 (defun ef-octets-to-code (ef)
101 (efx-octets-to-code (ef-efx ef)))
102
103 (defun ef-code-to-octets (ef)
104 (efx-code-to-octets (ef-efx ef)))
105
106 (defun ef-flush-state (ef)
107 (efx-flush-state (ef-efx ef)))
108
109 (defun ef-copy-state (ef)
110 (efx-copy-state (ef-efx ef)))
111
112 (defun ef-cache (ef)
113 (efx-cache (ef-efx ef)))
114
115 (defun ef-min-octets (ef)
116 (efx-min (ef-efx ef)))
117
118 (defun ef-max-octets (ef)
119 (efx-max (ef-efx ef)))
120
121 (eval-when (:compile-toplevel :load-toplevel :execute)
122 (defun %merge-slots (old new)
123 (let* ((pos (length old))
124 (tmp (mapcar (lambda (x)
125 (let* ((name (if (consp x) (first x) x))
126 (init (if (consp x) (second x) nil))
127 (list (if (consp x) (nthcdr 2 x) nil))
128 (prev (assoc name old))
129 (posn (if prev (second prev) (1- (incf pos)))))
130 (list name posn init (getf list :type t))))
131 new)))
132 (delete-duplicates (stable-sort (append old tmp) #'< :key #'second)
133 :key #'second))))
134
135 ;;; DEFINE-EXTERNAL-FORMAT -- Public
136 ;;;
137 ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets
138 ;;; flush-state copy-state
139 ;;;
140 ;;; Define a new external format. Min/Max/Size are the minimum and
141 ;;; maximum number of octets that make up a character (:size N is just
142 ;;; shorthand for :min N :max N). Slots is a list of slot descriptions
143 ;;; similar to defstruct.
144 ;;;
145 ;;; name (base) (&rest slots)
146 ;;; Define an external format based on a previously-defined external
147 ;;; format, Base. The slot names used in Slots must match those in Base.
148 ;;;
149 ;;; octets-to-code (state input unput &rest vars)
150 ;;; Defines a form to be used by the external format to convert
151 ;;; octets to a code point. State is a form that can be used by the
152 ;;; body to access the state of the stream. Input is a form that
153 ;;; can be used to read one octet from the input stream. (It can be
154 ;;; called as many times as needed.) Similarly, Unput is a form to
155 ;;; put back one octet to the input stream. Vars is a list of vars
156 ;;; that need to be defined for any symbols used within the form.
157 ;;;
158 ;;; This should return two values: the code and the number of octets
159 ;;; read to form the code.
160 ;;;
161 ;;; code-to-octets (code state output &rest vars)
162 ;;; Defines a form to be used by the external format to convert a
163 ;;; code point to octets for output. Code is the code point to be
164 ;;; converted. State is a form to access the current value of the
165 ;;; stream's state variable. Output is a form that writes one octet
166 ;;; to the output stream.
167 ;;;
168 ;;; flush-state (state output &rest vars)
169 ;;; Defines a form to be used by the external format to flush out
170 ;;; any state when an output stream is closed. Similar to
171 ;;; CODE-TO-OCTETS, but there is no code.
172 ;;;
173 ;;; copy-state (state &rest vars)
174 ;;; Defines a form to copy any state needed by the external format.
175 ;;; This should probably be a deep copy so that if the original
176 ;;; state is modified, the copy is not.
177 ;;;
178 ;;; Note: external-formats work on code-points, not
179 ;;; characters, so that the entire 31 bit ISO-10646 range can be
180 ;;; used internally regardless of the size of a character recognized
181 ;;; by Lisp and external formats can be useful to people who want to
182 ;;; process characters outside the Lisp range (see
183 ;;; CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)
184 ;;;
185 (defmacro define-external-format (name (&rest args) (&rest slots)
186 &optional octets-to-code code-to-octets
187 flush-state copy-state)
188 (when (and (oddp (length args)) (not (= (length args) 1)))
189 (warn "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT." args))
190 (let* ((tmp (gensym))
191 (min (if (evenp (length args))
192 (or (getf args :min) (getf args :size) 1)
193 1))
194 (max (if (evenp (length args))
195 (or (getf args :max) (getf args :size) 6)
196 6))
197 (base (if (= (length args) 1)
198 (find-external-format (first args))
199 nil))
200 (bslotd (if base (ef-slotd base) nil))
201 (slotd (%merge-slots bslotd slots))
202 (slotb (loop for slot in slotd
203 collect `(,(first slot)
204 `(the ,',(fourth slot)
205 ;; IDENTITY is here to protect against SETF
206 (identity (svref %slots% ,',(second slot))))))))
207 `(macrolet ((octets-to-code ((state input unput &rest vars) body)
208 `(lambda (,state ,input ,unput)
209 (declare (ignorable ,state ,input ,unput)
210 (optimize (ext:inhibit-warnings 3)))
211 (let (,@',slotb
212 (,input `(the (or (unsigned-byte 8) null) ,,input))
213 ,@(loop for var in vars collect `(,var (gensym))))
214 ,body)))
215 (code-to-octets ((code state output &rest vars) body)
216 `(lambda (,',tmp ,state ,output)
217 (declare (ignorable ,state ,output)
218 (optimize (ext:inhibit-warnings 3)))
219 (let (,@',slotb
220 (,code ',code)
221 ,@(loop for var in vars collect `(,var (gensym))))
222 `(let ((,',code (the lisp:codepoint ,,',tmp)))
223 (declare (ignorable ,',code))
224 ,,body))))
225 (flush-state ((state output &rest vars) body)
226 `(lambda (,state ,output)
227 (declare (ignorable ,state ,output))
228 (let (,@',slotb
229 ,@(loop for var in vars collect `(,var (gensym))))
230 ,body)))
231 (copy-state ((state &rest vars) body)
232 `(lambda (,state)
233 (declare (ignorable ,state))
234 (let (,@',slotb
235 ,@(loop for var in vars collect `(,var (gensym))))
236 ,body))))
237 (%intern-ef (make-external-format ,name
238 ,(if base
239 `(ef-efx (find-external-format ,(ef-name base)))
240 `(make-efx :octets-to-code ,octets-to-code
241 :code-to-octets ,code-to-octets
242 :flush-state ,flush-state
243 :copy-state ,copy-state
244 :cache (make-array +ef-max+
245 :initial-element nil)
246 :min ,(min min max) :max ,(max min max)))
247 nil
248 (let* ,(loop for x in slotd
249 collect (list (first x) (third x)))
250 (vector ,@(mapcar #'first slotd)))
251 ',slotd)))))
252
253 ;;; DEFINE-COMPOSING-EXTERNAL-FORMAT -- Public
254 ;;;
255 ;;; A composing-external-format differs from an (ordinary) external-format
256 ;;; in that it translates characters (really codepoints, of course) into
257 ;;; other characters, rather than translating between characters and binary
258 ;;; octets. They have to be composed with a non-composing external-format
259 ;;; to be of any use.
260 ;;;
261 ;;;
262 ;;; name (&key min max size) input output
263 ;;; Defines a new composing external format. The parameters Min,
264 ;;; Max, and Size are the same as for defining an external format.
265 ;;; The parameters input and output are forms to handle input and
266 ;;; output.
267 ;;;
268 ;;; input (state input unput &rest vars)
269 ;;; Defines a form to be used by the composing external format when
270 ;;; reading input to transform a codepoint (or sequence of
271 ;;; codepoints) to another. State is a form that can be used by the
272 ;;; body to access the state of the external format. Input is a
273 ;;; form that can be used to read one code point from the input
274 ;;; stream. (Input returns two values, the codepoint and the number
275 ;;; of octets read.) It may be called as many times as needed.
276 ;;; This returns two values: the codepoint of the character (or NIL)
277 ;;; and the number of octets read. Similarly, Unput is a form to
278 ;;; put back one octet to the input stream. Vars is a list of vars
279 ;;; that need to be defined for any symbols used within the form.
280 ;;;
281 ;;; This should return two values: the code and the number of octets
282 ;;; read to form the code.
283 ;;;
284 ;;; output (code state output &rest vars)
285 ;;; Defines a form to be used by the composing external format to
286 ;;; convert a code point to octets for output. Code is the code
287 ;;; point to be converted. State is a form to access the current
288 ;;; value of the stream's state variable. Output is a form that
289 ;;; writes one octet to the output stream.
290
291 (defmacro define-composing-external-format (name (&key min max size)
292 input output)
293 (let ((tmp (gensym))
294 (min (or min size 1))
295 (max (or max size 1)))
296 `(macrolet ((input ((state input unput &rest vars) body)
297 `(lambda (,state ,input ,unput)
298 (declare (ignorable ,state ,input ,unput)
299 (optimize (ext:inhibit-warnings 3)))
300 (let ((,input `(the (values (or lisp:codepoint null)
301 kernel:index)
302 ,,input))
303 ,@(loop for var in vars collect `(,var (gensym))))
304 ,body)))
305 (output ((code state output &rest vars) body)
306 `(lambda (,',tmp ,state ,output)
307 (declare (ignorable ,state ,output)
308 (optimize (ext:inhibit-warnings 3)))
309 (let ((,code ',code)
310 ,@(loop for var in vars collect `(,var (gensym))))
311 `(let ((,',code (the lisp:codepoint ,,',tmp)))
312 (declare (ignorable ,',code))
313 ,,body)))))
314 (%intern-ef (make-external-format ,name
315 (make-efx :octets-to-code ,input
316 :code-to-octets ,output
317 :min ,(min min max) :max ,(max min max))
318 t
319 #() '())))))
320
321 (defun load-external-format-aliases ()
322 (let ((*package* (find-package "KEYWORD"))
323 (unix::*filename-encoding* :iso8859-1))
324 (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil
325 :external-format :iso8859-1)
326 (when stm
327 (do ((alias (read stm nil stm) (read stm nil stm))
328 (value (read stm nil stm) (read stm nil stm)))
329 ((or (eq alias stm) (eq value stm))
330 (unless (eq alias stm)
331 (warn "External-format aliases file ends early.")))
332 (if (and (keywordp alias) (or (keywordp value)
333 (and (consp value)
334 (every #'keywordp value))))
335 (setf (gethash alias *external-format-aliases*) value)
336 (warn "Bad entry in external-format aliases file: ~S => ~S."
337 alias value)))))))
338
339 (defun %find-external-format (name)
340 ;; avoid loading files, etc., early in the boot sequence
341 (when (or (eq name :iso8859-1)
342 (and (eq name :default) (eq *default-external-format* :iso8859-1)))
343 (return-from %find-external-format
344 (gethash :iso8859-1 *external-formats*)))
345
346 (when (zerop (hash-table-count *external-format-aliases*))
347 (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
348 (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
349 (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
350 (load-external-format-aliases))
351
352 (do ((tmp (gethash name *external-format-aliases*)
353 (gethash tmp *external-format-aliases*))
354 (cnt 0 (1+ cnt)))
355 ((or (null tmp) (= cnt 50))
356 (unless (null tmp)
357 (error "External-format aliasing depth exceeded.")))
358 (setq name tmp))
359
360 (or (gethash name *external-formats*)
361 (and (consp name) (find-external-format name))
362 (and (let ((*package* (find-package "STREAM"))
363 (lisp::*enable-package-locked-errors* nil)
364 (s (open (format nil "ext-formats:~(~A~).lisp" name)
365 :if-does-not-exist nil :external-format :iso8859-1)))
366 (when s
367 (null (nth-value 1 (ext:compile-from-stream s)))))
368 (gethash name *external-formats*))))
369
370 (defun %composed-ef-name (a b)
371 (if (consp a) (append a (list b)) (list a b)))
372
373 (defun %compose-external-formats (a b)
374 (when (ef-composingp a)
375 (error "~S is a Composing-External-Format." (ef-name a)))
376 (unless (ef-composingp b)
377 (error "~S is not a Composing-External-Format." (ef-name b)))
378 (make-external-format
379 (%composed-ef-name (ef-name a) (ef-name b))
380 (make-efx
381 :octets-to-code (lambda (state input unput)
382 (let ((nstate (gensym "STATE-")))
383 `(let ((,nstate ,state))
384 (when (null ,nstate)
385 (setq ,nstate (setf ,state (cons nil nil))))
386 ,(funcall (ef-octets-to-code b) `(car ,nstate)
387 (funcall (ef-octets-to-code a)
388 `(cdr ,nstate) input unput)
389 unput))))
390 :code-to-octets (lambda (code state output)
391 (let ((nstate (gensym "STATE-")))
392 `(let ((,nstate ,state))
393 (when (null ,nstate)
394 (setq ,nstate (setf ,state (cons nil nil))))
395 ,(funcall (ef-code-to-octets b) code `(car ,nstate)
396 `(lambda (x)
397 ,(funcall (ef-code-to-octets a)
398 'x `(cdr ,nstate) output))))))
399 :cache (make-array +ef-max+ :initial-element nil)
400 :min (* (ef-min-octets a) (ef-min-octets b))
401 :max (* (ef-max-octets a) (ef-max-octets b)))
402 nil #() '()))
403
404 (defun find-external-format (name &optional (error-p t))
405 (when (external-format-p name)
406 (return-from find-external-format name))
407
408 (or (if (consp name) (every #'keywordp name) (keywordp name))
409 (error "~S is not a valid external format name." name))
410
411 (when (eq name :default)
412 (setq name *default-external-format*))
413
414 (when (and (consp name) (not (cdr name)))
415 (setq name (car name)))
416
417 (flet ((not-found ()
418 (when (equal *default-external-format* name)
419 (setq *default-external-format* :iso8859-1))
420 (if error-p (error "External format ~S not found." name) nil)))
421 (if (consp name)
422 (let ((efs (mapcar #'%find-external-format name)))
423 (if (member nil efs)
424 (not-found)
425 (let ((name (reduce #'%composed-ef-name (mapcar #'ef-name efs))))
426 (or (gethash name *external-formats*)
427 (%intern-ef (reduce #'%compose-external-formats efs))))))
428 (or (%find-external-format name) (not-found)))))
429
430 (defun flush-external-formats ()
431 (maphash (lambda (name ef)
432 (declare (ignore name))
433 (fill (ef-cache ef) nil))
434 *external-formats*))
435
436 (defvar *.table-inverse.* (make-hash-table :test 'eq :size 7))
437
438 (defun invert-table (table)
439 (declare (type (or (simple-array (unsigned-byte 31) *)
440 (simple-array (unsigned-byte 16) *))
441 table)
442 (optimize (speed 3) (space 0) (safety 0) (debug 0)
443 (ext:inhibit-warnings 3)))
444 (or (gethash table *.table-inverse.*)
445 (let* ((mbits (if (= (array-total-size table) 128) 7 8))
446 (lbits (cond ((> (array-total-size table) 256) 3)
447 ((< (array-total-size table) 100) 6)
448 (t 5)))
449 (hvec (make-array (1+ (ash #x110000 (- 0 mbits lbits)))
450 :element-type '(unsigned-byte 16)
451 :initial-element #xFFFF))
452 (mvec (make-array 0 :element-type '(unsigned-byte 16)))
453 (lvec (make-array 0 :element-type '(unsigned-byte 16)))
454 (width (array-dimension table 0))
455 (power (1- (array-rank table)))
456 (base (if (= width 94) 1 0))
457 hx mx lx)
458 (assert (and (< power 2) (<= width 256)))
459 (dotimes (i (array-total-size table))
460 (declare (type (integer 0 (#.array-dimension-limit)) i))
461 (let ((tmp i) (val (row-major-aref table i)) (z 0))
462 (declare (type (integer 0 (#.array-dimension-limit)) tmp)
463 (type (unsigned-byte 16) z))
464 (unless (= val #xFFFE)
465 (when (plusp power)
466 (multiple-value-bind (x y) (floor tmp width)
467 (setq tmp x)
468 (setq z (logior z (ash (the (integer 0 255) (+ y base))
469 (the (integer 0 24)
470 (* 8 power)))))))
471 (setq hx (ash val (- 0 mbits lbits)))
472 (when (= (aref hvec hx) #xFFFF)
473 (setf (aref hvec hx) (length mvec))
474 (let ((tmp (make-array (+ (length mvec) (ash 1 mbits))
475 :element-type '(unsigned-byte 16)
476 :initial-element #xFFFF)))
477 (replace tmp mvec)
478 (setq mvec tmp)))
479 (setq mx (logand (ash val (- lbits)) (lognot (ash -1 mbits))))
480 (when (= (aref mvec (+ (aref hvec hx) mx)) #xFFFF)
481 (setf (aref mvec (+ (aref hvec hx) mx)) (length lvec))
482 (let ((tmp (make-array (+ (length lvec) (ash 1 lbits))
483 :element-type '(unsigned-byte 16)
484 :initial-element #xFFFF)))
485 (replace tmp lvec)
486 (setq lvec tmp)))
487 (setq lx (logand val (lognot (ash -1 lbits))))
488 (setf (aref lvec (+ (aref mvec (+ (aref hvec hx) mx)) lx))
489 (logior z (+ tmp base))))))
490 (setf (gethash table *.table-inverse.*)
491 (lisp::make-ntrie16 :split (logior (ash (1- mbits) 4) (1- lbits))
492 :hvec hvec :mvec mvec :lvec lvec)))))
493
494 (declaim (inline get-inverse))
495 (defun get-inverse (ntrie code)
496 (declare (type lisp::ntrie16 ntrie) (type (integer 0 #x10FFFF) code))
497 (let ((n (lisp::qref ntrie code)))
498 (and n (let ((m (aref (lisp::ntrie16-lvec ntrie) n)))
499 (if (= m #xFFFF) nil m)))))
500
501
502 (define-condition void-external-format (error)
503 ()
504 (:report
505 (lambda (condition stream)
506 (declare (ignore condition))
507 (format stream "Attempting I/O through void external-format."))))
508
509 (define-external-format :void (:size 0) ()
510 (octets-to-code (state input unput)
511 `(error 'void-external-format))
512 (code-to-octets (code state output)
513 `(error 'void-external-format)))
514
515 (define-external-format :iso8859-1 (:size 1) ()
516 (octets-to-code (state input unput)
517 `(values ,input 1))
518 (code-to-octets (code state output)
519 `(,output (if (> ,code 255) #x3F ,code))))
520
521 ;;; OCTETS-TO-CODEPOINT, CODEPOINT-TO-OCTETS -- Semi-Public
522 ;;;
523 ;;; Normally you'd want to use OCTETS-TO-CHAR and CHAR-TO-OCTETS instead of
524 ;;; these, but that limits you to Lisp's idea of a character - either Latin-1
525 ;;; in 8 bit Lisp images, or the Unicode BMP in 16 bit images. If you want
526 ;;; to read or write texts containing characters not supported by your Lisp,
527 ;;; these macros can be used instead.
528 (defmacro octets-to-codepoint (external-format state count input unput)
529 (let ((tmp1 (gensym)) (tmp2 (gensym))
530 (ef (find-external-format external-format)))
531 `(multiple-value-bind (,tmp1 ,tmp2)
532 ,(funcall (ef-octets-to-code ef) state input unput)
533 (setf ,count (the kernel:index ,tmp2))
534 (the (or lisp:codepoint null) ,tmp1))))
535
536 (defmacro codepoint-to-octets (external-format code state output)
537 (let ((ef (find-external-format external-format)))
538 (funcall (ef-code-to-octets ef) code state output)))
539
540
541
542 (defvar *ef-base* +ef-max+)
543 (defvar *ef-extensions* '())
544
545 (defun ensure-cache (ef id reqd)
546 (let ((base (or (getf *ef-extensions* id)
547 (setf (getf *ef-extensions* id)
548 (prog1 *ef-base* (incf *ef-base* reqd))))))
549 (when (< (length (ef-cache ef)) (+ base reqd))
550 (setf (efx-cache (ef-efx ef))
551 (adjust-array (ef-cache ef) (+ base reqd) :initial-element nil)))
552 base))
553
554 ;;; DEF-EF-MACRO -- Public
555 ;;;
556 ;;;
557 (defmacro def-ef-macro (name (ef id reqd idx) body)
558 (let* ((tmp1 (gensym))
559 (tmp2 (gensym))
560 (blknm (nth-value 1 (lisp::valid-function-name-p name)))
561 (%name (intern (format nil "%~A" name) #|(symbol-package blknm)|#)))
562 `(progn
563 (defun ,%name (,ef)
564 (let* ((,tmp1 (find-external-format ,ef))
565 (,tmp2 ,(if (eq id 'lisp::lisp)
566 idx
567 `(+ (ensure-cache ,tmp1 ',id ,reqd) ,idx))))
568 (funcall (or (aref (ef-cache ,tmp1) ,tmp2)
569 (setf (aref (ef-cache ,tmp1) ,tmp2)
570 (let ((*compile-print* nil)
571 ;; Set default format when we compile so we
572 ;; can see compiler messages. If we don't,
573 ;; we run into a problem that we might be
574 ;; changing the default format while we're
575 ;; compiling, and we don't know how to output
576 ;; the compiler messages.
577 #|(*default-external-format* :iso8859-1)|#)
578 (compile nil `(lambda (%slots%)
579 (declare (ignorable %slots%))
580 (block ,',blknm
581 ,,body))))))
582 (ef-slots ,tmp1))))
583 (declaim (inline ,name))
584 (defun ,name (,tmp1)
585 (let ((,tmp2 (load-time-value (cons nil nil))))
586 (when (eq ,tmp1 :default)
587 (setq ,tmp1 *default-external-format*))
588 (if (eq ,tmp1 (car ,tmp2))
589 (cdr ,tmp2)
590 (setf (car ,tmp2) ,tmp1
591 (cdr ,tmp2) (,%name ,tmp1))))))))
592
593
594
595 ;;; OCTETS-TO-CHAR, CHAR-TO-OCTETS -- Public
596 ;;;
597 ;;; Read and write one character through an external-format
598 ;;;
599 (defmacro octets-to-char (external-format state count input unput)
600 (let ((nstate (gensym)))
601 `(let ((,nstate ,state))
602 (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
603 (if (car ,nstate)
604 ;; Return the trailing surrgate. Must set count to 0 to
605 ;; tell the stream code we didn't consume any octets!
606 (prog1 (the character (car ,nstate))
607 (setf (car ,nstate) nil ,count 0))
608 (let ((code (octets-to-codepoint ,external-format
609 (cdr ,nstate) ,count ,input ,unput)))
610 (declare (type lisp:codepoint code))
611 ;;@@ on non-Unicode builds, limit to 8-bit chars
612 ;;@@ if unicode-bootstrap, can't use #\u+fffd
613 (cond ((or (lisp::surrogatep code) (> code #x10FFFF))
614 #-(and unicode (not unicode-bootstrap)) #\?
615 #+(and unicode (not unicode-bootstrap)) #\U+FFFD)
616 #+unicode
617 ((> code #xFFFF)
618 (multiple-value-bind (hi lo) (surrogates code)
619 (setf (car ,nstate) lo)
620 hi))
621 (t (code-char code))))))))
622
623 (defmacro char-to-octets (external-format char state output)
624 (let ((nchar (gensym))
625 (nstate (gensym))
626 (wryte (gensym))
627 (ch (gensym)))
628 `(let ((,nchar ,char)
629 (,nstate ,state))
630 (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
631 (if (lisp::surrogatep (char-code ,nchar) :high)
632 (setf (car ,nstate) ,nchar)
633 (flet ((,wryte (,ch)
634 (codepoint-to-octets ,external-format ,ch (cdr ,nstate)
635 ,output)))
636 (declare (dynamic-extent #',wryte))
637 (if (car ,nstate)
638 (prog1
639 ;; Invalid surrogate sequences get replaced with
640 ;; the replacement character.
641 (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
642 (surrogates-to-codepoint (car ,nstate) ,nchar)
643 +replacement-character-code+))
644 (setf (car ,nstate) nil))
645 ;; A lone trailing (low) surrogate gets replaced with
646 ;; the replacement character.
647 (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
648 +replacement-character-code+
649 (char-code ,nchar)))))))))
650
651 (defmacro flush-state (external-format state output)
652 (let* ((ef (find-external-format external-format))
653 (f (ef-flush-state ef)))
654 (when f
655 (funcall f state output))))
656
657 (defmacro copy-state (external-format state)
658 (let* ((ef (find-external-format external-format))
659 (f (ef-copy-state ef)))
660 (when f
661 (funcall f state))))
662
663 (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
664 `(lambda (string start end buffer &aux (ptr 0) (state nil))
665 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
666 (type simple-string string)
667 (type kernel:index start end ptr)
668 (type (simple-array (unsigned-byte 8) (*)) buffer)
669 (ignorable state))
670 (dotimes (i (- end start) (values buffer ptr))
671 (declare (type kernel:index i))
672 (char-to-octets ,extfmt (schar string (+ start i)) state
673 (lambda (b)
674 (when (= ptr (length buffer))
675 (setq buffer (adjust-array buffer (* 2 ptr))))
676 (setf (aref buffer (1- (incf ptr))) b))))))
677
678 (defun string-to-octets (string &key (start 0) end (external-format :default)
679 (buffer nil bufferp))
680 "Convert String to octets using the specified External-format. The
681 string is bounded by Start (defaulting to 0) and End (defaulting to
682 the end of the string. If Buffer is given, the octets are stored
683 there. If not, a new buffer is created."
684 (declare (type string string)
685 (type kernel:index start)
686 (type (or kernel:index null) end)
687 (type (or (simple-array (unsigned-byte 8) (*)) null) buffer))
688 (let* ((buffer (or buffer (make-array (length string)
689 :element-type '(unsigned-byte 8)))))
690 (multiple-value-bind (buffer ptr)
691 (lisp::with-array-data ((string string) (start start) (end end))
692 (funcall (ef-string-to-octets external-format)
693 string start end buffer))
694 (values (if bufferp buffer (lisp::shrink-vector buffer ptr)) ptr))))
695
696 (def-ef-macro ef-octets-to-string (extfmt lisp::lisp +ef-max+ +ef-os+)
697 `(lambda (octets ptr end string &aux (pos -1) (count 0) (state nil) (last-octet 0))
698 (declare (optimize (speed 3) #|(safety 0) (space 0) (debug 0)|#)
699 (type (simple-array (unsigned-byte 8) (*)) octets)
700 (type kernel:index end count last-octet)
701 (type (integer -1 (#.array-dimension-limit)) ptr pos)
702 (type simple-string string)
703 (ignorable state))
704 (loop until (>= ptr end)
705 do (when (= pos (length string))
706 (setq string (adjust-array string (* 2 pos))))
707 (setf (schar string (incf pos))
708 (octets-to-char ,extfmt state count
709 (if (>= ptr end)
710 (throw 'eof-input-catcher
711 (values string pos last-octet))
712 (aref octets (incf ptr)))
713 (lambda (n) (decf ptr n))))
714 (incf last-octet count)
715 finally (return (values string (1+ pos) last-octet)))))
716
717 (defun octets-to-string (octets &key (start 0) end (external-format :default)
718 (string nil stringp))
719 "Octets-to-string converts an array of octets in Octets to a string
720 according to the specified External-format. The array of octets is
721 bounded by Start (defaulting ot 0) and End (defaulting to the end of
722 the array. If String is given, the string is stored there;
723 otherwise a new string is created. Three values are returned: the string,
724 the number of characters read, and the number of octets consumed."
725 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
726 (type kernel:index start)
727 (type (or kernel:index null) end)
728 (type (or simple-string null) string))
729 (multiple-value-bind (string pos last-octet)
730 (catch 'eof-input-catcher
731 (funcall (ef-octets-to-string external-format)
732 octets (1- start) (1- (or end (length octets)))
733 (or string (make-string (length octets)))))
734 (values (if stringp string (lisp::shrink-vector string pos)) pos last-octet)))
735
736
737
738 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
739 `(lambda (string start end result &aux (ptr 0) (state nil))
740 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
741 (type simple-string string)
742 (type kernel:index start end ptr)
743 (type simple-base-string result)
744 (ignorable state))
745 (dotimes (i (- end start) (values result ptr))
746 (declare (type kernel:index i))
747 (char-to-octets ,extfmt (schar string (+ start i)) state
748 (lambda (b)
749 (when (= ptr (length result))
750 (setq result (adjust-array result (* 2 ptr))))
751 (setf (aref result (1- (incf ptr)))
752 (code-char b)))))))
753
754 (defun string-encode (string external-format &optional (start 0) end)
755 "Encode the given String using External-Format and return a new
756 string. The characters of the new string are the octets of the
757 encoded result, with each octet converted to a character via
758 code-char. This is the inverse to String-Decode"
759 (when (zerop (length string))
760 (return-from string-encode string))
761 (multiple-value-bind (result ptr)
762 (lisp::with-array-data ((string string) (start start) (end end))
763 (funcall (ef-encode external-format) string start end
764 (make-string (length string) :element-type 'base-char)))
765 (lisp::shrink-vector result ptr)))
766
767 (def-ef-macro ef-decode (extfmt lisp::lisp +ef-max+ +ef-de+)
768 `(lambda (string ptr end result &aux (pos -1) (count 0) (state nil))
769 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
770 (type simple-string string)
771 (type kernel:index end count)
772 (type (integer -1 (#.array-dimension-limit)) ptr pos)
773 (type simple-string result)
774 (ignorable state))
775 (loop until (>= ptr end)
776 ;; increasing size of result shouldn't ever be necessary, unless
777 ;; someone implements an encoding smaller than the source string...
778 do (setf (schar result (incf pos))
779 (octets-to-char ,extfmt state count
780 ;; note the need to return NIL for EOF
781 (if (= (1+ ptr) (length string))
782 nil
783 (char-code (char string (incf ptr))))
784 (lambda (n) (decf ptr n))))
785 finally (return (values result (1+ pos))))))
786
787 (defun string-decode (string external-format &optional (start 0) end)
788 "Decode String using the given External-Format and return the new
789 string. The input string is treated as if it were an array of
790 octets, where the char-code of each character is the octet. This is
791 the inverse of String-Encode."
792 (when (zerop (length string))
793 (return-from string-decode string))
794 (multiple-value-bind (result pos)
795 (lisp::with-array-data ((string string) (start start) (end end))
796 (funcall (ef-decode external-format)
797 string (1- start) (1- end) (make-string (length string))))
798 (lisp::shrink-vector result pos)))
799
800
801 (defun set-system-external-format (terminal &optional filenames)
802 (unless (find-external-format terminal)
803 (error "Can't find external-format ~S." terminal))
804 (setf (stream-external-format sys:*stdin*) terminal
805 (stream-external-format sys:*stdout*) terminal
806 (stream-external-format sys:*stderr*) terminal)
807 (when (lisp::fd-stream-p sys:*tty*)
808 (setf (stream-external-format sys:*tty*) terminal))
809 (when filenames
810 (unless (find-external-format filenames)
811 (error "Can't find external-format ~S." filenames))
812 (when (and unix::*filename-encoding*
813 (not (eq unix::*filename-encoding* filenames)))
814 (cerror "Change it anyway."
815 "The external-format for encoding filenames is already set.")
816 (setq unix::*filename-encoding* filenames)))
817 t)
818
819
820 ;; Despite its name, this doesn't actually compile anything at all. What it
821 ;; does is expand into a lambda expression that can be compiled by the file
822 ;; compiler.
823 (defmacro precompile-ef-slot (ef slot)
824 (let* ((ef (find-external-format ef)))
825 ;; if there's no lambda expression available, flush it and regenerate
826 (unless (and (aref (ef-cache ef) slot)
827 (function-lambda-expression (aref (ef-cache ef) slot)))
828 (setf (aref (ef-cache ef) slot) nil)
829 (ecase slot
830 (#.+ef-cin+ (lisp::%ef-cin ef))
831 (#.+ef-cout+ (lisp::%ef-cout ef))
832 (#.+ef-sout+ (lisp::%ef-sout ef))
833 (#.+ef-os+ (%ef-octets-to-string ef))
834 (#.+ef-so+ (%ef-string-to-octets ef))
835 (#.+ef-en+ (%ef-encode ef))
836 (#.+ef-de+ (%ef-decode ef))))
837 `(setf (aref (ef-cache (find-external-format ,(ef-name ef))) ,slot)
838 ,(subst (ef-name ef) ef
839 (function-lambda-expression (aref (ef-cache ef) slot))))))

  ViewVC Help
Powered by ViewVC 1.1.5