/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.14 - (show annotations)
Thu Aug 13 13:55:13 2009 UTC (4 years, 8 months ago) by rtoy
Branch: MAIN
Changes since 1.13: +13 -4 lines
Illegal surrogate sequences (leading surrogate without trailing
surrogate or a lone trailing surrogate) get replaced with the
replacement character.
1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2 ;;;
3 ;;; **********************************************************************
4 ;;; This code was written by Paul Foley and has been placed in the public
5 ;;; domain.
6 ;;;
7 (ext:file-comment
8 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.14 2009/08/13 13:55:13 rtoy Exp $")
9 ;;;
10 ;;; **********************************************************************
11 ;;;
12 ;;; Implementation of external-formats
13
14 (in-package "STREAM")
15
16 (export '(string-to-octets octets-to-string *default-external-format*
17 string-encode string-decode set-system-external-format))
18
19 (defvar *default-external-format* :iso8859-1)
20
21 (defvar *external-formats* (make-hash-table :test 'equal))
22 (defvar *external-format-aliases* (make-hash-table))
23
24 (defconstant +ef-str+ 1)
25 (defconstant +ef-cin+ 2)
26 (defconstant +ef-cout+ 3)
27 (defconstant +ef-sin+ 4)
28 (defconstant +ef-sout+ 5)
29 (defconstant +ef-os+ 6)
30 (defconstant +ef-so+ 7)
31 (defconstant +ef-en+ 8)
32 (defconstant +ef-de+ 9)
33 (defconstant +ef-max+ 10)
34
35 ;; Unicode replacement character U+FFFD
36 (defconstant +replacement-character-code+ #xFFFD)
37
38 (define-condition external-format-not-implemented (error)
39 ()
40 (:report
41 (lambda (condition stream)
42 (declare (ignore condition))
43 (format stream "Attempting unimplemented external-format I/O."))))
44
45 (defun %efni (a b c d)
46 (declare (ignore a b c d))
47 (error 'external-format-not-implemented))
48
49 (defstruct efx
50 (octets-to-code #'%efni :type function :read-only t)
51 (code-to-octets #'%efni :type function :read-only t)
52 (cache nil :type (or null simple-vector))
53 (min 1 :type kernel:index :read-only t)
54 (max 1 :type kernel:index :read-only t))
55
56 (defstruct (external-format
57 (:conc-name ef-)
58 (:print-function %print-external-format)
59 (:constructor make-external-format (name efx composingp
60 &optional slots slotd)))
61 (name (ext:required-argument) :type (or keyword cons) :read-only t)
62 (efx (ext:required-argument) :type efx :read-only t)
63 (composingp (ext:required-argument) :type boolean :read-only t)
64 (slots #() :type simple-vector :read-only t)
65 (slotd nil :type list :read-only t))
66
67 (defun %print-external-format (ef stream depth)
68 (declare (ignore depth))
69 (print-unreadable-object (ef stream :type t :identity t)
70 (princ (ef-name ef) stream)))
71
72 (defun %intern-ef (ef)
73 (setf (gethash (ef-name ef) *external-formats*) ef))
74
75 (declaim (inline ef-octets-to-code ef-code-to-octets ef-cache
76 ef-min-octets ef-max-octets))
77
78 (defun ef-octets-to-code (ef)
79 (efx-octets-to-code (ef-efx ef)))
80
81 (defun ef-code-to-octets (ef)
82 (efx-code-to-octets (ef-efx ef)))
83
84 (defun ef-cache (ef)
85 (efx-cache (ef-efx ef)))
86
87 (defun ef-min-octets (ef)
88 (efx-min (ef-efx ef)))
89
90 (defun ef-max-octets (ef)
91 (efx-max (ef-efx ef)))
92
93 (eval-when (:compile-toplevel :load-toplevel :execute)
94 (defun %merge-slots (old new)
95 (let* ((pos (length old))
96 (tmp (mapcar (lambda (x)
97 (let* ((name (if (consp x) (first x) x))
98 (init (if (consp x) (second x) nil))
99 (list (if (consp x) (nthcdr 2 x) nil))
100 (prev (assoc name old))
101 (posn (if prev (second prev) (1- (incf pos)))))
102 (list name posn init (getf list :type t))))
103 new)))
104 (delete-duplicates (stable-sort (append old tmp) #'< :key #'second)
105 :key #'second))))
106
107 ;;; DEFINE-EXTERNAL-FORMAT -- Public
108 ;;;
109 ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets
110 ;;; Define a new external format. Min/Max/Size are the minimum and
111 ;;; maximum number of octets that make up a character (:size N is just
112 ;;; shorthand for :min N :max N). Slots is a list of slot descriptions
113 ;;; similar to defstruct.
114 ;;;
115 ;;; name (base) (&rest slots)
116 ;;; Define an external format based on a previously-defined external
117 ;;; format, Base. The slot names used in Slots must match those in Base.
118 ;;;
119 ;;; octets-to-code (state input unput &rest vars)
120 ;;; Defines a form to be used by the external format to convert
121 ;;; octets to a code point. State is a form that can be used by the
122 ;;; body to access the state variable of the stream. Input is a
123 ;;; form that can be used to read one more octets from the input
124 ;;; strema. Similarly, Unput is a form to put back one octet to the
125 ;;; input stream. Vars is a list of vars that need to be defined
126 ;;; for any symbols used within the form.
127 ;;;
128 ;;; This should return two values: the code and the number of octets
129 ;;; read to form the code.
130 ;;;
131 ;;; code-to-octets (code state output &rest vars)
132 ;;; Defines a form to be used by the external format to convert a
133 ;;; code point to octets for output. Code is the code point to be
134 ;;; converted. State is a form to access the current value of the
135 ;;; stream's state variable. Output is a form that writes one octet
136 ;;; to the output stream.
137 ;;;
138 ;;; Note: external-formats work on code-points, not
139 ;;; characters, so that the entire 31 bit ISO-10646 range can be
140 ;;; used internally regardless of the size of a character recognized
141 ;;; by Lisp and external formats can be useful to people who want to
142 ;;; process characters outside the Lisp range (see
143 ;;; CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)
144 ;;;
145 (defmacro define-external-format (name (&rest args) (&rest slots)
146 &optional octets-to-code code-to-octets)
147 (when (and (oddp (length args)) (not (= (length args) 1)))
148 (warn "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT." args))
149 (let* ((tmp (gensym))
150 (min (if (evenp (length args))
151 (or (getf args :min) (getf args :size) 1)
152 1))
153 (max (if (evenp (length args))
154 (or (getf args :max) (getf args :size) 6)
155 6))
156 (base (if (= (length args) 1)
157 (find-external-format (first args))
158 nil))
159 (bslotd (if base (ef-slotd base) nil))
160 (slotd (%merge-slots bslotd slots))
161 (slotb (loop for slot in slotd
162 collect `(,(first slot)
163 `(the ,',(fourth slot)
164 ;; IDENTITY is here to protect against SETF
165 (identity (svref %slots% ,',(second slot))))))))
166 `(macrolet ((octets-to-code ((state input unput &rest vars) body)
167 `(lambda (,state ,input ,unput)
168 (declare (ignorable ,state ,input ,unput)
169 (optimize (ext:inhibit-warnings 3)))
170 (let (,@',slotb
171 (,input `(the (or (unsigned-byte 8) null) ,,input))
172 ,@(loop for var in vars collect `(,var (gensym))))
173 ,body)))
174 (code-to-octets ((code state output &rest vars) body)
175 `(lambda (,',tmp ,state ,output)
176 (declare (ignorable ,state ,output)
177 (optimize (ext:inhibit-warnings 3)))
178 (let (,@',slotb
179 (,code ',code)
180 ,@(loop for var in vars collect `(,var (gensym))))
181 `(let ((,',code (the (unsigned-byte 21) ,,',tmp)))
182 (declare (ignorable ,',code))
183 ,,body)))))
184 (%intern-ef (make-external-format ,name
185 ,(if base
186 `(ef-efx (find-external-format ,(ef-name base)))
187 `(make-efx :octets-to-code ,octets-to-code
188 :code-to-octets ,code-to-octets
189 :cache (make-array +ef-max+
190 :initial-element nil)
191 :min ,(min min max) :max ,(max min max)))
192 nil
193 (let* ,(loop for x in slotd
194 collect (list (first x) (third x)))
195 (vector ,@(mapcar #'first slotd)))
196 ',slotd)))))
197
198 ;;; DEFINE-COMPOSING-EXTERNAL-FORMAT -- Public
199 ;;;
200 ;;; A composing-external-format differs from an (ordinary) external-format
201 ;;; in that it translates characters (really codepoints, of course) into
202 ;;; other characters, rather than translating between characters and binary
203 ;;; octets. They have to be composed with a non-composing external-format
204 ;;; to be of any use.
205 ;;;
206 (defmacro define-composing-external-format (name (&key min max size)
207 input output)
208 (let ((tmp (gensym))
209 (min (or min size 1))
210 (max (or max size 1)))
211 `(macrolet ((input ((state input unput &rest vars) body)
212 `(lambda (,state ,input ,unput)
213 (declare (ignorable ,state ,input ,unput)
214 (optimize (ext:inhibit-warnings 3)))
215 (let ((,input `(the (values (or (unsigned-byte 21) null)
216 kernel:index)
217 ,,input))
218 ,@(loop for var in vars collect `(,var (gensym))))
219 ,body)))
220 (output ((code state output &rest vars) body)
221 `(lambda (,',tmp ,state ,output)
222 (declare (ignorable ,state ,output)
223 (optimize (ext:inhibit-warnings 3)))
224 (let ((,code ',code)
225 ,@(loop for var in vars collect `(,var (gensym))))
226 `(let ((,',code (the (unsigned-byte 21) ,,',tmp)))
227 (declare (ignorable ,',code))
228 ,,body)))))
229 (%intern-ef (make-external-format ,name
230 (make-efx :octets-to-code ,input
231 :code-to-octets ,output
232 :min ,(min min max) :max ,(max min max))
233 t
234 #() '())))))
235
236 (defun load-external-format-aliases ()
237 (let ((*package* (find-package "KEYWORD"))
238 (unix::*filename-encoding* :iso8859-1))
239 (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil
240 :external-format :iso8859-1)
241 (when stm
242 (do ((alias (read stm nil stm) (read stm nil stm))
243 (value (read stm nil stm) (read stm nil stm)))
244 ((or (eq alias stm) (eq value stm))
245 (unless (eq alias stm)
246 (warn "External-format aliases file ends early.")))
247 (if (and (keywordp alias) (or (keywordp value)
248 (and (consp value)
249 (every #'keywordp value))))
250 (setf (gethash alias *external-format-aliases*) value)
251 (warn "Bad entry in external-format aliases file: ~S => ~S."
252 alias value)))))))
253
254 (defun %find-external-format (name)
255 ;; avoid loading files, etc., early in the boot sequence
256 (when (or (eq name :iso8859-1)
257 (and (eq name :default) (eq *default-external-format* :iso8859-1)))
258 (return-from %find-external-format
259 (gethash :iso8859-1 *external-formats*)))
260
261 (when (zerop (hash-table-count *external-format-aliases*))
262 (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
263 (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
264 (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
265 (load-external-format-aliases))
266
267 (do ((tmp (gethash name *external-format-aliases*)
268 (gethash tmp *external-format-aliases*))
269 (cnt 0 (1+ cnt)))
270 ((or (null tmp) (= cnt 50))
271 (unless (null tmp)
272 (error "External-format aliasing depth exceeded.")))
273 (setq name tmp))
274
275 (or (gethash name *external-formats*)
276 (and (consp name) (find-external-format name))
277 (and (let ((*package* (find-package "STREAM"))
278 (lisp::*enable-package-locked-errors* nil)
279 (s (open (format nil "ext-formats:~(~A~).lisp" name)
280 :if-does-not-exist nil :external-format :iso8859-1)))
281 (when s
282 (null (nth-value 1 (ext:compile-from-stream s)))))
283 (gethash name *external-formats*))))
284
285 (defun %composed-ef-name (a b)
286 (if (consp a) (append a (list b)) (list a b)))
287
288 (defun %compose-external-formats (a b)
289 (when (ef-composingp a)
290 (error "~S is a Composing-External-Format." (ef-name a)))
291 (unless (ef-composingp b)
292 (error "~S is not a Composing-External-Format." (ef-name b)))
293 (make-external-format
294 (%composed-ef-name (ef-name a) (ef-name b))
295 (make-efx
296 :octets-to-code (lambda (state input unput)
297 (let ((nstate (gensym "STATE-")))
298 `(let ((,nstate ,state))
299 (when (null ,nstate)
300 (setq ,nstate (setf ,state (cons nil nil))))
301 ,(funcall (ef-octets-to-code b) `(car ,nstate)
302 (funcall (ef-octets-to-code a)
303 `(cdr ,nstate) input unput)
304 unput))))
305 :code-to-octets (lambda (code state output)
306 (let ((nstate (gensym "STATE-")))
307 `(let ((,nstate ,state))
308 (when (null ,nstate)
309 (setq ,nstate (setf ,state (cons nil nil))))
310 ,(funcall (ef-code-to-octets b) code `(car ,nstate)
311 `(lambda (x)
312 ,(funcall (ef-code-to-octets a)
313 'x `(cdr ,nstate) output))))))
314 :cache (make-array +ef-max+ :initial-element nil)
315 :min (* (ef-min-octets a) (ef-min-octets b))
316 :max (* (ef-max-octets a) (ef-max-octets b)))
317 nil #() '()))
318
319 (defun find-external-format (name &optional (error-p t))
320 (when (external-format-p name)
321 (return-from find-external-format name))
322
323 (or (if (consp name) (every #'keywordp name) (keywordp name))
324 (error "~S is not a valid external format name." name))
325
326 (when (eq name :default)
327 (setq name *default-external-format*))
328
329 (when (and (consp name) (not (cdr name)))
330 (setq name (car name)))
331
332 (flet ((not-found ()
333 (when (equal *default-external-format* name)
334 (setq *default-external-format* :iso8859-1))
335 (if error-p (error "External format ~S not found." name) nil)))
336 (if (consp name)
337 (let ((efs (mapcar #'%find-external-format name)))
338 (if (member nil efs)
339 (not-found)
340 (let ((name (reduce #'%composed-ef-name (mapcar #'ef-name efs))))
341 (or (gethash name *external-formats*)
342 (%intern-ef (reduce #'%compose-external-formats efs))))))
343 (or (%find-external-format name) (not-found)))))
344
345 (defun flush-external-formats ()
346 (maphash (lambda (name ef)
347 (declare (ignore name))
348 (fill (ef-cache ef) nil))
349 *external-formats*))
350
351 (defvar *.table-inverse.* (make-hash-table :test 'eq :size 7))
352
353 (defun invert-table (table)
354 (declare (type (or (simple-array (unsigned-byte 31) *)
355 (simple-array (unsigned-byte 16) *))
356 table)
357 (optimize (speed 3) (space 0) (safety 0) (debug 0)
358 (ext:inhibit-warnings 3)))
359 (or (gethash table *.table-inverse.*)
360 (let* ((mbits (if (= (array-total-size table) 128) 7 8))
361 (lbits (cond ((> (array-total-size table) 256) 3)
362 ((< (array-total-size table) 100) 6)
363 (t 5)))
364 (hvec (make-array (1+ (ash #x110000 (- 0 mbits lbits)))
365 :element-type '(unsigned-byte 16)
366 :initial-element #xFFFF))
367 (mvec (make-array 0 :element-type '(unsigned-byte 16)))
368 (lvec (make-array 0 :element-type '(unsigned-byte 16)))
369 (width (array-dimension table 0))
370 (power (1- (array-rank table)))
371 (base (if (= width 94) 1 0))
372 hx mx lx)
373 (assert (and (< power 2) (<= width 256)))
374 (dotimes (i (array-total-size table))
375 (declare (type (integer 0 (#.array-dimension-limit)) i))
376 (let ((tmp i) (val (row-major-aref table i)) (z 0))
377 (declare (type (integer 0 (#.array-dimension-limit)) tmp)
378 (type (unsigned-byte 16) z))
379 (unless (= val #xFFFE)
380 (when (plusp power)
381 (multiple-value-bind (x y) (floor tmp width)
382 (setq tmp x)
383 (setq z (logior z (ash (the (integer 0 255) (+ y base))
384 (the (integer 0 24)
385 (* 8 power)))))))
386 (setq hx (ash val (- 0 mbits lbits)))
387 (when (= (aref hvec hx) #xFFFF)
388 (setf (aref hvec hx) (length mvec))
389 (let ((tmp (make-array (+ (length mvec) (ash 1 mbits))
390 :element-type '(unsigned-byte 16)
391 :initial-element #xFFFF)))
392 (replace tmp mvec)
393 (setq mvec tmp)))
394 (setq mx (logand (ash val (- lbits)) (lognot (ash -1 mbits))))
395 (when (= (aref mvec (+ (aref hvec hx) mx)) #xFFFF)
396 (setf (aref mvec (+ (aref hvec hx) mx)) (length lvec))
397 (let ((tmp (make-array (+ (length lvec) (ash 1 lbits))
398 :element-type '(unsigned-byte 16)
399 :initial-element #xFFFF)))
400 (replace tmp lvec)
401 (setq lvec tmp)))
402 (setq lx (logand val (lognot (ash -1 lbits))))
403 (setf (aref lvec (+ (aref mvec (+ (aref hvec hx) mx)) lx))
404 (logior z (+ tmp base))))))
405 (setf (gethash table *.table-inverse.*)
406 (lisp::make-ntrie16 :split (logior (ash (1- mbits) 4) (1- lbits))
407 :hvec hvec :mvec mvec :lvec lvec)))))
408
409 (declaim (inline get-inverse))
410 (defun get-inverse (ntrie code)
411 (declare (type lisp::ntrie16 ntrie) (type (integer 0 #x10FFFF) code))
412 (let ((n (lisp::qref ntrie code)))
413 (and n (let ((m (aref (lisp::ntrie16-lvec ntrie) n)))
414 (if (= m #xFFFF) nil m)))))
415
416
417 (define-condition void-external-format (error)
418 ()
419 (:report
420 (lambda (condition stream)
421 (declare (ignore condition))
422 (format stream "Attempting I/O through void external-format."))))
423
424 (define-external-format :void (:size 0) ()
425 (octets-to-code (state input unput)
426 `(error 'void-external-format))
427 (code-to-octets (code state output)
428 `(error 'void-external-format)))
429
430 (define-external-format :iso8859-1 (:size 1) ()
431 (octets-to-code (state input unput)
432 `(values ,input 1))
433 (code-to-octets (code state output)
434 `(,output (if (> ,code 255) #x3F ,code))))
435
436 ;;; OCTETS-TO-CODEPOINT, CODEPOINT-TO-OCTETS -- Semi-Public
437 ;;;
438 ;;; Normally you'd want to use OCTETS-TO-CHAR and CHAR-TO-OCTETS instead of
439 ;;; these, but that limits you to Lisp's idea of a character - either Latin-1
440 ;;; in 8 bit Lisp images, or the Unicode BMP in 16 bit images. If you want
441 ;;; to read or write texts containing characters not supported by your Lisp,
442 ;;; these macros can be used instead.
443 (defmacro octets-to-codepoint (external-format state count input unput)
444 (let ((tmp1 (gensym)) (tmp2 (gensym))
445 (ef (find-external-format external-format)))
446 `(multiple-value-bind (,tmp1 ,tmp2)
447 ,(funcall (ef-octets-to-code ef) state input unput)
448 (setf ,count (the kernel:index ,tmp2))
449 (the (or (unsigned-byte 21) null) ,tmp1))))
450
451 (defmacro codepoint-to-octets (external-format code state output)
452 (let ((ef (find-external-format external-format)))
453 (funcall (ef-code-to-octets ef) code state output)))
454
455
456
457 (defvar *ef-base* +ef-max+)
458 (defvar *ef-extensions* '())
459
460 (defun ensure-cache (ef id reqd)
461 (let ((base (or (getf *ef-extensions* id)
462 (setf (getf *ef-extensions* id)
463 (prog1 *ef-base* (incf *ef-base* reqd))))))
464 (when (< (length (ef-cache ef)) (+ base reqd))
465 (setf (efx-cache (ef-efx ef))
466 (adjust-array (ef-cache ef) (+ base reqd) :initial-element nil)))
467 base))
468
469 ;;; DEF-EF-MACRO -- Public
470 ;;;
471 ;;;
472 (defmacro def-ef-macro (name (ef id reqd idx) body)
473 (let* ((tmp1 (gensym))
474 (tmp2 (gensym))
475 (blknm (nth-value 1 (lisp::valid-function-name-p name)))
476 (%name (intern (format nil "%~A" name) #|(symbol-package blknm)|#)))
477 `(progn
478 (defun ,%name (,ef)
479 (let* ((,tmp1 (find-external-format ,ef))
480 (,tmp2 ,(if (eq id 'lisp::lisp)
481 idx
482 `(+ (ensure-cache ,tmp1 ',id ,reqd) ,idx))))
483 (funcall (or (aref (ef-cache ,tmp1) ,tmp2)
484 (setf (aref (ef-cache ,tmp1) ,tmp2)
485 (let ((*compile-print* nil)
486 ;; Set default format when we compile so we
487 ;; can see compiler messages. If we don't,
488 ;; we run into a problem that we might be
489 ;; changing the default format while we're
490 ;; compiling, and we don't know how to output
491 ;; the compiler messages.
492 #|(*default-external-format* :iso8859-1)|#)
493 (compile nil `(lambda (%slots%)
494 (declare (ignorable %slots%))
495 (block ,',blknm
496 ,,body))))))
497 (ef-slots ,tmp1))))
498 (declaim (inline ,name))
499 (defun ,name (,tmp1)
500 (let ((,tmp2 (load-time-value (cons nil nil))))
501 (when (eq ,tmp1 :default)
502 (setq ,tmp1 *default-external-format*))
503 (if (eq ,tmp1 (car ,tmp2))
504 (cdr ,tmp2)
505 (setf (car ,tmp2) ,tmp1
506 (cdr ,tmp2) (,%name ,tmp1))))))))
507
508
509
510 ;;; OCTETS-TO-CHAR, CHAR-TO-OCTETS -- Public
511 ;;;
512 ;;; Read and write one character through an external-format
513 ;;;
514 (defmacro octets-to-char (external-format state count input unput)
515 (let ((nstate (gensym)))
516 `(let ((,nstate ,state))
517 (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
518 (if (car ,nstate)
519 ;; Return the trailing surrgate. Must set count to 0 to
520 ;; tell the stream code we didn't consume any octets!
521 (prog1 (the character (car ,nstate))
522 (setf (car ,nstate) nil ,count 0))
523 (let ((code (octets-to-codepoint ,external-format
524 (cdr ,nstate) ,count ,input ,unput)))
525 (declare (type (unsigned-byte 21) code))
526 ;;@@ on non-Unicode builds, limit to 8-bit chars
527 ;;@@ if unicode-bootstrap, can't use #\u+fffd
528 (cond ((or (<= #xD800 code #xDFFF) (> code #x10FFFF))
529 #-(and unicode (not unicode-bootstrap)) #\?
530 #+(and unicode (not unicode-bootstrap)) #\U+FFFD)
531 #+unicode
532 ((> code #xFFFF)
533 (multiple-value-bind (hi lo) (surrogates code)
534 (setf (car ,nstate) lo)
535 hi))
536 (t (code-char code))))))))
537
538 (defmacro char-to-octets (external-format char state output)
539 (let ((nchar (gensym))
540 (nstate (gensym))
541 (wryte (gensym))
542 (ch (gensym)))
543 `(let ((,nchar ,char)
544 (,nstate ,state))
545 (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
546 (if (lisp::surrogatep (char-code ,nchar) :high)
547 (setf (car ,nstate) ,nchar)
548 (flet ((,wryte (,ch)
549 (codepoint-to-octets ,external-format ,ch (cdr ,nstate)
550 ,output)))
551 (declare (dynamic-extent #',wryte))
552 (if (car ,nstate)
553 (prog1
554 ;; Invalid surrogate sequences get replaced with
555 ;; the replacement character.
556 (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
557 (surrogates-to-codepoint (car ,nstate) ,nchar)
558 +replacement-character-code+))
559 (setf (car ,nstate) nil))
560 ;; A lone trailing (low) surrogate gets replaced with
561 ;; the replacement character.
562 (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
563 +replacement-character-code+
564 (char-code ,nchar)))))))))
565
566 (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
567 `(lambda (string start end buffer &aux (ptr 0) (state nil))
568 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
569 (type simple-string string)
570 (type kernel:index start end ptr)
571 (type (simple-array (unsigned-byte 8) (*)) buffer)
572 (ignorable state))
573 (dotimes (i (- end start) (values buffer ptr))
574 (declare (type kernel:index i))
575 (char-to-octets ,extfmt (schar string (+ start i)) state
576 (lambda (b)
577 (when (= ptr (length buffer))
578 (setq buffer (adjust-array buffer (* 2 ptr))))
579 (setf (aref buffer (1- (incf ptr))) b))))))
580
581 (defun string-to-octets (string &key (start 0) end (external-format :default)
582 (buffer nil bufferp))
583 (declare (type string string)
584 (type kernel:index start)
585 (type (or kernel:index null) end)
586 (type (or (simple-array (unsigned-byte 8) (*)) null) buffer))
587 (let* ((buffer (or buffer (make-array (length string)
588 :element-type '(unsigned-byte 8)))))
589 (multiple-value-bind (buffer ptr)
590 (lisp::with-array-data ((string string) (start start) (end end))
591 (funcall (ef-string-to-octets external-format)
592 string start end buffer))
593 (values (if bufferp buffer (lisp::shrink-vector buffer ptr)) ptr))))
594
595 (def-ef-macro ef-octets-to-string (extfmt lisp::lisp +ef-max+ +ef-os+)
596 `(lambda (octets ptr end string &aux (pos -1) (count 0) (state nil))
597 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
598 (type (simple-array (unsigned-byte 8) (*)) octets)
599 (type kernel:index end count)
600 (type (integer -1 (#.array-dimension-limit)) ptr pos)
601 (type simple-string string)
602 (ignorable state))
603 (loop until (>= ptr end)
604 do (when (= pos (length string))
605 (setq string (adjust-array string (* 2 pos))))
606 (setf (schar string (incf pos))
607 (octets-to-char ,extfmt state count
608 (aref octets (incf ptr)) ;;@@ EOF??
609 (lambda (n) (decf ptr n))))
610 finally (return (values string (1+ pos))))))
611
612 (defun octets-to-string (octets &key (start 0) end (external-format :default)
613 (string nil stringp))
614 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
615 (type kernel:index start)
616 (type (or kernel:index null) end)
617 (type (or simple-string null) string))
618 (multiple-value-bind (string pos)
619 (funcall (ef-octets-to-string external-format)
620 octets (1- start) (1- (or end (length octets)))
621 (or string (make-string (length octets))))
622 (values (if stringp string (lisp::shrink-vector string pos)) pos)))
623
624
625
626 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
627 `(lambda (string start end result &aux (ptr 0) (state nil))
628 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
629 (type simple-string string)
630 (type kernel:index start end ptr)
631 (type simple-base-string result)
632 (ignorable state))
633 (dotimes (i (- end start) (values result ptr))
634 (declare (type kernel:index i))
635 (char-to-octets ,extfmt (schar string (+ start i)) state
636 (lambda (b)
637 (when (= ptr (length result))
638 (setq result (adjust-array result (* 2 ptr))))
639 (setf (aref result (1- (incf ptr)))
640 (code-char b)))))))
641
642 (defun string-encode (string external-format &optional (start 0) end)
643 "Encode the given String using External-Format and return a new
644 string. The characters of the new string are the octets of the
645 encoded result, with each octet converted to a character via
646 code-char. This is the inverse to String-Decode"
647 (when (zerop (length string))
648 (return-from string-encode string))
649 (multiple-value-bind (result ptr)
650 (lisp::with-array-data ((string string) (start start) (end end))
651 (funcall (ef-encode external-format) string start end
652 (make-string (length string) :element-type 'base-char)))
653 (lisp::shrink-vector result ptr)))
654
655 (def-ef-macro ef-decode (extfmt lisp::lisp +ef-max+ +ef-de+)
656 `(lambda (string ptr end result &aux (pos -1) (count 0) (state nil))
657 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
658 (type simple-string string)
659 (type kernel:index end count)
660 (type (integer -1 (#.array-dimension-limit)) ptr pos)
661 (type simple-string result)
662 (ignorable state))
663 (loop until (>= ptr end)
664 ;; increasing size of result shouldn't ever be necessary, unless
665 ;; someone implements an encoding smaller than the source string...
666 do (setf (schar result (incf pos))
667 (octets-to-char ,extfmt state count
668 ;; note the need to return NIL for EOF
669 (if (= (1+ ptr) (length string))
670 nil
671 (char-code (char string (incf ptr))))
672 (lambda (n) (decf ptr n))))
673 finally (return (values result (1+ pos))))))
674
675 (defun string-decode (string external-format &optional (start 0) end)
676 "Decode String using the given External-Format and return the new
677 string. The input string is treated as if it were an array of
678 octets, where the char-code of each character is the octet. This is
679 the inverse of String-Encode."
680 (when (zerop (length string))
681 (return-from string-decode string))
682 (multiple-value-bind (result pos)
683 (lisp::with-array-data ((string string) (start start) (end end))
684 (funcall (ef-decode external-format)
685 string (1- start) (1- end) (make-string (length string))))
686 (lisp::shrink-vector result pos)))
687
688
689 (defun set-system-external-format (terminal &optional filenames)
690 (unless (find-external-format terminal)
691 (error "Can't find external-format ~S." terminal))
692 (setf (stream-external-format sys:*stdin*) terminal
693 (stream-external-format sys:*stdout*) terminal
694 (stream-external-format sys:*stderr*) terminal)
695 (when (lisp::fd-stream-p sys:*tty*)
696 (setf (stream-external-format sys:*tty*) terminal))
697 (when filenames
698 (unless (find-external-format filenames)
699 (error "Can't find external-format ~S." filenames))
700 (when (and unix::*filename-encoding*
701 (not (eq unix::*filename-encoding* filenames)))
702 (cerror "Change it anyway."
703 "The external-format for encoding filenames is already set.")
704 (setq unix::*filename-encoding* filenames)))
705 t)
706
707
708 ;; Despite its name, this doesn't actually compile anything at all. What it
709 ;; does is expand into a lambda expression that can be compiled by the file
710 ;; compiler.
711 (defmacro precompile-ef-slot (ef slot)
712 (let* ((ef (find-external-format ef)))
713 ;; if there's no lambda expression available, flush it and regenerate
714 (unless (and (aref (ef-cache ef) slot)
715 (function-lambda-expression (aref (ef-cache ef) slot)))
716 (setf (aref (ef-cache ef) slot) nil)
717 (ecase slot
718 (#.+ef-cin+ (lisp::%ef-cin ef))
719 (#.+ef-cout+ (lisp::%ef-cout ef))
720 (#.+ef-sout+ (lisp::%ef-sout ef))
721 (#.+ef-os+ (%ef-octets-to-string ef))
722 (#.+ef-so+ (%ef-string-to-octets ef))
723 (#.+ef-en+ (%ef-encode ef))
724 (#.+ef-de+ (%ef-decode ef))))
725 `(setf (aref (ef-cache (find-external-format ,(ef-name ef))) ,slot)
726 ,(subst (ef-name ef) ef
727 (function-lambda-expression (aref (ef-cache ef) slot))))))

  ViewVC Help
Powered by ViewVC 1.1.5