/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (show annotations)
Thu Jun 11 16:03:57 2009 UTC (4 years, 10 months ago) by rtoy
Branch: MAIN
CVS Tags: merged-unicode-utf16-extfmt-2009-06-11, portable-clx-import-2009-06-16, portable-clx-base
Branch point for: portable-clx-branch
Changes since 1.5: +421 -153 lines
Merge Unicode work to trunk.  From label
unicode-utf16-extfmt-2009-06-11.
1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2 ;;;
3 ;;; **********************************************************************
4 ;;; This code was written by Paul Foley and has been placed in the public
5 ;;; domain.
6 ;;;
7 (ext:file-comment
8 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.6 2009/06/11 16:03:57 rtoy Exp $")
9 ;;;
10 ;;; **********************************************************************
11 ;;;
12 ;;; Implementation of external-formats
13
14 (in-package "STREAM")
15
16 (export '(string-to-octets octets-to-string *default-external-format*
17 string-encode string-decode))
18
19 (defvar *default-external-format* :iso8859-1)
20
21 (defvar *external-formats* (make-hash-table :test 'equal))
22 (defvar *external-format-aliases* (make-hash-table))
23
24 (defconstant +ef-cin+ 2)
25 (defconstant +ef-cout+ 3)
26 (defconstant +ef-sin+ 4)
27 (defconstant +ef-sout+ 5)
28 (defconstant +ef-os+ 6)
29 (defconstant +ef-so+ 7)
30 (defconstant +ef-en+ 8)
31 (defconstant +ef-de+ 9)
32 (defconstant +ef-max+ 10)
33
34 ;; Unicode replacement character U+FFFD
35 (defconstant +replacement-character-code+ #xFFFD)
36
37 (define-condition external-format-not-implemented (error)
38 ()
39 (:report
40 (lambda (condition stream)
41 (declare (ignore condition))
42 (format stream "Attempting unimplemented external-format I/O."))))
43
44 (defun %efni (a b c d)
45 (declare (ignore a b c d))
46 (error 'external-format-not-implemented))
47
48 (defstruct efx
49 (octets-to-code #'%efni :type function :read-only t)
50 (code-to-octets #'%efni :type function :read-only t)
51 (cache nil :type (or null simple-vector))
52 (min 1 :type kernel:index :read-only t)
53 (max 1 :type kernel:index :read-only t))
54
55 (defstruct (external-format
56 (:conc-name ef-)
57 (:print-function %print-external-format)
58 (:constructor make-external-format (name efx composingp
59 &optional slots slotd)))
60 (name (ext:required-argument) :type (or keyword cons) :read-only t)
61 (efx (ext:required-argument) :type efx :read-only t)
62 (composingp (ext:required-argument) :type boolean :read-only t)
63 (slots #() :type simple-vector :read-only t)
64 (slotd nil :type list :read-only t))
65
66 (defun %print-external-format (ef stream depth)
67 (declare (ignore depth))
68 (print-unreadable-object (ef stream :type t :identity t)
69 (princ (ef-name ef) stream)))
70
71 (defun %intern-ef (ef)
72 (setf (gethash (ef-name ef) *external-formats*) ef))
73
74 (declaim (inline ef-octets-to-code ef-code-to-octets ef-cache
75 ef-min-octets ef-max-octets))
76
77 (defun ef-octets-to-code (ef)
78 (efx-octets-to-code (ef-efx ef)))
79
80 (defun ef-code-to-octets (ef)
81 (efx-code-to-octets (ef-efx ef)))
82
83 (defun ef-cache (ef)
84 (efx-cache (ef-efx ef)))
85
86 (defun ef-min-octets (ef)
87 (efx-min (ef-efx ef)))
88
89 (defun ef-max-octets (ef)
90 (efx-max (ef-efx ef)))
91
92 (eval-when (:compile-toplevel :load-toplevel :execute)
93 (defun %merge-slots (old new)
94 (let* ((pos (length old))
95 (tmp (mapcar (lambda (x)
96 (let* ((name (if (consp x) (first x) x))
97 (init (if (consp x) (second x) nil))
98 (list (if (consp x) (nthcdr 2 x) nil))
99 (prev (assoc name old))
100 (posn (if prev (second prev) (1- (incf pos)))))
101 (list name posn init (getf list :type t))))
102 new)))
103 (delete-duplicates (stable-sort (append old tmp) #'< :key #'second)
104 :key #'second))))
105
106 ;;; DEFINE-EXTERNAL-FORMAT -- Public
107 ;;;
108 ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets
109 ;;; Define a new external format. Min/Max/Size are the minimum and
110 ;;; maximum number of octets that make up a character (:size N is just
111 ;;; shorthand for :min N :max N). Slots is a list of slot descriptions
112 ;;; similar to defstruct.
113 ;;;
114 ;;; name (base) (&rest slots)
115 ;;; Define an external format based on a previously-defined external
116 ;;; format, Base. The slot names used in Slots must match those in Base.
117 ;;;
118 ;;; octets-to-code (state input unput &rest vars)
119 ;;; Defines a form to be used by the external format to convert
120 ;;; octets to a code point. State is a form that can be used by the
121 ;;; body to access the state variable of the stream. Input is a
122 ;;; form that can be used to read one more octets from the input
123 ;;; strema. Similarly, Unput is a form to put back one octet to the
124 ;;; input stream. Vars is a list of vars that need to be defined
125 ;;; for any symbols used within the form.
126 ;;;
127 ;;; This should return two values: the code and the number of octets
128 ;;; read to form the code.
129 ;;;
130 ;;; code-to-octets (code state output &rest vars)
131 ;;; Defines a form to be used by the external format to convert a
132 ;;; code point to octets for output. Code is the code point to be
133 ;;; converted. State is a form to access the current value of the
134 ;;; stream's state variable. Output is a form that writes one octet
135 ;;; to the output stream.
136 ;;;
137 ;;; Note: external-formats work on code-points, not
138 ;;; characters, so that the entire 31 bit ISO-10646 range can be
139 ;;; used internally regardless of the size of a character recognized
140 ;;; by Lisp and external formats can be useful to people who want to
141 ;;; process characters outside the Lisp range (see
142 ;;; CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)
143 ;;;
144 (defmacro define-external-format (name (&rest args) (&rest slots)
145 &optional octets-to-code code-to-octets)
146 (when (and (oddp (length args)) (not (= (length args) 1)))
147 (warn "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT." args))
148 (let* ((tmp (gensym))
149 (min (if (evenp (length args))
150 (or (getf args :min) (getf args :size) 1)
151 1))
152 (max (if (evenp (length args))
153 (or (getf args :max) (getf args :size) 6)
154 6))
155 (base (if (= (length args) 1)
156 (find-external-format (first args))
157 nil))
158 (bslotd (if base (ef-slotd base) nil))
159 (slotd (%merge-slots bslotd slots))
160 (slotb (loop for slot in slotd
161 collect `(,(first slot)
162 `(the ,',(fourth slot)
163 ;; IDENTITY is here to protect against SETF
164 (identity (svref %slots% ,',(second slot))))))))
165 `(macrolet ((octets-to-code ((state input unput &rest vars) body)
166 `(lambda (,state ,input ,unput)
167 (declare (ignorable ,state ,input ,unput)
168 (optimize (ext:inhibit-warnings 3)))
169 (let (,@',slotb
170 (,input `(the (or (unsigned-byte 8) null) ,,input))
171 ,@(loop for var in vars collect `(,var (gensym))))
172 ,body)))
173 (code-to-octets ((code state output &rest vars) body)
174 `(lambda (,',tmp ,state ,output)
175 (declare (ignorable ,state ,output)
176 (optimize (ext:inhibit-warnings 3)))
177 (let (,@',slotb
178 (,code ',code)
179 ,@(loop for var in vars collect `(,var (gensym))))
180 `(let ((,',code (the (unsigned-byte 21) ,,',tmp)))
181 (declare (ignorable ,',code))
182 ,,body)))))
183 (%intern-ef (make-external-format ,name
184 ,(if base
185 `(ef-efx (find-external-format ,(ef-name base)))
186 `(make-efx :octets-to-code ,octets-to-code
187 :code-to-octets ,code-to-octets
188 :cache (make-array +ef-max+
189 :initial-element nil)
190 :min ,(min min max) :max ,(max min max)))
191 nil
192 (let* ,(loop for x in slotd
193 collect (list (first x) (third x)))
194 (vector ,@(mapcar #'first slotd)))
195 ',slotd)))))
196
197 ;;; DEFINE-COMPOSING-EXTERNAL-FORMAT -- Public
198 ;;;
199 ;;; A composing-external-format differs from an (ordinary) external-format
200 ;;; in that it translates characters (really codepoints, of course) into
201 ;;; other characters, rather than translating between characters and binary
202 ;;; octets. They have to be composed with a non-composing external-format
203 ;;; to be of any use.
204 ;;;
205 (defmacro define-composing-external-format (name (&key min max size)
206 input output)
207 (let ((tmp (gensym))
208 (min (or min size 1))
209 (max (or max size 1)))
210 `(macrolet ((input ((state input unput &rest vars) body)
211 `(lambda (,state ,input ,unput)
212 (declare (ignorable ,state ,input ,unput)
213 (optimize (ext:inhibit-warnings 3)))
214 (let ((,input `(the (values (or (unsigned-byte 21) null)
215 kernel:index)
216 ,,input))
217 ,@(loop for var in vars collect `(,var (gensym))))
218 ,body)))
219 (output ((code state output &rest vars) body)
220 `(lambda (,',tmp ,state ,output)
221 (declare (ignorable ,state ,output)
222 (optimize (ext:inhibit-warnings 3)))
223 (let ((,code ',code)
224 ,@(loop for var in vars collect `(,var (gensym))))
225 `(let ((,',code (the (unsigned-byte 21) ,,',tmp)))
226 (declare (ignorable ,',code))
227 ,,body)))))
228 (%intern-ef (make-external-format ,name
229 (make-efx :octets-to-code ,input
230 :code-to-octets ,output
231 :min ,(min min max) :max ,(max min max))
232 t
233 #() '())))))
234
235 (defun load-external-format-aliases ()
236 (let ((*package* (find-package "KEYWORD"))
237 (unix::*filename-encoding* :iso8859-1))
238 (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil :external-format :iso8859-1)
239 (when stm
240 (do ((alias (read stm nil stm) (read stm nil stm))
241 (value (read stm nil stm) (read stm nil stm)))
242 ((or (eq alias stm) (eq value stm))
243 (unless (eq alias stm)
244 (warn "External-format aliases file ends early.")))
245 (if (and (keywordp alias) (keywordp value))
246 (setf (gethash alias *external-format-aliases*) value)
247 (warn "Bad entry in external-format aliases file: ~S => ~S."
248 alias value)))))))
249
250 (defun %find-external-format (name)
251 ;; avoid loading files, etc., early in the boot sequence
252 (when (or (eq name :iso8859-1)
253 (and (eq name :default) (eq *default-external-format* :iso8859-1)))
254 (return-from %find-external-format
255 (gethash :iso8859-1 *external-formats*)))
256
257 (when (zerop (hash-table-count *external-format-aliases*))
258 (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
259 (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
260 (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
261 (load-external-format-aliases))
262
263 (do ((tmp (gethash name *external-format-aliases*)
264 (gethash tmp *external-format-aliases*))
265 (cnt 0 (1+ cnt)))
266 ((or (null tmp) (= cnt 50))
267 (unless (null tmp)
268 (error "External-format aliasing depth exceeded.")))
269 (setq name tmp))
270
271 (or (gethash name *external-formats*)
272 (and (let ((*package* (find-package "STREAM"))
273 (lisp::*enable-package-locked-errors* nil)
274 (*default-external-format* :iso8859-1)
275 (unix::*filename-encoding* :iso8859-1)
276 (s (open (format nil "ext-formats:~(~A~).lisp" name) :if-does-not-exist nil
277 :external-format :iso8859-1)))
278 (when s
279 (null (nth-value 1 (ext:compile-from-stream s)))))
280 (gethash name *external-formats*))))
281
282 (defun %composed-ef-name (a b)
283 (if (consp a) (append a (list b)) (list a b)))
284
285 (defun %compose-external-formats (a b)
286 (when (ef-composingp a)
287 (error "~S is a Composing-External-Format." (ef-name a)))
288 (unless (ef-composingp b)
289 (error "~S is not a Composing-External-Format." (ef-name b)))
290 (make-external-format
291 (%composed-ef-name (ef-name a) (ef-name b))
292 (make-efx
293 :octets-to-code (lambda (state input unput)
294 (funcall (ef-octets-to-code b) state
295 (funcall (ef-octets-to-code a)
296 state input unput)
297 unput))
298 :code-to-octets (lambda (code state output)
299 (funcall (ef-code-to-octets b) code state
300 `(lambda (x)
301 ,(funcall (ef-code-to-octets a)
302 'x state output))))
303 :cache (make-array +ef-max+ :initial-element nil)
304 :min (* (ef-min-octets a) (ef-min-octets b))
305 :max (* (ef-max-octets a) (ef-max-octets b)))
306 nil #() '()))
307
308 (defun find-external-format (name &optional (error-p t))
309 (when (external-format-p name)
310 (return-from find-external-format name))
311
312 (or (if (consp name) (every #'keywordp name) (keywordp name))
313 (error "~S is not a valid external format name." name))
314
315 (when (eq name :default)
316 (setq name *default-external-format*))
317
318 (when (and (consp name) (not (cdr name)))
319 (setq name (car name)))
320
321 (flet ((not-found ()
322 (when (equal *default-external-format* name)
323 (setq *default-external-format* :iso8859-1))
324 (if error-p (error "External format ~S not found." name) nil)))
325 (if (consp name)
326 (let ((efs (mapcar #'%find-external-format name)))
327 (if (member nil efs)
328 (not-found)
329 (let ((name (reduce #'%composed-ef-name (mapcar #'ef-name efs))))
330 (or (gethash name *external-formats*)
331 (%intern-ef (reduce #'%compose-external-formats efs))))))
332 (or (%find-external-format name) (not-found)))))
333
334 (defun flush-external-formats ()
335 (maphash (lambda (name ef)
336 (declare (ignore name))
337 (fill (ef-cache ef) nil))
338 *external-formats*))
339
340 (defvar *.table-inverse.* (make-hash-table :test 'eq :size 7))
341
342 (defun invert-table (table)
343 (declare (type (or (simple-array (unsigned-byte 31) *)
344 (simple-array (unsigned-byte 16) *))
345 table)
346 (optimize (speed 3) (space 0) (safety 0) (debug 0)
347 (ext:inhibit-warnings 3)))
348 (or (gethash table *.table-inverse.*)
349 (let* ((mbits (if (= (array-total-size table) 128) 7 8))
350 (lbits (cond ((> (array-total-size table) 256) 3)
351 ((< (array-total-size table) 100) 6)
352 (t 5)))
353 (hvec (make-array (1+ (ash #x110000 (- 0 mbits lbits)))
354 :element-type '(unsigned-byte 16)
355 :initial-element #xFFFF))
356 (mvec (make-array 0 :element-type '(unsigned-byte 16)))
357 (lvec (make-array 0 :element-type '(unsigned-byte 16)))
358 (width (array-dimension table 0))
359 (power (1- (array-rank table)))
360 (base (if (= width 94) 1 0))
361 hx mx lx)
362 (assert (and (< power 2) (<= width 256)))
363 (dotimes (i (array-total-size table))
364 (declare (type (integer 0 (#.array-dimension-limit)) i))
365 (let ((tmp i) (val (row-major-aref table i)) (z 0))
366 (declare (type (integer 0 (#.array-dimension-limit)) tmp)
367 (type (unsigned-byte 16) z))
368 (unless (= val #xFFFE)
369 (when (plusp power)
370 (multiple-value-bind (x y) (floor tmp width)
371 (setq tmp x)
372 (setq z (logior z (ash (the (integer 0 255) (+ y base))
373 (the (integer 0 24)
374 (* 8 power)))))))
375 (setq hx (ash val (- 0 mbits lbits)))
376 (when (= (aref hvec hx) #xFFFF)
377 (setf (aref hvec hx) (length mvec))
378 (let ((tmp (make-array (+ (length mvec) (ash 1 mbits))
379 :element-type '(unsigned-byte 16)
380 :initial-element #xFFFF)))
381 (replace tmp mvec)
382 (setq mvec tmp)))
383 (setq mx (logand (ash val (- lbits)) (lognot (ash -1 mbits))))
384 (when (= (aref mvec (+ hx mx)) #xFFFF)
385 (setf (aref mvec (+ hx mx)) (length lvec))
386 (let ((tmp (make-array (+ (length lvec) (ash 1 lbits))
387 :element-type '(unsigned-byte 16)
388 :initial-element #xFFFF)))
389 (replace tmp lvec)
390 (setq lvec tmp)))
391 (setq lx (logand val (lognot (ash -1 lbits))))
392 (setf (aref lvec (+ (aref mvec (+ hx mx)) lx))
393 (logior z (+ tmp base))))))
394 (setf (gethash table *.table-inverse.*)
395 (lisp::make-ntrie16 :split (logior (ash (1- mbits) 4) (1- lbits))
396 :hvec hvec :mvec mvec :lvec lvec)))))
397
398 (declaim (inline get-inverse))
399 (defun get-inverse (ntrie code)
400 (declare (type lisp::ntrie16 ntrie) (type lisp:codepoint code))
401 (let ((n (lisp::qref ntrie code)))
402 (and n (let ((m (aref (lisp::ntrie16-lvec ntrie) n)))
403 (if (= m #xFFFF) nil m)))))
404
405
406 (define-condition void-external-format (error)
407 ()
408 (:report
409 (lambda (condition stream)
410 (declare (ignore condition))
411 (format stream "Attempting I/O through void external-format."))))
412
413 (define-external-format :void (:size 0) ()
414 (octets-to-code (state input unput)
415 `(error 'void-external-format))
416 (code-to-octets (code state output)
417 `(error 'void-external-format)))
418
419 (define-external-format :iso8859-1 (:size 1) ()
420 (octets-to-code (state input unput)
421 `(values ,input 1))
422 (code-to-octets (code state output)
423 `(,output (if (> ,code 255) #x3F ,code))))
424
425 ;;; OCTETS-TO-CODEPOINT, CODEPOINT-TO-OCTETS -- Semi-Public
426 ;;;
427 ;;; Normally you'd want to use OCTETS-TO-CHAR and CHAR-TO-OCTETS instead of
428 ;;; these, but that limits you to Lisp's idea of a character - either Latin-1
429 ;;; in 8 bit Lisp images, or the Unicode BMP in 16 bit images. If you want
430 ;;; to read or write texts containing characters not supported by your Lisp,
431 ;;; these macros can be used instead.
432 (defmacro octets-to-codepoint (external-format state count input unput)
433 (let ((tmp1 (gensym)) (tmp2 (gensym))
434 (ef (find-external-format external-format)))
435 `(multiple-value-bind (,tmp1 ,tmp2)
436 ,(funcall (ef-octets-to-code ef) state input unput)
437 (setf ,count (the kernel:index ,tmp2))
438 (the (or (unsigned-byte 21) null) ,tmp1))))
439
440 (defmacro codepoint-to-octets (external-format code state output)
441 (let ((ef (find-external-format external-format)))
442 (funcall (ef-code-to-octets ef) code state output)))
443
444
445
446 (defvar *ef-base* +ef-max+)
447 (defvar *ef-extensions* '())
448
449 (defun ensure-cache (ef id reqd)
450 (let ((base (or (getf *ef-extensions* id)
451 (setf (getf *ef-extensions* id)
452 (prog1 *ef-base* (incf *ef-base* reqd))))))
453 (when (< (length (ef-cache ef)) (+ base reqd))
454 (setf (efx-cache (ef-efx ef))
455 (adjust-array (ef-cache ef) (+ base reqd) :initial-element nil)))
456 base))
457
458 ;;; DEF-EF-MACRO -- Public
459 ;;;
460 ;;;
461 (defmacro def-ef-macro (name (ef id reqd idx) body)
462 (let ((tmp1 (gensym))
463 (tmp2 (gensym))
464 (%name (intern (format nil "%~A" name) (symbol-package name))))
465 `(progn
466 (defun ,%name (,ef)
467 (let* ((,tmp1 (find-external-format ,ef))
468 (,tmp2 ,(if (eq id 'lisp::lisp)
469 idx
470 `(+ (ensure-cache ,tmp1 ',id ,reqd) ,idx))))
471 (funcall (or (aref (ef-cache ,tmp1) ,tmp2)
472 (setf (aref (ef-cache ,tmp1) ,tmp2)
473 (let ((*compile-print* nil)
474 ;; Set default format when we compile so we
475 ;; can see compiler messages. If we don't,
476 ;; we run into a problem that we might be
477 ;; changing the default format while we're
478 ;; compiling, and we don't know how to output
479 ;; the compiler messages.
480 (*default-external-format* :iso8859-1))
481 (compile nil `(lambda (%slots%)
482 (declare (ignorable %slots%))
483 ,,body)))))
484 (ef-slots ,tmp1))))
485 (declaim (inline ,name))
486 (defun ,name (,tmp1)
487 (let ((,tmp2 (load-time-value (cons nil nil))))
488 (when (eq ,tmp1 :default)
489 (setq ,tmp1 *default-external-format*))
490 (if (eq ,tmp1 (car ,tmp2))
491 (cdr ,tmp2)
492 (setf (car ,tmp2) ,tmp1
493 (cdr ,tmp2) (,%name ,tmp1))))))))
494
495
496
497 ;;; OCTETS-TO-CHAR, CHAR-TO-OCTETS -- Public
498 ;;;
499 ;;; Read and write one character through an external-format
500 ;;;
501 (defmacro octets-to-char (external-format state count input unput)
502 (let ((s (gensym "STATE-"))
503 (code (gensym "CODE-")))
504 `(let ((,s ,state))
505 (when (null ,s)
506 ;; Need our own state variable to hold our state and the
507 ;; state for the external format.
508 (setq ,s (setf ,state (cons nil nil))))
509 (if (car ,s)
510 ;; Return the trailing surrgate. Must set count to 0 to
511 ;; tell the stream code we didn't consume any octets!
512 (prog1 (the character (car ,s))
513 (setf (car ,s) nil)
514 (setf ,count 0))
515 (let ((,code (octets-to-codepoint ,external-format
516 (cdr ,s) ,count ,input ,unput)))
517 (declare (type (unsigned-byte 31) ,code))
518 (cond ((or (lisp::surrogatep ,code)
519 (> ,code #x10FFFF))
520 #-(and unicode (not unicode-bootstrap)) #\?
521 #+(and unicode (not unicode-bootstrap)) #\U+FFFD)
522 #+unicode
523 ((> ,code #xFFFF)
524 (multiple-value-bind (hi lo)
525 (lisp::surrogates ,code)
526 (setf (car ,state) lo)
527 hi))
528 (t (code-char ,code))))))))
529
530 ;; This doesn't handle surrogate code units correctly. It just
531 ;; outputs the surrogate value to the external format. External
532 ;; formats almost never allow surrogate code points (except UTF-16).
533 (defmacro char-to-octets (external-format char state output)
534 `(codepoint-to-octets ,external-format (char-code ,char) ,state ,output))
535
536
537 (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
538 `(lambda (string start end buffer &aux (ptr 0) (state nil) (code 0) (c 0) widep)
539 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
540 (type simple-string string)
541 (type kernel:index start end ptr)
542 (type (simple-array (unsigned-byte 8) (*)) buffer)
543 (type lisp:codepoint code c)
544 (type (or null fixnum) widep)
545 (ignorable state))
546 (loop with i of-type kernel:index = start
547 while (< i end)
548 do
549 (multiple-value-bind (c widep)
550 (lisp::codepoint string i end)
551 (incf i (if widep 2 1))
552 (codepoint-to-octets ,extfmt c state
553 (lambda (b)
554 (when (= ptr (length buffer))
555 (setq buffer (adjust-array buffer (* 2 ptr))))
556 (setf (aref buffer (1- (incf ptr))) b)))))))
557
558 (defun string-to-octets (string &key (start 0) end (external-format :default)
559 (buffer nil bufferp))
560 (declare (type string string)
561 (type kernel:index start)
562 (type (or kernel:index null) end)
563 (type (or (simple-array (unsigned-byte 8) (*)) null) buffer))
564 (let* ((buffer (or buffer (make-array (length string)
565 :element-type '(unsigned-byte 8)))))
566 (multiple-value-bind (buffer ptr)
567 (lisp::with-array-data ((string string) (start start) (end end))
568 (funcall (ef-string-to-octets external-format)
569 string start end buffer))
570 (values (if bufferp buffer (lisp::shrink-vector buffer ptr)) ptr))))
571
572 (def-ef-macro ef-octets-to-string (extfmt lisp::lisp +ef-max+ +ef-os+)
573 `(lambda (octets ptr end string &aux (pos -1) (count 0) (state nil) (code 0))
574 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
575 (type (simple-array (unsigned-byte 8) (*)) octets)
576 (type kernel:index end count)
577 (type (integer -1 (#.array-dimension-limit)) ptr pos)
578 (type simple-string string)
579 (type lisp:codepoint code)
580 (ignorable state))
581 (loop until (>= ptr end)
582 do (when (= pos (length string))
583 (setq string (adjust-array string (* 2 pos))))
584 (setf code
585 (octets-to-codepoint ,extfmt state count
586 (aref octets (incf ptr)) ;;@@ EOF??
587 (lambda (n) (decf ptr n))))
588 ;; Convert codepoint to UTF-16 surrogate pairs if needed
589 (multiple-value-bind (high low)
590 (surrogates code)
591 (setf (aref string (incf pos)) high)
592 (when low
593 (setf (aref string (incf pos)) low)))
594 finally (return (values string (1+ pos))))))
595
596 (defun octets-to-string (octets &key (start 0) end (external-format :default)
597 (string nil stringp))
598 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
599 (type kernel:index start)
600 (type (or kernel:index null) end)
601 (type (or simple-string null) string))
602 (multiple-value-bind (string pos)
603 (funcall (ef-octets-to-string external-format)
604 octets (1- start) (1- (or end (length octets)))
605 (or string (make-string (length octets))))
606 (values (if stringp string (lisp::shrink-vector string pos)) pos)))
607
608
609
610 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
611 `(lambda (string start end result &aux (ptr 0) (state nil))
612 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
613 (type simple-string string)
614 (type kernel:index start end ptr)
615 (type simple-base-string result)
616 (ignorable state))
617 (dotimes (i (- end start) (values result ptr))
618 (declare (type kernel:index i))
619 (char-to-octets ,extfmt (schar string (+ start i)) state
620 (lambda (b)
621 (when (= ptr (length result))
622 (setq result (adjust-array result (* 2 ptr))))
623 (setf (aref result (1- (incf ptr)))
624 (code-char b)))))))
625
626 (defun string-encode (string external-format &optional (start 0) end)
627 "Encode the given String using External-Format and return a new
628 string. The characters of the new string are the octets of the
629 encoded result, with each octet converted to a character via
630 code-char. This is the inverse to String-Decode"
631 (when (zerop (length string))
632 (return-from string-encode string))
633 (multiple-value-bind (result ptr)
634 (lisp::with-array-data ((string string) (start start) (end end))
635 (funcall (ef-encode external-format) string start end
636 (make-string (length string) :element-type 'base-char)))
637 (lisp::shrink-vector result ptr)))
638
639 (def-ef-macro ef-decode (extfmt lisp::lisp +ef-max+ +ef-de+)
640 `(lambda (string ptr end result &aux (pos -1) (count 0) (state nil))
641 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
642 (type simple-string string)
643 (type kernel:index end count)
644 (type (integer -1 (#.array-dimension-limit)) ptr pos)
645 (type simple-string result)
646 (ignorable state))
647 (loop until (>= ptr end)
648 ;; increasing size of result shouldn't ever be necessary, unless
649 ;; someone implements an encoding smaller than the source string...
650 do (setf (schar result (incf pos))
651 (octets-to-char ,extfmt state count
652 ;; note the need to return NIL for EOF
653 (if (= (1+ ptr) (length string))
654 nil
655 (char-code (char string (incf ptr))))
656 (lambda (n) (decf ptr n))))
657 finally (return (values result (1+ pos))))))
658
659 (defun string-decode (string external-format &optional (start 0) end)
660 "Decode String using the given External-Format and return the new
661 string. The input string is treated as if it were an array of
662 octets, where the char-code of each character is the octet. This is
663 the inverse of String-Encode."
664 (when (zerop (length string))
665 (return-from string-decode string))
666 (multiple-value-bind (result pos)
667 (lisp::with-array-data ((string string) (start start) (end end))
668 (funcall (ef-decode external-format)
669 string (1- start) (1- end) (make-string (length string))))
670 (lisp::shrink-vector result pos)))

  ViewVC Help
Powered by ViewVC 1.1.5