/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2.4.3.2.14 - (hide annotations)
Thu Apr 23 15:10:08 2009 UTC (4 years, 11 months ago) by rtoy
Branch: unicode-utf16-extfmt-branch
Changes since 1.2.4.3.2.13: +20 -28 lines
string.lisp:
o Add Paul's SURROGATES-TO-CODEPOINT and remove
  CODEPOINT-FROM-SURROGATES.
o Change SURROGATES to return characters, not numbers.
o Update callers of SURROGATES to match.

extfmts.lisp:
o Update callers of SURROGATES to match.
o Use CODEPOINT to extract the correct codepoint from a string in
  EF-STRING-TO-OCTETS and EF-OCTETS-TO-STRING.
1 rtoy 1.1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2     ;;;
3     ;;; **********************************************************************
4     ;;; This code was written by Paul Foley and has been placed in the public
5     ;;; domain.
6     ;;;
7     (ext:file-comment
8 rtoy 1.2.4.3.2.14 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.2.4.3.2.14 2009/04/23 15:10:08 rtoy Exp $")
9 rtoy 1.1 ;;;
10     ;;; **********************************************************************
11     ;;;
12     ;;; Implementation of external-formats
13    
14     (in-package "STREAM")
15    
16     (export '(string-to-octets octets-to-string *default-external-format*
17 rtoy 1.2.4.1 string-encode string-decode))
18 rtoy 1.1
19     (defvar *default-external-format* :iso8859-1)
20    
21 rtoy 1.2.4.1 (defvar *external-formats* (make-hash-table :test 'equal))
22 rtoy 1.1 (defvar *external-format-aliases* (make-hash-table))
23    
24 rtoy 1.2.4.3.2.1 (defconstant +ef-cin+ 2)
25     (defconstant +ef-cout+ 3)
26     (defconstant +ef-sin+ 4)
27     (defconstant +ef-sout+ 5)
28     (defconstant +ef-os+ 6)
29     (defconstant +ef-so+ 7)
30     (defconstant +ef-en+ 8)
31     (defconstant +ef-de+ 9)
32     (defconstant +ef-max+ 10)
33 rtoy 1.2.4.1
34 rtoy 1.1 (define-condition external-format-not-implemented (error)
35     ()
36     (:report
37     (lambda (condition stream)
38     (declare (ignore condition))
39     (format stream "Attempting unimplemented external-format I/O."))))
40    
41     (defun %efni (a b c d)
42     (declare (ignore a b c d))
43     (error 'external-format-not-implemented))
44    
45 rtoy 1.2.4.3 (defstruct efx
46     (octets-to-code #'%efni :type function :read-only t)
47     (code-to-octets #'%efni :type function :read-only t)
48 rtoy 1.2.4.3.2.1 (cache nil :type (or null simple-vector))
49     (min 1 :type kernel:index :read-only t)
50     (max 1 :type kernel:index :read-only t))
51 rtoy 1.2.4.3
52 rtoy 1.1 (defstruct (external-format
53     (:conc-name ef-)
54     (:print-function %print-external-format)
55 rtoy 1.2.4.3 (:constructor make-external-format (name efx composingp
56     &optional slots slotd)))
57 rtoy 1.2.4.1 (name (ext:required-argument) :type (or keyword cons) :read-only t)
58 rtoy 1.2.4.3 (efx (ext:required-argument) :type efx :read-only t)
59 rtoy 1.2.4.1 (composingp (ext:required-argument) :type boolean :read-only t)
60 rtoy 1.1 (slots #() :type simple-vector :read-only t)
61 rtoy 1.2.4.3 (slotd nil :type list :read-only t))
62 rtoy 1.1
63     (defun %print-external-format (ef stream depth)
64     (declare (ignore depth))
65     (print-unreadable-object (ef stream :type t :identity t)
66     (princ (ef-name ef) stream)))
67    
68 rtoy 1.2.4.3.2.1 (defun %intern-ef (ef)
69 rtoy 1.2.4.1 (setf (gethash (ef-name ef) *external-formats*) ef))
70    
71 rtoy 1.2.4.3.2.1 (declaim (inline ef-octets-to-code ef-code-to-octets ef-cache
72     ef-min-octets ef-max-octets))
73 rtoy 1.2.4.3
74     (defun ef-octets-to-code (ef)
75     (efx-octets-to-code (ef-efx ef)))
76    
77     (defun ef-code-to-octets (ef)
78     (efx-code-to-octets (ef-efx ef)))
79    
80     (defun ef-cache (ef)
81     (efx-cache (ef-efx ef)))
82    
83 rtoy 1.2.4.3.2.1 (defun ef-min-octets (ef)
84     (efx-min (ef-efx ef)))
85    
86     (defun ef-max-octets (ef)
87     (efx-max (ef-efx ef)))
88    
89     (eval-when (:compile-toplevel :load-toplevel :execute)
90     (defun %merge-slots (old new)
91     (let* ((pos (length old))
92     (tmp (mapcar (lambda (x)
93     (let* ((name (if (consp x) (first x) x))
94     (init (if (consp x) (second x) nil))
95     (list (if (consp x) (nthcdr 2 x) nil))
96     (prev (assoc name old))
97     (posn (if prev (second prev) (1- (incf pos)))))
98     (list name posn init (getf list :type t))))
99     new)))
100     (delete-duplicates (stable-sort (append old tmp) #'< :key #'second)
101     :key #'second))))
102    
103     ;;; DEFINE-EXTERNAL-FORMAT -- Public
104     ;;;
105     ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets
106     ;;; Define a new external format. Min/Max/Size are the minimum and
107     ;;; maximum number of octets that make up a character (:size N is just
108     ;;; shorthand for :min N :max N). Slots is a list of slot descriptions
109     ;;; similar to defstruct.
110     ;;;
111     ;;; name (base) (&rest slots)
112     ;;; Define an external format based on a previously-defined external
113     ;;; format, Base. The slot names used in Slots must match those in Base.
114     ;;;
115 rtoy 1.2.4.3.2.10 ;;; octets-to-code (state input unput &rest vars)
116     ;;; Defines a form to be used by the external format to convert
117     ;;; octets to a code point. State is a form that can be used by the
118     ;;; body to access the state variable of the stream. Input is a
119     ;;; form that can be used to read one more octets from the input
120     ;;; strema. Similarly, Unput is a form to put back one octet to the
121     ;;; input stream. Vars is a list of vars that need to be defined
122     ;;; for any symbols used within the form.
123     ;;;
124     ;;; This should return two values: the code and the number of octets
125     ;;; read to form the code.
126     ;;;
127     ;;; code-to-octets (code state output &rest vars)
128     ;;; Defines a form to be used by the external format to convert a
129     ;;; code point to octets for output. Code is the code point to be
130     ;;; converted. State is a form to access the current value of the
131     ;;; stream's state variable. Output is a form that writes one octet
132     ;;; to the output stream.
133     ;;;
134 rtoy 1.2.4.3.2.12 ;;; Note: external-formats work on code-points, not
135 rtoy 1.2.4.3.2.10 ;;; characters, so that the entire 31 bit ISO-10646 range can be
136     ;;; used internally regardless of the size of a character recognized
137     ;;; by Lisp and external formats can be useful to people who want to
138     ;;; process characters outside the Lisp range (see
139     ;;; CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)
140     ;;;
141 rtoy 1.2.4.3.2.1 (defmacro define-external-format (name (&rest args) (&rest slots)
142     &optional octets-to-code code-to-octets)
143     (when (and (oddp (length args)) (not (= (length args) 1)))
144     (warn "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT." args))
145 rtoy 1.2.4.3.2.8 (let* ((tmp (gensym))
146 rtoy 1.2.4.3.2.1 (min (if (evenp (length args))
147     (or (getf args :min) (getf args :size) 1)
148     1))
149     (max (if (evenp (length args))
150     (or (getf args :max) (getf args :size) 6)
151     6))
152     (base (if (= (length args) 1)
153     (find-external-format (first args))
154     nil))
155     (bslotd (if base (ef-slotd base) nil))
156     (slotd (%merge-slots bslotd slots))
157     (slotb (loop for slot in slotd
158     collect `(,(first slot)
159 rtoy 1.2.4.3.2.8 `(the ,',(fourth slot)
160     ;; IDENTITY is here to protect against SETF
161     (identity (svref %slots% ,',(second slot))))))))
162 rtoy 1.2.4.1 `(macrolet ((octets-to-code ((state input unput &rest vars) body)
163 rtoy 1.2.4.3.2.8 `(lambda (,state ,input ,unput)
164     (declare (ignorable ,state ,input ,unput)
165 rtoy 1.2.4.1 (optimize (ext:inhibit-warnings 3)))
166 rtoy 1.2.4.3.2.1 (let (,@',slotb
167     (,input `(the (or (unsigned-byte 8) null) ,,input))
168 rtoy 1.2.4.1 ,@(loop for var in vars collect `(,var (gensym))))
169     ,body)))
170     (code-to-octets ((code state output &rest vars) body)
171 rtoy 1.2.4.3.2.8 `(lambda (,',tmp ,state ,output)
172     (declare (ignorable ,state ,output)
173 rtoy 1.2.4.1 (optimize (ext:inhibit-warnings 3)))
174 rtoy 1.2.4.3.2.1 (let (,@',slotb
175     (,code ',code)
176 rtoy 1.2.4.1 ,@(loop for var in vars collect `(,var (gensym))))
177 rtoy 1.2.4.3.2.8 `(let ((,',code (the (unsigned-byte 31) ,,',tmp)))
178 rtoy 1.2.4.1 (declare (ignorable ,',code))
179     ,,body)))))
180 rtoy 1.2.4.3.2.1 (%intern-ef (make-external-format ,name
181     ,(if base
182     `(ef-efx (find-external-format ,(ef-name base)))
183     `(make-efx :octets-to-code ,octets-to-code
184     :code-to-octets ,code-to-octets
185     :cache (make-array +ef-max+
186     :initial-element nil)
187     :min ,(min min max) :max ,(max min max)))
188     nil
189     (vector ,@(mapcar #'third slotd))
190     ',slotd)))))
191 rtoy 1.2.4.1
192 rtoy 1.2.4.3.2.1 ;;; DEFINE-COMPOSING-EXTERNAL-FORMAT -- Public
193     ;;;
194     ;;; A composing-external-format differs from an (ordinary) external-format
195     ;;; in that it translates characters (really codepoints, of course) into
196     ;;; other characters, rather than translating between characters and binary
197     ;;; octets. They have to be composed with a non-composing external-format
198     ;;; to be of any use.
199     ;;;
200     (defmacro define-composing-external-format (name (&key min max size)
201     input output)
202 rtoy 1.2.4.3.2.8 (let ((tmp (gensym))
203 rtoy 1.2.4.3.2.1 (min (or min size 1))
204     (max (or max size 1)))
205 rtoy 1.2.4.1 `(macrolet ((input ((state input unput &rest vars) body)
206 rtoy 1.2.4.3.2.8 `(lambda (,state ,input ,unput)
207     (declare (ignorable ,state ,input ,unput)
208 rtoy 1.2.4.1 (optimize (ext:inhibit-warnings 3)))
209     (let ((,input `(the (values (or (unsigned-byte 31) null)
210 rtoy 1.2.4.3.2.1 kernel:index)
211 rtoy 1.2.4.1 ,,input))
212     ,@(loop for var in vars collect `(,var (gensym))))
213     ,body)))
214     (output ((code state output &rest vars) body)
215 rtoy 1.2.4.3.2.8 `(lambda (,',tmp ,state ,output)
216     (declare (ignorable ,state ,output)
217 rtoy 1.2.4.1 (optimize (ext:inhibit-warnings 3)))
218     (let ((,code ',code)
219     ,@(loop for var in vars collect `(,var (gensym))))
220 rtoy 1.2.4.3.2.8 `(let ((,',code (the (unsigned-byte 31) ,,',tmp)))
221 rtoy 1.2.4.1 (declare (ignorable ,',code))
222     ,,body)))))
223 rtoy 1.2.4.3.2.1 (%intern-ef (make-external-format ,name
224     (make-efx :octets-to-code ,input
225     :code-to-octets ,output
226     :min ,(min min max) :max ,(max min max))
227     t
228     #() '())))))
229 rtoy 1.1
230     (defun load-external-format-aliases ()
231 rtoy 1.2.4.3.2.1 (let ((*package* (find-package "KEYWORD"))
232     (unix::*filename-encoding* :iso8859-1))
233 rtoy 1.2.4.2 (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil)
234 rtoy 1.1 (when stm
235 rtoy 1.2.4.3.2.1 (do ((alias (read stm nil stm) (read stm nil stm))
236     (value (read stm nil stm) (read stm nil stm)))
237     ((or (eq alias stm) (eq value stm))
238     (unless (eq alias stm)
239     (warn "External-format aliases file ends early.")))
240     (if (and (keywordp alias) (keywordp value))
241     (setf (gethash alias *external-format-aliases*) value)
242     (warn "Bad entry in external-format aliases file: ~S => ~S."
243     alias value)))))))
244 rtoy 1.1
245 rtoy 1.2.4.1 (defun %find-external-format (name)
246 rtoy 1.2.4.3.2.5 ;; avoid loading files, etc., early in the boot sequence
247     (when (or (eq name :iso8859-1)
248     (and (eq name :default) (eq *default-external-format* :iso8859-1)))
249     (return-from %find-external-format
250     (gethash :iso8859-1 *external-formats*)))
251    
252     (when (zerop (hash-table-count *external-format-aliases*))
253     (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
254     (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
255     (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
256     (load-external-format-aliases))
257 rtoy 1.1
258     (do ((tmp (gethash name *external-format-aliases*)
259     (gethash tmp *external-format-aliases*))
260     (cnt 0 (1+ cnt)))
261     ((or (null tmp) (= cnt 50))
262     (unless (null tmp)
263     (error "External-format aliasing depth exceeded.")))
264     (setq name tmp))
265    
266     (or (gethash name *external-formats*)
267 rtoy 1.2 (and (let ((*package* (find-package "STREAM"))
268 rtoy 1.2.4.3.2.1 (lisp::*enable-package-locked-errors* nil)
269     (*default-external-format* :iso8859-1)
270 rtoy 1.2.4.3.2.9 (unix::*filename-encoding* :iso8859-1)
271     (s (open (format nil "ext-formats:~(~A~).lisp" name) :if-does-not-exist nil)))
272     (when s
273     (null (nth-value 1 (ext:compile-from-stream s)))))
274 rtoy 1.2.4.1 (gethash name *external-formats*))))
275    
276     (defun %composed-ef-name (a b)
277     (if (consp a) (append a (list b)) (list a b)))
278    
279 rtoy 1.2.4.3.2.1 (defun %compose-external-formats (a b)
280 rtoy 1.2.4.1 (when (ef-composingp a)
281     (error "~S is a Composing-External-Format." (ef-name a)))
282     (unless (ef-composingp b)
283     (error "~S is not a Composing-External-Format." (ef-name b)))
284 rtoy 1.2.4.3 (make-external-format
285     (%composed-ef-name (ef-name a) (ef-name b))
286     (make-efx
287 rtoy 1.2.4.3.2.8 :octets-to-code (lambda (state input unput)
288     (funcall (ef-octets-to-code b) state
289     (funcall (ef-octets-to-code a)
290     state input unput)
291 rtoy 1.2.4.3 unput))
292 rtoy 1.2.4.3.2.8 :code-to-octets (lambda (code state output)
293     (funcall (ef-code-to-octets b) code state
294 rtoy 1.2.4.3 `(lambda (x)
295     ,(funcall (ef-code-to-octets a)
296     'x state output))))
297 rtoy 1.2.4.3.2.1 :cache (make-array +ef-max+ :initial-element nil)
298     :min (* (ef-min-octets a) (ef-min-octets b))
299     :max (* (ef-max-octets a) (ef-max-octets b)))
300 rtoy 1.2.4.3 nil #() '()))
301 rtoy 1.2.4.1
302     (defun find-external-format (name &optional (error-p t))
303     (when (external-format-p name)
304     (return-from find-external-format name))
305    
306     (or (if (consp name) (every #'keywordp name) (keywordp name))
307     (error "~S is not a valid external format name." name))
308 rtoy 1.1
309 rtoy 1.2.4.1 (when (eq name :default)
310     (setq name *default-external-format*))
311    
312     (when (and (consp name) (not (cdr name)))
313     (setq name (car name)))
314    
315 rtoy 1.2.4.3.2.5 (flet ((not-found ()
316     (when (equal *default-external-format* name)
317     (setq *default-external-format* :iso8859-1))
318     (if error-p (error "External format ~S not found." name) nil)))
319     (if (consp name)
320     (let ((efs (mapcar #'%find-external-format name)))
321     (if (member nil efs)
322     (not-found)
323     (let ((name (reduce #'%composed-ef-name (mapcar #'ef-name efs))))
324     (or (gethash name *external-formats*)
325     (%intern-ef (reduce #'%compose-external-formats efs))))))
326     (or (%find-external-format name) (not-found)))))
327 rtoy 1.1
328 rtoy 1.2.4.3.2.1 (defun flush-external-formats ()
329     (maphash (lambda (name ef)
330     (declare (ignore name))
331     (fill (ef-cache ef) nil))
332     *external-formats*))
333    
334 rtoy 1.2.4.3.2.5 (defvar *.table-inverse.* (make-hash-table :test 'eq :size 7))
335    
336     (defun invert-table (table)
337     (declare (type (or (simple-array (unsigned-byte 31) *)
338     (simple-array (unsigned-byte 16) *))
339     table)
340     (optimize (speed 3) (space 0) (safety 0) (debug 0)
341     (ext:inhibit-warnings 3)))
342     (or (gethash table *.table-inverse.*)
343     (let* ((result (make-hash-table))
344     (width (array-dimension table 0))
345     (power (1- (array-rank table)))
346     (base (if (= width 94) 1 0)))
347     (assert (and (< power 3) (<= width 256)))
348     (dotimes (i (array-total-size table))
349     (declare (type (integer 0 (#.array-dimension-limit)) i))
350     (let ((tmp i) (val (row-major-aref table i)) (z 0))
351     (declare (type (integer 0 (#.array-dimension-limit)) tmp)
352     (type (unsigned-byte 32) z))
353     (unless (or (= val #xFFFE) (gethash val result))
354     (dotimes (j power)
355     ;; j is only ever 0 in reality, since no n^3 tables are
356     ;; defined; z was declared as 32-bit above, so that limits
357     ;; us to 0 <= j <= 2 (see the ASSERT)
358     (declare (type (integer 0 2) j))
359     (multiple-value-bind (x y) (floor tmp width)
360     (setq tmp x)
361     (setq z (logior z (ash (the (integer 0 255) (+ y base))
362     (the (integer 0 24)
363     (* 8 (- power j))))))))
364     (setf (gethash val result) (logior z (+ tmp base))))))
365     (setf (gethash table *.table-inverse.*) result))))
366    
367    
368 rtoy 1.1 (define-condition void-external-format (error)
369     ()
370     (:report
371     (lambda (condition stream)
372     (declare (ignore condition))
373     (format stream "Attempting I/O through void external-format."))))
374    
375 rtoy 1.2.4.3.2.1 (define-external-format :void (:size 0) ()
376 rtoy 1.1 (octets-to-code (state input unput)
377 rtoy 1.2.4.1 `(error 'void-external-format))
378 rtoy 1.1 (code-to-octets (code state output)
379 rtoy 1.2.4.1 `(error 'void-external-format)))
380 rtoy 1.1
381 rtoy 1.2.4.3.2.1 (define-external-format :iso8859-1 (:size 1) ()
382 rtoy 1.1 (octets-to-code (state input unput)
383 rtoy 1.2.4.1 `(values ,input 1))
384 rtoy 1.1 (code-to-octets (code state output)
385 rtoy 1.2.4.1 `(,output (if (> ,code 255) #x3F ,code))))
386 rtoy 1.1
387 rtoy 1.2.4.3.2.1 ;;; OCTETS-TO-CODEPOINT, CODEPOINT-TO-OCTETS -- Semi-Public
388     ;;;
389     ;;; Normally you'd want to use OCTETS-TO-CHAR and CHAR-TO-OCTETS instead of
390     ;;; these, but that limits you to Lisp's idea of a character - either Latin-1
391     ;;; in 8 bit Lisp images, or the Unicode BMP in 16 bit images. If you want
392     ;;; to read or write texts containing characters not supported by your Lisp,
393     ;;; these macros can be used instead.
394 rtoy 1.1 (defmacro octets-to-codepoint (external-format state count input unput)
395 rtoy 1.2.4.3.2.8 (let ((tmp1 (gensym)) (tmp2 (gensym))
396     (ef (find-external-format external-format)))
397     `(multiple-value-bind (,tmp1 ,tmp2)
398     ,(funcall (ef-octets-to-code ef) state input unput)
399     (setf ,count (the kernel:index ,tmp2))
400     (the (or (unsigned-byte 31) null) ,tmp1))))
401 rtoy 1.1
402     (defmacro codepoint-to-octets (external-format code state output)
403 rtoy 1.2.4.3.2.8 (let ((ef (find-external-format external-format)))
404     (funcall (ef-code-to-octets ef) code state output)))
405 rtoy 1.2.4.1
406    
407    
408     (defvar *ef-base* +ef-max+)
409     (defvar *ef-extensions* '())
410    
411     (defun ensure-cache (ef id reqd)
412     (let ((base (or (getf *ef-extensions* id)
413     (setf (getf *ef-extensions* id)
414     (prog1 *ef-base* (incf *ef-base* reqd))))))
415     (when (< (length (ef-cache ef)) (+ base reqd))
416 rtoy 1.2.4.3 (setf (efx-cache (ef-efx ef))
417 rtoy 1.2.4.1 (adjust-array (ef-cache ef) (+ base reqd) :initial-element nil)))
418     base))
419    
420 rtoy 1.2.4.3.2.1 ;;; DEF-EF-MACRO -- Public
421     ;;;
422     ;;;
423 rtoy 1.2.4.1 (defmacro def-ef-macro (name (ef id reqd idx) body)
424 rtoy 1.2.4.3.2.8 (let ((tmp1 (gensym))
425     (tmp2 (gensym))
426     (%name (intern (format nil "%~A" name) (symbol-package name))))
427     `(progn
428     (defun ,%name (,ef)
429     (let* ((,tmp1 (find-external-format ,ef))
430     (,tmp2 ,(if (eq id 'lisp::lisp)
431     idx
432     `(+ (ensure-cache ,tmp1 ',id ,reqd) ,idx))))
433     (funcall (or (aref (ef-cache ,tmp1) ,tmp2)
434     (setf (aref (ef-cache ,tmp1) ,tmp2)
435     (let ((*compile-print* nil)
436     ;; Set default format when we compile so we
437     ;; can see compiler messages. if we don't,
438     ;; we run into a problem that we might be
439     ;; changing the default format while we're
440     ;; compiling, and we don't know how to output
441     ;; the compiler messages.
442     (*default-external-format* :iso8859-1))
443     (compile nil `(lambda (%slots%)
444     (declare (ignorable %slots%))
445     ,,body)))))
446     (ef-slots ,tmp1))))
447     (declaim (inline ,name))
448     (defun ,name (,tmp1)
449     (let ((,tmp2 (load-time-value (cons nil nil))))
450     (when (eq ,tmp1 :default)
451     (setq ,tmp1 *default-external-format*))
452     (if (eq ,tmp1 (car ,tmp2))
453     (cdr ,tmp2)
454     (setf (car ,tmp2) ,tmp1
455     (cdr ,tmp2) (,%name ,tmp1))))))))
456 rtoy 1.1
457    
458    
459 rtoy 1.2.4.3.2.1 ;;; OCTETS-TO-CHAR, CHAR-TO-OCTETS -- Public
460     ;;;
461     ;;; Read and write one character through an external-format
462     ;;;
463 rtoy 1.1 (defmacro octets-to-char (external-format state count input unput)
464 rtoy 1.2.4.3.2.13 `(progn
465     ;; We need our own state variable. Update the state variable to
466     ;; be a cons whose car is for this function and whose cdr is the
467     ;; state for the external format.
468     (unless (consp ,state)
469     ;; What should we do if state is not a cons and not nil? Just
470     ;; stick the current state value in the cdr.
471     (setf ,state (cons nil ,state)))
472     (cond ((car ,state)
473     ;; Return the trailing surrogate. Be sure to set count to
474     ;; 0 to tell stream code we didn't read anything this
475     ;; time!
476 rtoy 1.2.4.3.2.14 (prog1 (car ,state)
477 rtoy 1.2.4.3.2.13 (setf (car ,state) nil)
478     (setf ,count 0)))
479     (t
480     (let ((code (octets-to-codepoint ,external-format
481     (cdr ,state) ,count ,input ,unput)))
482     (declare (type (unsigned-byte 21) code))
483     (cond ((< code #x10000)
484     (setf (car ,state) nil)
485     (code-char code))
486     #-(and unicode (not unicode-bootstrap))
487     (t #\?)
488     #+(and unicode (not unicode-bootstrap))
489     (t
490     ;; Character outside the BMP. Convert to a
491     ;; surrogate pair and save the low surrogate for
492     ;; later and return the high surrogate now.
493     (multiple-value-bind (hi lo)
494     (lisp::surrogates code)
495     (setf (car ,state) lo)
496 rtoy 1.2.4.3.2.14 hi))))))))
497 rtoy 1.1
498     (defmacro char-to-octets (external-format char state output)
499     `(codepoint-to-octets ,external-format (char-code ,char) ,state ,output))
500    
501 rtoy 1.2.4.3.2.1
502 rtoy 1.2.4.1 (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
503 rtoy 1.2.4.3.2.11 `(lambda (string start end buffer &aux (ptr 0) (state nil) (code 0))
504 rtoy 1.2.4.1 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
505     (type simple-string string)
506 rtoy 1.2.4.3.2.1 (type kernel:index start end ptr)
507 rtoy 1.2.4.1 (type (simple-array (unsigned-byte 8) (*)) buffer)
508 rtoy 1.2.4.3.2.11 (type (integer 0 #x10ffff) code)
509 rtoy 1.2.4.1 (ignorable state))
510 rtoy 1.2.4.3.2.14 (loop with i of-type kernel_index = start
511     while (< i end)
512     do
513     (multiple-value-bind (c step)
514     (lisp::codepoint string i end)
515     (setf code c)
516     (incf i (if step 2 1))
517     (codepoint-to-octets ,extfmt code state
518     (lambda (b)
519     (when (= ptr (length buffer))
520     (setq buffer (adjust-array buffer (* 2 ptr))))
521     (setf (aref buffer (1- (incf ptr))) b)))))))
522 rtoy 1.2.4.1
523 rtoy 1.2 (defun string-to-octets (string &key (start 0) end (external-format :default)
524     (buffer nil bufferp))
525 rtoy 1.1 (declare (type string string)
526 rtoy 1.2.4.3.2.1 (type kernel:index start)
527     (type (or kernel:index null) end)
528 rtoy 1.2.4.1 (type (or (simple-array (unsigned-byte 8) (*)) null) buffer))
529 rtoy 1.2.4.3.2.1 (let* ((buffer (or buffer (make-array (length string)
530 rtoy 1.2.4.1 :element-type '(unsigned-byte 8)))))
531 rtoy 1.2.4.3.2.1 (multiple-value-bind (buffer ptr)
532     (lisp::with-array-data ((string string) (start start) (end end))
533 rtoy 1.2.4.3.2.8 (funcall (ef-string-to-octets external-format)
534 rtoy 1.2.4.3.2.1 string start end buffer))
535     (values (if bufferp buffer (lisp::shrink-vector buffer ptr)) ptr))))
536 rtoy 1.2.4.1
537     (def-ef-macro ef-octets-to-string (extfmt lisp::lisp +ef-max+ +ef-os+)
538 rtoy 1.2.4.3.2.11 `(lambda (octets ptr end string &aux (pos -1) (count 0) (state nil) (code 0))
539 rtoy 1.2.4.1 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
540     (type (simple-array (unsigned-byte 8) (*)) octets)
541 rtoy 1.2.4.3.2.1 (type kernel:index end count)
542 rtoy 1.2.4.1 (type (integer -1 (#.array-dimension-limit)) ptr pos)
543     (type simple-string string)
544 rtoy 1.2.4.3.2.11 (type (integer 0 #x10ffff) code)
545 rtoy 1.2.4.1 (ignorable state))
546     (loop until (>= ptr end)
547     do (when (= pos (length string))
548     (setq string (adjust-array string (* 2 pos))))
549 rtoy 1.2.4.3.2.11 (setf code
550     (octets-to-codepoint ,extfmt state count
551 rtoy 1.2.4.3.2.8 (aref octets (incf ptr)) ;;@@ EOF??
552     (lambda (n) (decf ptr n))))
553 rtoy 1.2.4.3.2.11 ;; Convert codepoint to UTF-16 surrogate pairs if needed
554 rtoy 1.2.4.3.2.14 (multiple-value-bind (high low)
555     (surrogates code)
556     (setf (aref string (incf pos)) high)
557     (when low
558     (setf (aref string (incf pos)) low)))
559 rtoy 1.2.4.3 finally (return (values string (1+ pos))))))
560 rtoy 1.1
561 rtoy 1.2 (defun octets-to-string (octets &key (start 0) end (external-format :default)
562     (string nil stringp))
563 rtoy 1.1 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
564 rtoy 1.2.4.3.2.1 (type kernel:index start)
565     (type (or kernel:index null) end)
566 rtoy 1.2.4.1 (type (or simple-string null) string))
567     (multiple-value-bind (string pos)
568 rtoy 1.2.4.3.2.8 (funcall (ef-octets-to-string external-format)
569 rtoy 1.2.4.1 octets (1- start) (1- (or end (length octets)))
570     (or string (make-string (length octets))))
571 rtoy 1.2 (values (if stringp string (lisp::shrink-vector string pos)) pos)))
572 rtoy 1.1
573    
574    
575 rtoy 1.2.4.1 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
576     `(lambda (string start end result &aux (ptr 0) (state nil))
577     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
578     (type simple-string string)
579 rtoy 1.2.4.3.2.1 (type kernel:index start end ptr)
580 rtoy 1.2.4.1 (type simple-base-string result)
581     (ignorable state))
582     (dotimes (i (- end start) (values result ptr))
583 rtoy 1.2.4.3.2.1 (declare (type kernel:index i))
584 rtoy 1.2.4.3.2.8 (char-to-octets ,extfmt (schar string (+ start i)) state
585     (lambda (b)
586     (when (= ptr (length result))
587     (setq result (adjust-array result (* 2 ptr))))
588     (setf (aref result (1- (incf ptr)))
589     (code-char b)))))))
590 rtoy 1.2.4.1
591     (defun string-encode (string external-format &optional (start 0) end)
592 rtoy 1.2.4.3.2.8 (when (zerop (length string))
593     (return-from string-encode string))
594 rtoy 1.2.4.1 (multiple-value-bind (result ptr)
595     (lisp::with-array-data ((string string) (start start) (end end))
596 rtoy 1.2.4.3.2.8 (funcall (ef-encode external-format) string start end
597 rtoy 1.2.4.1 (make-string (length string) :element-type 'base-char)))
598     (lisp::shrink-vector result ptr)))
599    
600     (def-ef-macro ef-decode (extfmt lisp::lisp +ef-max+ +ef-de+)
601     `(lambda (string ptr end result &aux (pos -1) (count 0) (state nil))
602     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
603     (type simple-string string)
604 rtoy 1.2.4.3.2.1 (type kernel:index end count)
605 rtoy 1.2.4.1 (type (integer -1 (#.array-dimension-limit)) ptr pos)
606     (type simple-string result)
607     (ignorable state))
608     (loop until (>= ptr end)
609     ;; increasing size of result shouldn't ever be necessary, unless
610     ;; someone implements an encoding smaller than the source string...
611     do (setf (schar result (incf pos))
612 rtoy 1.2.4.3.2.8 (octets-to-char ,extfmt state count
613     ;; note the need to return NIL for EOF
614     (if (= (1+ ptr) (length string))
615     nil
616     (char-code (char string (incf ptr))))
617     (lambda (n) (decf ptr n))))
618 rtoy 1.2.4.3 finally (return (values result (1+ pos))))))
619 rtoy 1.2.4.1
620     (defun string-decode (string external-format &optional (start 0) end)
621 rtoy 1.2.4.3.2.8 (when (zerop (length string))
622     (return-from string-decode string))
623 rtoy 1.2.4.1 (multiple-value-bind (result pos)
624     (lisp::with-array-data ((string string) (start start) (end end))
625 rtoy 1.2.4.3.2.8 (funcall (ef-decode external-format)
626 rtoy 1.2.4.1 string (1- start) (1- end) (make-string (length string))))
627 rtoy 1.2.4.3 (lisp::shrink-vector result pos)))

  ViewVC Help
Powered by ViewVC 1.1.5