/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (hide annotations)
Thu Jul 23 21:36:51 2009 UTC (4 years, 8 months ago) by rtoy
Branch: MAIN
Changes since 1.9: +8 -2 lines
code/extfmts.lisp:
o Move the +ss-ef-foo+ constants to here from strategy.lisp, and
  update them so they don't overlap with existing +ef-foo+ constants.
o Update +ef-max+ accordingly.

pcl/simple-streams/impl.lisp:
o Use +ss-ef-str+ instead of +ef-str+ in simple-stream-strlen.

pcl/simple-streams/strategy.lisp:
o Comment out +ss-ef-foo+ constants.
o Use +ef-max+ instead of +ss-ef-max+, which is no longer defined.
o Fix bugs in %dc-write-chars-fn:
  - Use ef variable
  - Need to call flush-out-buffer, not flush-buffer for dual-channel
    streams.
1 rtoy 1.1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2     ;;;
3     ;;; **********************************************************************
4     ;;; This code was written by Paul Foley and has been placed in the public
5     ;;; domain.
6     ;;;
7     (ext:file-comment
8 rtoy 1.10 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.10 2009/07/23 21:36:51 rtoy Exp $")
9 rtoy 1.1 ;;;
10     ;;; **********************************************************************
11     ;;;
12     ;;; Implementation of external-formats
13    
14     (in-package "STREAM")
15    
16     (export '(string-to-octets octets-to-string *default-external-format*
17 rtoy 1.3 string-encode string-decode))
18 rtoy 1.1
19     (defvar *default-external-format* :iso8859-1)
20    
21 rtoy 1.3 (defvar *external-formats* (make-hash-table :test 'equal))
22 rtoy 1.1 (defvar *external-format-aliases* (make-hash-table))
23    
24 rtoy 1.8 (defconstant +ef-str+ 1)
25 rtoy 1.6 (defconstant +ef-cin+ 2)
26     (defconstant +ef-cout+ 3)
27     (defconstant +ef-sin+ 4)
28     (defconstant +ef-sout+ 5)
29     (defconstant +ef-os+ 6)
30     (defconstant +ef-so+ 7)
31     (defconstant +ef-en+ 8)
32     (defconstant +ef-de+ 9)
33 rtoy 1.10 ;; For simple-streams support
34     (defconstant +ss-ef-str+ 10)
35     (defconstant +ss-ef-rchar+ 11)
36     (defconstant +ss-ef-rchars+ 12)
37     (defconstant +ss-ef-wchar+ 13)
38     (defconstant +ss-ef-wchars+ 14)
39     (defconstant +ef-max+ 15)
40 rtoy 1.6
41     ;; Unicode replacement character U+FFFD
42     (defconstant +replacement-character-code+ #xFFFD)
43 rtoy 1.3
44 rtoy 1.1 (define-condition external-format-not-implemented (error)
45     ()
46     (:report
47     (lambda (condition stream)
48     (declare (ignore condition))
49     (format stream "Attempting unimplemented external-format I/O."))))
50    
51     (defun %efni (a b c d)
52     (declare (ignore a b c d))
53     (error 'external-format-not-implemented))
54    
55 rtoy 1.5 (defstruct efx
56     (octets-to-code #'%efni :type function :read-only t)
57     (code-to-octets #'%efni :type function :read-only t)
58 rtoy 1.6 (cache nil :type (or null simple-vector))
59     (min 1 :type kernel:index :read-only t)
60     (max 1 :type kernel:index :read-only t))
61 rtoy 1.5
62 rtoy 1.1 (defstruct (external-format
63     (:conc-name ef-)
64     (:print-function %print-external-format)
65 rtoy 1.5 (:constructor make-external-format (name efx composingp
66     &optional slots slotd)))
67 rtoy 1.3 (name (ext:required-argument) :type (or keyword cons) :read-only t)
68 rtoy 1.5 (efx (ext:required-argument) :type efx :read-only t)
69 rtoy 1.3 (composingp (ext:required-argument) :type boolean :read-only t)
70 rtoy 1.1 (slots #() :type simple-vector :read-only t)
71 rtoy 1.5 (slotd nil :type list :read-only t))
72 rtoy 1.1
73     (defun %print-external-format (ef stream depth)
74     (declare (ignore depth))
75     (print-unreadable-object (ef stream :type t :identity t)
76     (princ (ef-name ef) stream)))
77    
78 rtoy 1.6 (defun %intern-ef (ef)
79 rtoy 1.3 (setf (gethash (ef-name ef) *external-formats*) ef))
80    
81 rtoy 1.6 (declaim (inline ef-octets-to-code ef-code-to-octets ef-cache
82     ef-min-octets ef-max-octets))
83 rtoy 1.5
84     (defun ef-octets-to-code (ef)
85     (efx-octets-to-code (ef-efx ef)))
86    
87     (defun ef-code-to-octets (ef)
88     (efx-code-to-octets (ef-efx ef)))
89    
90     (defun ef-cache (ef)
91     (efx-cache (ef-efx ef)))
92    
93 rtoy 1.6 (defun ef-min-octets (ef)
94     (efx-min (ef-efx ef)))
95    
96     (defun ef-max-octets (ef)
97     (efx-max (ef-efx ef)))
98    
99     (eval-when (:compile-toplevel :load-toplevel :execute)
100     (defun %merge-slots (old new)
101     (let* ((pos (length old))
102     (tmp (mapcar (lambda (x)
103     (let* ((name (if (consp x) (first x) x))
104     (init (if (consp x) (second x) nil))
105     (list (if (consp x) (nthcdr 2 x) nil))
106     (prev (assoc name old))
107     (posn (if prev (second prev) (1- (incf pos)))))
108     (list name posn init (getf list :type t))))
109     new)))
110     (delete-duplicates (stable-sort (append old tmp) #'< :key #'second)
111     :key #'second))))
112    
113     ;;; DEFINE-EXTERNAL-FORMAT -- Public
114     ;;;
115     ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets
116     ;;; Define a new external format. Min/Max/Size are the minimum and
117     ;;; maximum number of octets that make up a character (:size N is just
118     ;;; shorthand for :min N :max N). Slots is a list of slot descriptions
119     ;;; similar to defstruct.
120     ;;;
121     ;;; name (base) (&rest slots)
122     ;;; Define an external format based on a previously-defined external
123     ;;; format, Base. The slot names used in Slots must match those in Base.
124     ;;;
125     ;;; octets-to-code (state input unput &rest vars)
126     ;;; Defines a form to be used by the external format to convert
127     ;;; octets to a code point. State is a form that can be used by the
128     ;;; body to access the state variable of the stream. Input is a
129     ;;; form that can be used to read one more octets from the input
130     ;;; strema. Similarly, Unput is a form to put back one octet to the
131     ;;; input stream. Vars is a list of vars that need to be defined
132     ;;; for any symbols used within the form.
133     ;;;
134     ;;; This should return two values: the code and the number of octets
135     ;;; read to form the code.
136     ;;;
137     ;;; code-to-octets (code state output &rest vars)
138     ;;; Defines a form to be used by the external format to convert a
139     ;;; code point to octets for output. Code is the code point to be
140     ;;; converted. State is a form to access the current value of the
141     ;;; stream's state variable. Output is a form that writes one octet
142     ;;; to the output stream.
143     ;;;
144     ;;; Note: external-formats work on code-points, not
145     ;;; characters, so that the entire 31 bit ISO-10646 range can be
146     ;;; used internally regardless of the size of a character recognized
147     ;;; by Lisp and external formats can be useful to people who want to
148     ;;; process characters outside the Lisp range (see
149     ;;; CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)
150     ;;;
151     (defmacro define-external-format (name (&rest args) (&rest slots)
152     &optional octets-to-code code-to-octets)
153     (when (and (oddp (length args)) (not (= (length args) 1)))
154     (warn "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT." args))
155     (let* ((tmp (gensym))
156     (min (if (evenp (length args))
157     (or (getf args :min) (getf args :size) 1)
158     1))
159     (max (if (evenp (length args))
160     (or (getf args :max) (getf args :size) 6)
161     6))
162     (base (if (= (length args) 1)
163     (find-external-format (first args))
164     nil))
165     (bslotd (if base (ef-slotd base) nil))
166     (slotd (%merge-slots bslotd slots))
167     (slotb (loop for slot in slotd
168     collect `(,(first slot)
169     `(the ,',(fourth slot)
170     ;; IDENTITY is here to protect against SETF
171     (identity (svref %slots% ,',(second slot))))))))
172 rtoy 1.3 `(macrolet ((octets-to-code ((state input unput &rest vars) body)
173 rtoy 1.6 `(lambda (,state ,input ,unput)
174     (declare (ignorable ,state ,input ,unput)
175 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
176 rtoy 1.6 (let (,@',slotb
177     (,input `(the (or (unsigned-byte 8) null) ,,input))
178 rtoy 1.3 ,@(loop for var in vars collect `(,var (gensym))))
179     ,body)))
180     (code-to-octets ((code state output &rest vars) body)
181 rtoy 1.6 `(lambda (,',tmp ,state ,output)
182     (declare (ignorable ,state ,output)
183 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
184 rtoy 1.6 (let (,@',slotb
185     (,code ',code)
186 rtoy 1.3 ,@(loop for var in vars collect `(,var (gensym))))
187 rtoy 1.6 `(let ((,',code (the (unsigned-byte 21) ,,',tmp)))
188 rtoy 1.3 (declare (ignorable ,',code))
189     ,,body)))))
190 rtoy 1.6 (%intern-ef (make-external-format ,name
191     ,(if base
192     `(ef-efx (find-external-format ,(ef-name base)))
193     `(make-efx :octets-to-code ,octets-to-code
194     :code-to-octets ,code-to-octets
195     :cache (make-array +ef-max+
196     :initial-element nil)
197     :min ,(min min max) :max ,(max min max)))
198     nil
199     (let* ,(loop for x in slotd
200     collect (list (first x) (third x)))
201     (vector ,@(mapcar #'first slotd)))
202     ',slotd)))))
203 rtoy 1.3
204 rtoy 1.6 ;;; DEFINE-COMPOSING-EXTERNAL-FORMAT -- Public
205     ;;;
206     ;;; A composing-external-format differs from an (ordinary) external-format
207     ;;; in that it translates characters (really codepoints, of course) into
208     ;;; other characters, rather than translating between characters and binary
209     ;;; octets. They have to be composed with a non-composing external-format
210     ;;; to be of any use.
211     ;;;
212     (defmacro define-composing-external-format (name (&key min max size)
213     input output)
214     (let ((tmp (gensym))
215     (min (or min size 1))
216     (max (or max size 1)))
217 rtoy 1.3 `(macrolet ((input ((state input unput &rest vars) body)
218 rtoy 1.6 `(lambda (,state ,input ,unput)
219     (declare (ignorable ,state ,input ,unput)
220 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
221 rtoy 1.6 (let ((,input `(the (values (or (unsigned-byte 21) null)
222     kernel:index)
223 rtoy 1.3 ,,input))
224     ,@(loop for var in vars collect `(,var (gensym))))
225     ,body)))
226     (output ((code state output &rest vars) body)
227 rtoy 1.6 `(lambda (,',tmp ,state ,output)
228     (declare (ignorable ,state ,output)
229 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
230     (let ((,code ',code)
231     ,@(loop for var in vars collect `(,var (gensym))))
232 rtoy 1.6 `(let ((,',code (the (unsigned-byte 21) ,,',tmp)))
233 rtoy 1.3 (declare (ignorable ,',code))
234     ,,body)))))
235 rtoy 1.6 (%intern-ef (make-external-format ,name
236     (make-efx :octets-to-code ,input
237     :code-to-octets ,output
238     :min ,(min min max) :max ,(max min max))
239     t
240     #() '())))))
241 rtoy 1.1
242     (defun load-external-format-aliases ()
243 rtoy 1.6 (let ((*package* (find-package "KEYWORD"))
244     (unix::*filename-encoding* :iso8859-1))
245     (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil :external-format :iso8859-1)
246 rtoy 1.1 (when stm
247 rtoy 1.6 (do ((alias (read stm nil stm) (read stm nil stm))
248     (value (read stm nil stm) (read stm nil stm)))
249     ((or (eq alias stm) (eq value stm))
250     (unless (eq alias stm)
251     (warn "External-format aliases file ends early.")))
252     (if (and (keywordp alias) (keywordp value))
253     (setf (gethash alias *external-format-aliases*) value)
254     (warn "Bad entry in external-format aliases file: ~S => ~S."
255     alias value)))))))
256 rtoy 1.1
257 rtoy 1.3 (defun %find-external-format (name)
258 rtoy 1.6 ;; avoid loading files, etc., early in the boot sequence
259     (when (or (eq name :iso8859-1)
260     (and (eq name :default) (eq *default-external-format* :iso8859-1)))
261     (return-from %find-external-format
262     (gethash :iso8859-1 *external-formats*)))
263    
264 rtoy 1.1 (when (zerop (hash-table-count *external-format-aliases*))
265     (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
266     (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
267     (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
268     (load-external-format-aliases))
269    
270     (do ((tmp (gethash name *external-format-aliases*)
271     (gethash tmp *external-format-aliases*))
272     (cnt 0 (1+ cnt)))
273     ((or (null tmp) (= cnt 50))
274     (unless (null tmp)
275     (error "External-format aliasing depth exceeded.")))
276     (setq name tmp))
277    
278     (or (gethash name *external-formats*)
279 rtoy 1.2 (and (let ((*package* (find-package "STREAM"))
280 rtoy 1.6 (lisp::*enable-package-locked-errors* nil)
281     (*default-external-format* :iso8859-1)
282     (unix::*filename-encoding* :iso8859-1)
283     (s (open (format nil "ext-formats:~(~A~).lisp" name) :if-does-not-exist nil
284     :external-format :iso8859-1)))
285     (when s
286     (null (nth-value 1 (ext:compile-from-stream s)))))
287 rtoy 1.3 (gethash name *external-formats*))))
288    
289     (defun %composed-ef-name (a b)
290     (if (consp a) (append a (list b)) (list a b)))
291    
292 rtoy 1.6 (defun %compose-external-formats (a b)
293 rtoy 1.3 (when (ef-composingp a)
294     (error "~S is a Composing-External-Format." (ef-name a)))
295     (unless (ef-composingp b)
296     (error "~S is not a Composing-External-Format." (ef-name b)))
297 rtoy 1.5 (make-external-format
298     (%composed-ef-name (ef-name a) (ef-name b))
299     (make-efx
300 rtoy 1.6 :octets-to-code (lambda (state input unput)
301     (funcall (ef-octets-to-code b) state
302     (funcall (ef-octets-to-code a)
303     state input unput)
304 rtoy 1.5 unput))
305 rtoy 1.6 :code-to-octets (lambda (code state output)
306     (funcall (ef-code-to-octets b) code state
307 rtoy 1.5 `(lambda (x)
308     ,(funcall (ef-code-to-octets a)
309     'x state output))))
310 rtoy 1.6 :cache (make-array +ef-max+ :initial-element nil)
311     :min (* (ef-min-octets a) (ef-min-octets b))
312     :max (* (ef-max-octets a) (ef-max-octets b)))
313 rtoy 1.5 nil #() '()))
314 rtoy 1.3
315     (defun find-external-format (name &optional (error-p t))
316     (when (external-format-p name)
317     (return-from find-external-format name))
318    
319     (or (if (consp name) (every #'keywordp name) (keywordp name))
320     (error "~S is not a valid external format name." name))
321 rtoy 1.1
322 rtoy 1.3 (when (eq name :default)
323     (setq name *default-external-format*))
324    
325     (when (and (consp name) (not (cdr name)))
326     (setq name (car name)))
327    
328 rtoy 1.6 (flet ((not-found ()
329     (when (equal *default-external-format* name)
330     (setq *default-external-format* :iso8859-1))
331     (if error-p (error "External format ~S not found." name) nil)))
332     (if (consp name)
333     (let ((efs (mapcar #'%find-external-format name)))
334     (if (member nil efs)
335     (not-found)
336     (let ((name (reduce #'%composed-ef-name (mapcar #'ef-name efs))))
337     (or (gethash name *external-formats*)
338     (%intern-ef (reduce #'%compose-external-formats efs))))))
339     (or (%find-external-format name) (not-found)))))
340    
341     (defun flush-external-formats ()
342     (maphash (lambda (name ef)
343     (declare (ignore name))
344     (fill (ef-cache ef) nil))
345     *external-formats*))
346    
347     (defvar *.table-inverse.* (make-hash-table :test 'eq :size 7))
348    
349     (defun invert-table (table)
350     (declare (type (or (simple-array (unsigned-byte 31) *)
351     (simple-array (unsigned-byte 16) *))
352     table)
353     (optimize (speed 3) (space 0) (safety 0) (debug 0)
354     (ext:inhibit-warnings 3)))
355     (or (gethash table *.table-inverse.*)
356     (let* ((mbits (if (= (array-total-size table) 128) 7 8))
357     (lbits (cond ((> (array-total-size table) 256) 3)
358     ((< (array-total-size table) 100) 6)
359     (t 5)))
360     (hvec (make-array (1+ (ash #x110000 (- 0 mbits lbits)))
361     :element-type '(unsigned-byte 16)
362     :initial-element #xFFFF))
363     (mvec (make-array 0 :element-type '(unsigned-byte 16)))
364     (lvec (make-array 0 :element-type '(unsigned-byte 16)))
365     (width (array-dimension table 0))
366     (power (1- (array-rank table)))
367     (base (if (= width 94) 1 0))
368     hx mx lx)
369     (assert (and (< power 2) (<= width 256)))
370     (dotimes (i (array-total-size table))
371     (declare (type (integer 0 (#.array-dimension-limit)) i))
372     (let ((tmp i) (val (row-major-aref table i)) (z 0))
373     (declare (type (integer 0 (#.array-dimension-limit)) tmp)
374     (type (unsigned-byte 16) z))
375     (unless (= val #xFFFE)
376     (when (plusp power)
377     (multiple-value-bind (x y) (floor tmp width)
378     (setq tmp x)
379     (setq z (logior z (ash (the (integer 0 255) (+ y base))
380     (the (integer 0 24)
381     (* 8 power)))))))
382     (setq hx (ash val (- 0 mbits lbits)))
383     (when (= (aref hvec hx) #xFFFF)
384     (setf (aref hvec hx) (length mvec))
385     (let ((tmp (make-array (+ (length mvec) (ash 1 mbits))
386     :element-type '(unsigned-byte 16)
387     :initial-element #xFFFF)))
388     (replace tmp mvec)
389     (setq mvec tmp)))
390     (setq mx (logand (ash val (- lbits)) (lognot (ash -1 mbits))))
391 rtoy 1.7 (when (= (aref mvec (+ (aref hvec hx) mx)) #xFFFF)
392     (setf (aref mvec (+ (aref hvec hx) mx)) (length lvec))
393 rtoy 1.6 (let ((tmp (make-array (+ (length lvec) (ash 1 lbits))
394     :element-type '(unsigned-byte 16)
395     :initial-element #xFFFF)))
396     (replace tmp lvec)
397     (setq lvec tmp)))
398     (setq lx (logand val (lognot (ash -1 lbits))))
399 rtoy 1.7 (setf (aref lvec (+ (aref mvec (+ (aref hvec hx) mx)) lx))
400 rtoy 1.6 (logior z (+ tmp base))))))
401     (setf (gethash table *.table-inverse.*)
402     (lisp::make-ntrie16 :split (logior (ash (1- mbits) 4) (1- lbits))
403     :hvec hvec :mvec mvec :lvec lvec)))))
404    
405     (declaim (inline get-inverse))
406     (defun get-inverse (ntrie code)
407     (declare (type lisp::ntrie16 ntrie) (type lisp:codepoint code))
408     (let ((n (lisp::qref ntrie code)))
409     (and n (let ((m (aref (lisp::ntrie16-lvec ntrie) n)))
410     (if (= m #xFFFF) nil m)))))
411    
412 rtoy 1.1
413     (define-condition void-external-format (error)
414     ()
415     (:report
416     (lambda (condition stream)
417     (declare (ignore condition))
418     (format stream "Attempting I/O through void external-format."))))
419    
420 rtoy 1.6 (define-external-format :void (:size 0) ()
421 rtoy 1.1 (octets-to-code (state input unput)
422 rtoy 1.3 `(error 'void-external-format))
423 rtoy 1.1 (code-to-octets (code state output)
424 rtoy 1.3 `(error 'void-external-format)))
425 rtoy 1.1
426 rtoy 1.6 (define-external-format :iso8859-1 (:size 1) ()
427 rtoy 1.1 (octets-to-code (state input unput)
428 rtoy 1.3 `(values ,input 1))
429 rtoy 1.1 (code-to-octets (code state output)
430 rtoy 1.3 `(,output (if (> ,code 255) #x3F ,code))))
431 rtoy 1.1
432 rtoy 1.6 ;;; OCTETS-TO-CODEPOINT, CODEPOINT-TO-OCTETS -- Semi-Public
433     ;;;
434     ;;; Normally you'd want to use OCTETS-TO-CHAR and CHAR-TO-OCTETS instead of
435     ;;; these, but that limits you to Lisp's idea of a character - either Latin-1
436     ;;; in 8 bit Lisp images, or the Unicode BMP in 16 bit images. If you want
437     ;;; to read or write texts containing characters not supported by your Lisp,
438     ;;; these macros can be used instead.
439 rtoy 1.1 (defmacro octets-to-codepoint (external-format state count input unput)
440 rtoy 1.6 (let ((tmp1 (gensym)) (tmp2 (gensym))
441     (ef (find-external-format external-format)))
442     `(multiple-value-bind (,tmp1 ,tmp2)
443     ,(funcall (ef-octets-to-code ef) state input unput)
444     (setf ,count (the kernel:index ,tmp2))
445     (the (or (unsigned-byte 21) null) ,tmp1))))
446 rtoy 1.1
447     (defmacro codepoint-to-octets (external-format code state output)
448 rtoy 1.6 (let ((ef (find-external-format external-format)))
449     (funcall (ef-code-to-octets ef) code state output)))
450 rtoy 1.3
451    
452    
453     (defvar *ef-base* +ef-max+)
454     (defvar *ef-extensions* '())
455    
456     (defun ensure-cache (ef id reqd)
457     (let ((base (or (getf *ef-extensions* id)
458     (setf (getf *ef-extensions* id)
459     (prog1 *ef-base* (incf *ef-base* reqd))))))
460     (when (< (length (ef-cache ef)) (+ base reqd))
461 rtoy 1.5 (setf (efx-cache (ef-efx ef))
462 rtoy 1.3 (adjust-array (ef-cache ef) (+ base reqd) :initial-element nil)))
463     base))
464    
465 rtoy 1.6 ;;; DEF-EF-MACRO -- Public
466     ;;;
467     ;;;
468 rtoy 1.3 (defmacro def-ef-macro (name (ef id reqd idx) body)
469 rtoy 1.6 (let ((tmp1 (gensym))
470     (tmp2 (gensym))
471     (%name (intern (format nil "%~A" name) (symbol-package name))))
472     `(progn
473     (defun ,%name (,ef)
474     (let* ((,tmp1 (find-external-format ,ef))
475     (,tmp2 ,(if (eq id 'lisp::lisp)
476     idx
477     `(+ (ensure-cache ,tmp1 ',id ,reqd) ,idx))))
478     (funcall (or (aref (ef-cache ,tmp1) ,tmp2)
479     (setf (aref (ef-cache ,tmp1) ,tmp2)
480     (let ((*compile-print* nil)
481     ;; Set default format when we compile so we
482     ;; can see compiler messages. If we don't,
483     ;; we run into a problem that we might be
484     ;; changing the default format while we're
485     ;; compiling, and we don't know how to output
486     ;; the compiler messages.
487     (*default-external-format* :iso8859-1))
488     (compile nil `(lambda (%slots%)
489     (declare (ignorable %slots%))
490     ,,body)))))
491     (ef-slots ,tmp1))))
492     (declaim (inline ,name))
493     (defun ,name (,tmp1)
494     (let ((,tmp2 (load-time-value (cons nil nil))))
495     (when (eq ,tmp1 :default)
496     (setq ,tmp1 *default-external-format*))
497     (if (eq ,tmp1 (car ,tmp2))
498     (cdr ,tmp2)
499     (setf (car ,tmp2) ,tmp1
500     (cdr ,tmp2) (,%name ,tmp1))))))))
501 rtoy 1.1
502    
503    
504 rtoy 1.6 ;;; OCTETS-TO-CHAR, CHAR-TO-OCTETS -- Public
505     ;;;
506     ;;; Read and write one character through an external-format
507     ;;;
508 rtoy 1.1 (defmacro octets-to-char (external-format state count input unput)
509 rtoy 1.6 (let ((s (gensym "STATE-"))
510     (code (gensym "CODE-")))
511     `(let ((,s ,state))
512     (when (null ,s)
513     ;; Need our own state variable to hold our state and the
514     ;; state for the external format.
515     (setq ,s (setf ,state (cons nil nil))))
516     (if (car ,s)
517     ;; Return the trailing surrgate. Must set count to 0 to
518     ;; tell the stream code we didn't consume any octets!
519     (prog1 (the character (car ,s))
520     (setf (car ,s) nil)
521     (setf ,count 0))
522     (let ((,code (octets-to-codepoint ,external-format
523     (cdr ,s) ,count ,input ,unput)))
524     (declare (type (unsigned-byte 31) ,code))
525     (cond ((or (lisp::surrogatep ,code)
526     (> ,code #x10FFFF))
527     #-(and unicode (not unicode-bootstrap)) #\?
528     #+(and unicode (not unicode-bootstrap)) #\U+FFFD)
529     #+unicode
530     ((> ,code #xFFFF)
531     (multiple-value-bind (hi lo)
532     (lisp::surrogates ,code)
533     (setf (car ,state) lo)
534     hi))
535     (t (code-char ,code))))))))
536    
537     ;; This doesn't handle surrogate code units correctly. It just
538     ;; outputs the surrogate value to the external format. External
539     ;; formats almost never allow surrogate code points (except UTF-16).
540 rtoy 1.1 (defmacro char-to-octets (external-format char state output)
541     `(codepoint-to-octets ,external-format (char-code ,char) ,state ,output))
542    
543 rtoy 1.6
544 rtoy 1.3 (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
545 rtoy 1.6 `(lambda (string start end buffer &aux (ptr 0) (state nil) (code 0) (c 0) widep)
546 rtoy 1.3 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
547     (type simple-string string)
548 rtoy 1.6 (type kernel:index start end ptr)
549 rtoy 1.3 (type (simple-array (unsigned-byte 8) (*)) buffer)
550 rtoy 1.6 (type lisp:codepoint code c)
551     (type (or null fixnum) widep)
552 rtoy 1.3 (ignorable state))
553 rtoy 1.6 (loop with i of-type kernel:index = start
554     while (< i end)
555     do
556     (multiple-value-bind (c widep)
557     (lisp::codepoint string i end)
558     (incf i (if widep 2 1))
559     (codepoint-to-octets ,extfmt c state
560     (lambda (b)
561     (when (= ptr (length buffer))
562     (setq buffer (adjust-array buffer (* 2 ptr))))
563     (setf (aref buffer (1- (incf ptr))) b)))))))
564 rtoy 1.3
565 rtoy 1.2 (defun string-to-octets (string &key (start 0) end (external-format :default)
566     (buffer nil bufferp))
567 rtoy 1.1 (declare (type string string)
568 rtoy 1.6 (type kernel:index start)
569     (type (or kernel:index null) end)
570 rtoy 1.3 (type (or (simple-array (unsigned-byte 8) (*)) null) buffer))
571 rtoy 1.6 (let* ((buffer (or buffer (make-array (length string)
572 rtoy 1.3 :element-type '(unsigned-byte 8)))))
573 rtoy 1.6 (multiple-value-bind (buffer ptr)
574     (lisp::with-array-data ((string string) (start start) (end end))
575     (funcall (ef-string-to-octets external-format)
576     string start end buffer))
577     (values (if bufferp buffer (lisp::shrink-vector buffer ptr)) ptr))))
578 rtoy 1.3
579     (def-ef-macro ef-octets-to-string (extfmt lisp::lisp +ef-max+ +ef-os+)
580 rtoy 1.6 `(lambda (octets ptr end string &aux (pos -1) (count 0) (state nil) (code 0))
581 rtoy 1.3 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
582     (type (simple-array (unsigned-byte 8) (*)) octets)
583 rtoy 1.6 (type kernel:index end count)
584 rtoy 1.3 (type (integer -1 (#.array-dimension-limit)) ptr pos)
585     (type simple-string string)
586 rtoy 1.6 (type lisp:codepoint code)
587 rtoy 1.3 (ignorable state))
588     (loop until (>= ptr end)
589     do (when (= pos (length string))
590     (setq string (adjust-array string (* 2 pos))))
591 rtoy 1.6 (setf code
592     (octets-to-codepoint ,extfmt state count
593     (aref octets (incf ptr)) ;;@@ EOF??
594     (lambda (n) (decf ptr n))))
595     ;; Convert codepoint to UTF-16 surrogate pairs if needed
596     (multiple-value-bind (high low)
597     (surrogates code)
598     (setf (aref string (incf pos)) high)
599     (when low
600     (setf (aref string (incf pos)) low)))
601 rtoy 1.5 finally (return (values string (1+ pos))))))
602 rtoy 1.1
603 rtoy 1.2 (defun octets-to-string (octets &key (start 0) end (external-format :default)
604     (string nil stringp))
605 rtoy 1.1 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
606 rtoy 1.6 (type kernel:index start)
607     (type (or kernel:index null) end)
608 rtoy 1.3 (type (or simple-string null) string))
609     (multiple-value-bind (string pos)
610 rtoy 1.6 (funcall (ef-octets-to-string external-format)
611 rtoy 1.3 octets (1- start) (1- (or end (length octets)))
612     (or string (make-string (length octets))))
613 rtoy 1.2 (values (if stringp string (lisp::shrink-vector string pos)) pos)))
614 rtoy 1.1
615    
616    
617 rtoy 1.3 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
618     `(lambda (string start end result &aux (ptr 0) (state nil))
619     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
620     (type simple-string string)
621 rtoy 1.6 (type kernel:index start end ptr)
622 rtoy 1.3 (type simple-base-string result)
623     (ignorable state))
624 rtoy 1.9 (do ((k start (1+ k)))
625     ((>= k end)
626     (values result ptr))
627     (multiple-value-bind (code widep)
628     (lisp::codepoint string k end)
629     (when widep (incf k))
630     (codepoint-to-octets ,extfmt code state
631     (lambda (b)
632     (when (= ptr (length result))
633     (setq result (adjust-array result (* 2 ptr))))
634     (setf (aref result (1- (incf ptr)))
635     (code-char b))))))))
636 rtoy 1.3
637     (defun string-encode (string external-format &optional (start 0) end)
638 rtoy 1.6 "Encode the given String using External-Format and return a new
639     string. The characters of the new string are the octets of the
640     encoded result, with each octet converted to a character via
641     code-char. This is the inverse to String-Decode"
642     (when (zerop (length string))
643     (return-from string-encode string))
644 rtoy 1.3 (multiple-value-bind (result ptr)
645     (lisp::with-array-data ((string string) (start start) (end end))
646 rtoy 1.6 (funcall (ef-encode external-format) string start end
647 rtoy 1.3 (make-string (length string) :element-type 'base-char)))
648     (lisp::shrink-vector result ptr)))
649    
650     (def-ef-macro ef-decode (extfmt lisp::lisp +ef-max+ +ef-de+)
651     `(lambda (string ptr end result &aux (pos -1) (count 0) (state nil))
652     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
653     (type simple-string string)
654 rtoy 1.6 (type kernel:index end count)
655 rtoy 1.3 (type (integer -1 (#.array-dimension-limit)) ptr pos)
656     (type simple-string result)
657     (ignorable state))
658     (loop until (>= ptr end)
659     ;; increasing size of result shouldn't ever be necessary, unless
660     ;; someone implements an encoding smaller than the source string...
661 rtoy 1.9 do (let ((code (octets-to-codepoint ,extfmt state count
662     ;; note the need to return NIL for EOF
663     (if (= (1+ ptr) (length string))
664     nil
665     (char-code (char string (incf ptr))))
666     (lambda (n) (decf ptr n)))))
667     (multiple-value-bind (hi lo)
668     (lisp::surrogates code)
669     (setf (schar result (incf pos)) hi)
670     (when lo
671     (setf (schar result (incf pos)) lo))))
672 rtoy 1.5 finally (return (values result (1+ pos))))))
673 rtoy 1.3
674     (defun string-decode (string external-format &optional (start 0) end)
675 rtoy 1.6 "Decode String using the given External-Format and return the new
676     string. The input string is treated as if it were an array of
677     octets, where the char-code of each character is the octet. This is
678     the inverse of String-Encode."
679     (when (zerop (length string))
680     (return-from string-decode string))
681 rtoy 1.3 (multiple-value-bind (result pos)
682     (lisp::with-array-data ((string string) (start start) (end end))
683 rtoy 1.6 (funcall (ef-decode external-format)
684 rtoy 1.3 string (1- start) (1- end) (make-string (length string))))
685 rtoy 1.5 (lisp::shrink-vector result pos)))

  ViewVC Help
Powered by ViewVC 1.1.5