/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Diff of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.15 by rtoy, Wed Aug 26 16:25:41 2009 UTC revision 1.16 by rtoy, Wed Sep 9 15:51:27 2009 UTC
# Line 63  Line 63 
63    ;; flushed to the output stream.  A NIL value means the external    ;; flushed to the output stream.  A NIL value means the external
64    ;; format does not need to do anything special.    ;; format does not need to do anything special.
65    (flush-state nil :type (or null function) :read-only t)    (flush-state nil :type (or null function) :read-only t)
66      ;;
67      ;; Function to copy the state of the external-format.  If NIL, then
68      ;; there is no state to be copied.
69      (copy-state nil :type (or null function) :read-only t)
70    (cache nil :type (or null simple-vector))    (cache nil :type (or null simple-vector))
71    ;;    ;;
72    ;; Minimum number of octets needed to form a codepoint    ;; Minimum number of octets needed to form a codepoint
# Line 90  Line 94 
94  (defun %intern-ef (ef)  (defun %intern-ef (ef)
95    (setf (gethash (ef-name ef) *external-formats*) ef))    (setf (gethash (ef-name ef) *external-formats*) ef))
96    
97  (declaim (inline ef-octets-to-code ef-code-to-octets ef-flush-state  (declaim (inline ef-octets-to-code ef-code-to-octets ef-flush-state ef-copy-state
98                   ef-cache ef-min-octets ef-max-octets))                   ef-cache ef-min-octets ef-max-octets))
99    
100  (defun ef-octets-to-code (ef)  (defun ef-octets-to-code (ef)
# Line 102  Line 106 
106  (defun ef-flush-state (ef)  (defun ef-flush-state (ef)
107    (efx-flush-state (ef-efx ef)))    (efx-flush-state (ef-efx ef)))
108    
109    (defun ef-copy-state (ef)
110      (efx-copy-state (ef-efx ef)))
111    
112  (defun ef-cache (ef)  (defun ef-cache (ef)
113    (efx-cache (ef-efx ef)))    (efx-cache (ef-efx ef)))
114    
# Line 127  Line 134 
134    
135  ;;; DEFINE-EXTERNAL-FORMAT  -- Public  ;;; DEFINE-EXTERNAL-FORMAT  -- Public
136  ;;;  ;;;
137  ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets flush-state  ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets
138    ;;;       flush-state copy-state
139    ;;;
140  ;;;   Define a new external format.  Min/Max/Size are the minimum and  ;;;   Define a new external format.  Min/Max/Size are the minimum and
141  ;;;   maximum number of octets that make up a character (:size N is just  ;;;   maximum number of octets that make up a character (:size N is just
142  ;;;   shorthand for :min N :max N).  Slots is a list of slot descriptions  ;;;   shorthand for :min N :max N).  Slots is a list of slot descriptions
# Line 140  Line 149 
149  ;;; octets-to-code (state input unput &rest vars)  ;;; octets-to-code (state input unput &rest vars)
150  ;;;   Defines a form to be used by the external format to convert  ;;;   Defines a form to be used by the external format to convert
151  ;;;   octets to a code point.  State is a form that can be used by the  ;;;   octets to a code point.  State is a form that can be used by the
152  ;;;   body to access the state variable of the stream.  Input is a  ;;;   body to access the state of the stream.  Input is a form that
153  ;;;   form that can be used to read one more octets from the input  ;;;   can be used to read one octet from the input stream.  (It can be
154  ;;;   strema.  Similarly, Unput is a form to put back one octet to the  ;;;   called as many times as needed.)  Similarly, Unput is a form to
155  ;;;   input stream.  Vars is a list of vars that need to be defined  ;;;   put back one octet to the input stream.  Vars is a list of vars
156  ;;;   for any symbols used within the form.  ;;;   that need to be defined for any symbols used within the form.
157  ;;;  ;;;
158  ;;;   This should return two values: the code and the number of octets  ;;;   This should return two values: the code and the number of octets
159  ;;;   read to form the code.  ;;;   read to form the code.
# Line 161  Line 170 
170  ;;;   any state when an output stream is closed.  Similar to  ;;;   any state when an output stream is closed.  Similar to
171  ;;;   CODE-TO-OCTETS, but there is no code.  ;;;   CODE-TO-OCTETS, but there is no code.
172  ;;;  ;;;
173    ;;; copy-state (state &rest vars)
174    ;;;   Defines a form to copy any state needed by the external format.
175    ;;;   This should probably be a deep copy so that if the original
176    ;;;   state is modified, the copy is not.
177    ;;;
178  ;;; Note: external-formats work on code-points, not  ;;; Note: external-formats work on code-points, not
179  ;;;   characters, so that the entire 31 bit ISO-10646 range can be  ;;;   characters, so that the entire 31 bit ISO-10646 range can be
180  ;;;   used internally regardless of the size of a character recognized  ;;;   used internally regardless of the size of a character recognized
# Line 169  Line 183 
183  ;;;   CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)  ;;;   CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)
184  ;;;  ;;;
185  (defmacro define-external-format (name (&rest args) (&rest slots)  (defmacro define-external-format (name (&rest args) (&rest slots)
186                                         &optional octets-to-code code-to-octets flush-state)                                         &optional octets-to-code code-to-octets
187                                           flush-state copy-state)
188    (when (and (oddp (length args)) (not (= (length args) 1)))    (when (and (oddp (length args)) (not (= (length args) 1)))
189      (warn "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT." args))      (warn "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT." args))
190    (let* ((tmp (gensym))    (let* ((tmp (gensym))
# Line 204  Line 219 
219                       (let (,@',slotb                       (let (,@',slotb
220                             (,code ',code)                             (,code ',code)
221                             ,@(loop for var in vars collect `(,var (gensym))))                             ,@(loop for var in vars collect `(,var (gensym))))
222                         `(let ((,',code (the (unsigned-byte 21) ,,',tmp)))                         `(let ((,',code (the lisp:codepoint ,,',tmp)))
223                            (declare (ignorable ,',code))                            (declare (ignorable ,',code))
224                            ,,body))))                            ,,body))))
225                  (flush-state ((state output &rest vars) body)                  (flush-state ((state output &rest vars) body)
# Line 212  Line 227 
227                       (declare (ignorable ,state ,output))                       (declare (ignorable ,state ,output))
228                       (let (,@',slotb                       (let (,@',slotb
229                             ,@(loop for var in vars collect `(,var (gensym))))                             ,@(loop for var in vars collect `(,var (gensym))))
230                           ,body)))
231                    (copy-state ((state &rest vars) body)
232                      `(lambda (,state)
233                         (declare (ignorable ,state))
234                         (let (,@',slotb
235                               ,@(loop for var in vars collect `(,var (gensym))))
236                         ,body))))                         ,body))))
237         (%intern-ef (make-external-format ,name         (%intern-ef (make-external-format ,name
238                      ,(if base                      ,(if base
239                           `(ef-efx (find-external-format ,(ef-name base)))                           `(ef-efx (find-external-format ,(ef-name base)))
240                           `(make-efx :octets-to-code ,octets-to-code                           `(make-efx :octets-to-code ,octets-to-code
241                                      :code-to-octets ,code-to-octets                                      :code-to-octets ,code-to-octets
242                                      :flush-state ,flush-state                                      :flush-state ,flush-state
243                                        :copy-state ,copy-state
244                                      :cache (make-array +ef-max+                                      :cache (make-array +ef-max+
245                                                            :initial-element nil)                                                            :initial-element nil)
246                                      :min ,(min min max) :max ,(max min max)))                                      :min ,(min min max) :max ,(max min max)))
# Line 236  Line 258 
258  ;;; octets.  They have to be composed with a non-composing external-format  ;;; octets.  They have to be composed with a non-composing external-format
259  ;;; to be of any use.  ;;; to be of any use.
260  ;;;  ;;;
261    ;;;
262    ;;; name (&key min max size) input output
263    ;;;   Defines a new composing external format.  The parameters Min,
264    ;;;   Max, and Size are the same as for defining an external format.
265    ;;;   The parameters input and output are forms to handle input and
266    ;;;   output.
267    ;;;
268    ;;; input (state input unput &rest vars)
269    ;;;   Defines a form to be used by the composing external format when
270    ;;;   reading input to transform a codepoint (or sequence of
271    ;;;   codepoints) to another.  State is a form that can be used by the
272    ;;;   body to access the state of the external format.  Input is a
273    ;;;   form that can be used to read one code point from the input
274    ;;;   stream.  (Input returns two values, the codepoint and the number
275    ;;;   of octets read.)  It may be called as many times as needed.
276    ;;;   This returns two values: the codepoint of the character (or NIL)
277    ;;;   and the number of octets read.  Similarly, Unput is a form to
278    ;;;   put back one octet to the input stream.  Vars is a list of vars
279    ;;;   that need to be defined for any symbols used within the form.
280    ;;;
281    ;;;   This should return two values: the code and the number of octets
282    ;;;   read to form the code.
283    ;;;
284    ;;; output (code state output &rest vars)
285    ;;;   Defines a form to be used by the composing external format to
286    ;;;   convert a code point to octets for output.  Code is the code
287    ;;;   point to be converted.  State is a form to access the current
288    ;;;   value of the stream's state variable.  Output is a form that
289    ;;;   writes one octet to the output stream.
290    
291  (defmacro define-composing-external-format (name (&key min max size)  (defmacro define-composing-external-format (name (&key min max size)
292                                                   input output)                                                   input output)
293    (let ((tmp (gensym))    (let ((tmp (gensym))
# Line 245  Line 297 
297                    `(lambda (,state ,input ,unput)                    `(lambda (,state ,input ,unput)
298                       (declare (ignorable ,state ,input ,unput)                       (declare (ignorable ,state ,input ,unput)
299                                (optimize (ext:inhibit-warnings 3)))                                (optimize (ext:inhibit-warnings 3)))
300                       (let ((,input `(the (values (or (unsigned-byte 21) null)                       (let ((,input `(the (values (or lisp:codepoint null)
301                                                   kernel:index)                                                   kernel:index)
302                                           ,,input))                                           ,,input))
303                             ,@(loop for var in vars collect `(,var (gensym))))                             ,@(loop for var in vars collect `(,var (gensym))))
# Line 256  Line 308 
308                                (optimize (ext:inhibit-warnings 3)))                                (optimize (ext:inhibit-warnings 3)))
309                       (let ((,code ',code)                       (let ((,code ',code)
310                             ,@(loop for var in vars collect `(,var (gensym))))                             ,@(loop for var in vars collect `(,var (gensym))))
311                         `(let ((,',code (the (unsigned-byte 21) ,,',tmp)))                         `(let ((,',code (the lisp:codepoint ,,',tmp)))
312                            (declare (ignorable ,',code))                            (declare (ignorable ,',code))
313                            ,,body)))))                            ,,body)))))
314         (%intern-ef (make-external-format ,name         (%intern-ef (make-external-format ,name
# Line 479  Line 531 
531      `(multiple-value-bind (,tmp1 ,tmp2)      `(multiple-value-bind (,tmp1 ,tmp2)
532           ,(funcall (ef-octets-to-code ef) state input unput)           ,(funcall (ef-octets-to-code ef) state input unput)
533         (setf ,count (the kernel:index ,tmp2))         (setf ,count (the kernel:index ,tmp2))
534         (the (or (unsigned-byte 21) null) ,tmp1))))         (the (or lisp:codepoint null) ,tmp1))))
535    
536  (defmacro codepoint-to-octets (external-format code state output)  (defmacro codepoint-to-octets (external-format code state output)
537    (let ((ef (find-external-format external-format)))    (let ((ef (find-external-format external-format)))
# Line 555  Line 607 
607               (setf (car ,nstate) nil ,count 0))               (setf (car ,nstate) nil ,count 0))
608             (let ((code (octets-to-codepoint ,external-format             (let ((code (octets-to-codepoint ,external-format
609                                            (cdr ,nstate) ,count ,input ,unput)))                                            (cdr ,nstate) ,count ,input ,unput)))
610               (declare (type (unsigned-byte 21) code))               (declare (type lisp:codepoint code))
611               ;;@@ on non-Unicode builds, limit to 8-bit chars               ;;@@ on non-Unicode builds, limit to 8-bit chars
612               ;;@@ if unicode-bootstrap, can't use #\u+fffd               ;;@@ if unicode-bootstrap, can't use #\u+fffd
613               (cond ((or (<= #xD800 code #xDFFF) (> code #x10FFFF))               (cond ((or (lisp::surrogatep code) (> code #x10FFFF))
614                      #-(and unicode (not unicode-bootstrap)) #\?                      #-(and unicode (not unicode-bootstrap)) #\?
615                      #+(and unicode (not unicode-bootstrap)) #\U+FFFD)                      #+(and unicode (not unicode-bootstrap)) #\U+FFFD)
616                     #+unicode                     #+unicode
# Line 602  Line 654 
654      (when f      (when f
655        (funcall f state output))))        (funcall f state output))))
656    
657    (defmacro copy-state (external-format state)
658      (let* ((ef (find-external-format external-format))
659             (f (ef-copy-state ef)))
660        (when f
661          (funcall f state))))
662    
663  (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)  (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
664    `(lambda (string start end buffer &aux (ptr 0) (state nil))    `(lambda (string start end buffer &aux (ptr 0) (state nil))
665       (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#       (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
# Line 619  Line 677 
677    
678  (defun string-to-octets (string &key (start 0) end (external-format :default)  (defun string-to-octets (string &key (start 0) end (external-format :default)
679                                       (buffer nil bufferp))                                       (buffer nil bufferp))
680      "Convert String to octets using the specified External-format.  The
681       string is bounded by Start (defaulting to 0) and End (defaulting to
682       the end of the string.  If Buffer is given, the octets are stored
683       there.  If not, a new buffer is created."
684    (declare (type string string)    (declare (type string string)
685             (type kernel:index start)             (type kernel:index start)
686             (type (or kernel:index null) end)             (type (or kernel:index null) end)
# Line 650  Line 712 
712    
713  (defun octets-to-string (octets &key (start 0) end (external-format :default)  (defun octets-to-string (octets &key (start 0) end (external-format :default)
714                                       (string nil stringp))                                       (string nil stringp))
715      "Octets-to-string converts an array of octets in Octets to a string
716      according to the specified External-format.  The array of octets is
717      bounded by Start (defaulting ot 0) and End (defaulting to the end of
718      the array.  If String is given, the string is stored there;
719      otherwise a new string is created."
720    (declare (type (simple-array (unsigned-byte 8) (*)) octets)    (declare (type (simple-array (unsigned-byte 8) (*)) octets)
721             (type kernel:index start)             (type kernel:index start)
722             (type (or kernel:index null) end)             (type (or kernel:index null) end)

Legend:
Removed from v.1.15  
changed lines
  Added in v.1.16

  ViewVC Help
Powered by ViewVC 1.1.5