/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2.4.3.2.23 - (hide annotations)
Thu May 28 16:06:39 2009 UTC (4 years, 10 months ago) by rtoy
Branch: unicode-utf16-extfmt-branch
CVS Tags: unicode-snapshot-2009-06
Changes since 1.2.4.3.2.22: +4 -4 lines
Oops. The codepoint type is in the Lisp package.
1 rtoy 1.1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2     ;;;
3     ;;; **********************************************************************
4     ;;; This code was written by Paul Foley and has been placed in the public
5     ;;; domain.
6     ;;;
7     (ext:file-comment
8 rtoy 1.2.4.3.2.23 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.2.4.3.2.23 2009/05/28 16:06:39 rtoy Exp $")
9 rtoy 1.1 ;;;
10     ;;; **********************************************************************
11     ;;;
12     ;;; Implementation of external-formats
13    
14     (in-package "STREAM")
15    
16     (export '(string-to-octets octets-to-string *default-external-format*
17 rtoy 1.2.4.1 string-encode string-decode))
18 rtoy 1.1
19     (defvar *default-external-format* :iso8859-1)
20    
21 rtoy 1.2.4.1 (defvar *external-formats* (make-hash-table :test 'equal))
22 rtoy 1.1 (defvar *external-format-aliases* (make-hash-table))
23    
24 rtoy 1.2.4.3.2.1 (defconstant +ef-cin+ 2)
25     (defconstant +ef-cout+ 3)
26     (defconstant +ef-sin+ 4)
27     (defconstant +ef-sout+ 5)
28     (defconstant +ef-os+ 6)
29     (defconstant +ef-so+ 7)
30     (defconstant +ef-en+ 8)
31     (defconstant +ef-de+ 9)
32     (defconstant +ef-max+ 10)
33 rtoy 1.2.4.1
34 rtoy 1.1 (define-condition external-format-not-implemented (error)
35     ()
36     (:report
37     (lambda (condition stream)
38     (declare (ignore condition))
39     (format stream "Attempting unimplemented external-format I/O."))))
40    
41     (defun %efni (a b c d)
42     (declare (ignore a b c d))
43     (error 'external-format-not-implemented))
44    
45 rtoy 1.2.4.3 (defstruct efx
46     (octets-to-code #'%efni :type function :read-only t)
47     (code-to-octets #'%efni :type function :read-only t)
48 rtoy 1.2.4.3.2.1 (cache nil :type (or null simple-vector))
49     (min 1 :type kernel:index :read-only t)
50     (max 1 :type kernel:index :read-only t))
51 rtoy 1.2.4.3
52 rtoy 1.1 (defstruct (external-format
53     (:conc-name ef-)
54     (:print-function %print-external-format)
55 rtoy 1.2.4.3 (:constructor make-external-format (name efx composingp
56     &optional slots slotd)))
57 rtoy 1.2.4.1 (name (ext:required-argument) :type (or keyword cons) :read-only t)
58 rtoy 1.2.4.3 (efx (ext:required-argument) :type efx :read-only t)
59 rtoy 1.2.4.1 (composingp (ext:required-argument) :type boolean :read-only t)
60 rtoy 1.1 (slots #() :type simple-vector :read-only t)
61 rtoy 1.2.4.3 (slotd nil :type list :read-only t))
62 rtoy 1.1
63     (defun %print-external-format (ef stream depth)
64     (declare (ignore depth))
65     (print-unreadable-object (ef stream :type t :identity t)
66     (princ (ef-name ef) stream)))
67    
68 rtoy 1.2.4.3.2.1 (defun %intern-ef (ef)
69 rtoy 1.2.4.1 (setf (gethash (ef-name ef) *external-formats*) ef))
70    
71 rtoy 1.2.4.3.2.1 (declaim (inline ef-octets-to-code ef-code-to-octets ef-cache
72     ef-min-octets ef-max-octets))
73 rtoy 1.2.4.3
74     (defun ef-octets-to-code (ef)
75     (efx-octets-to-code (ef-efx ef)))
76    
77     (defun ef-code-to-octets (ef)
78     (efx-code-to-octets (ef-efx ef)))
79    
80     (defun ef-cache (ef)
81     (efx-cache (ef-efx ef)))
82    
83 rtoy 1.2.4.3.2.1 (defun ef-min-octets (ef)
84     (efx-min (ef-efx ef)))
85    
86     (defun ef-max-octets (ef)
87     (efx-max (ef-efx ef)))
88    
89     (eval-when (:compile-toplevel :load-toplevel :execute)
90     (defun %merge-slots (old new)
91     (let* ((pos (length old))
92     (tmp (mapcar (lambda (x)
93     (let* ((name (if (consp x) (first x) x))
94     (init (if (consp x) (second x) nil))
95     (list (if (consp x) (nthcdr 2 x) nil))
96     (prev (assoc name old))
97     (posn (if prev (second prev) (1- (incf pos)))))
98     (list name posn init (getf list :type t))))
99     new)))
100     (delete-duplicates (stable-sort (append old tmp) #'< :key #'second)
101     :key #'second))))
102    
103     ;;; DEFINE-EXTERNAL-FORMAT -- Public
104     ;;;
105     ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets
106     ;;; Define a new external format. Min/Max/Size are the minimum and
107     ;;; maximum number of octets that make up a character (:size N is just
108     ;;; shorthand for :min N :max N). Slots is a list of slot descriptions
109     ;;; similar to defstruct.
110     ;;;
111     ;;; name (base) (&rest slots)
112     ;;; Define an external format based on a previously-defined external
113     ;;; format, Base. The slot names used in Slots must match those in Base.
114     ;;;
115 rtoy 1.2.4.3.2.10 ;;; octets-to-code (state input unput &rest vars)
116     ;;; Defines a form to be used by the external format to convert
117     ;;; octets to a code point. State is a form that can be used by the
118     ;;; body to access the state variable of the stream. Input is a
119     ;;; form that can be used to read one more octets from the input
120     ;;; strema. Similarly, Unput is a form to put back one octet to the
121     ;;; input stream. Vars is a list of vars that need to be defined
122     ;;; for any symbols used within the form.
123     ;;;
124     ;;; This should return two values: the code and the number of octets
125     ;;; read to form the code.
126     ;;;
127     ;;; code-to-octets (code state output &rest vars)
128     ;;; Defines a form to be used by the external format to convert a
129     ;;; code point to octets for output. Code is the code point to be
130     ;;; converted. State is a form to access the current value of the
131     ;;; stream's state variable. Output is a form that writes one octet
132     ;;; to the output stream.
133     ;;;
134 rtoy 1.2.4.3.2.12 ;;; Note: external-formats work on code-points, not
135 rtoy 1.2.4.3.2.10 ;;; characters, so that the entire 31 bit ISO-10646 range can be
136     ;;; used internally regardless of the size of a character recognized
137     ;;; by Lisp and external formats can be useful to people who want to
138     ;;; process characters outside the Lisp range (see
139     ;;; CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)
140     ;;;
141 rtoy 1.2.4.3.2.1 (defmacro define-external-format (name (&rest args) (&rest slots)
142     &optional octets-to-code code-to-octets)
143     (when (and (oddp (length args)) (not (= (length args) 1)))
144     (warn "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT." args))
145 rtoy 1.2.4.3.2.8 (let* ((tmp (gensym))
146 rtoy 1.2.4.3.2.1 (min (if (evenp (length args))
147     (or (getf args :min) (getf args :size) 1)
148     1))
149     (max (if (evenp (length args))
150     (or (getf args :max) (getf args :size) 6)
151     6))
152     (base (if (= (length args) 1)
153     (find-external-format (first args))
154     nil))
155     (bslotd (if base (ef-slotd base) nil))
156     (slotd (%merge-slots bslotd slots))
157     (slotb (loop for slot in slotd
158     collect `(,(first slot)
159 rtoy 1.2.4.3.2.8 `(the ,',(fourth slot)
160     ;; IDENTITY is here to protect against SETF
161     (identity (svref %slots% ,',(second slot))))))))
162 rtoy 1.2.4.1 `(macrolet ((octets-to-code ((state input unput &rest vars) body)
163 rtoy 1.2.4.3.2.8 `(lambda (,state ,input ,unput)
164     (declare (ignorable ,state ,input ,unput)
165 rtoy 1.2.4.1 (optimize (ext:inhibit-warnings 3)))
166 rtoy 1.2.4.3.2.1 (let (,@',slotb
167     (,input `(the (or (unsigned-byte 8) null) ,,input))
168 rtoy 1.2.4.1 ,@(loop for var in vars collect `(,var (gensym))))
169     ,body)))
170     (code-to-octets ((code state output &rest vars) body)
171 rtoy 1.2.4.3.2.8 `(lambda (,',tmp ,state ,output)
172     (declare (ignorable ,state ,output)
173 rtoy 1.2.4.1 (optimize (ext:inhibit-warnings 3)))
174 rtoy 1.2.4.3.2.1 (let (,@',slotb
175     (,code ',code)
176 rtoy 1.2.4.1 ,@(loop for var in vars collect `(,var (gensym))))
177 rtoy 1.2.4.3.2.18 `(let ((,',code (the (unsigned-byte 21) ,,',tmp)))
178 rtoy 1.2.4.1 (declare (ignorable ,',code))
179     ,,body)))))
180 rtoy 1.2.4.3.2.1 (%intern-ef (make-external-format ,name
181     ,(if base
182     `(ef-efx (find-external-format ,(ef-name base)))
183     `(make-efx :octets-to-code ,octets-to-code
184     :code-to-octets ,code-to-octets
185     :cache (make-array +ef-max+
186     :initial-element nil)
187     :min ,(min min max) :max ,(max min max)))
188     nil
189 rtoy 1.2.4.3.2.18 (let* ,(loop for x in slotd
190     collect (list (first x) (third x)))
191     (vector ,@(mapcar #'first slotd)))
192 rtoy 1.2.4.3.2.1 ',slotd)))))
193 rtoy 1.2.4.1
194 rtoy 1.2.4.3.2.1 ;;; DEFINE-COMPOSING-EXTERNAL-FORMAT -- Public
195     ;;;
196     ;;; A composing-external-format differs from an (ordinary) external-format
197     ;;; in that it translates characters (really codepoints, of course) into
198     ;;; other characters, rather than translating between characters and binary
199     ;;; octets. They have to be composed with a non-composing external-format
200     ;;; to be of any use.
201     ;;;
202     (defmacro define-composing-external-format (name (&key min max size)
203     input output)
204 rtoy 1.2.4.3.2.8 (let ((tmp (gensym))
205 rtoy 1.2.4.3.2.1 (min (or min size 1))
206     (max (or max size 1)))
207 rtoy 1.2.4.1 `(macrolet ((input ((state input unput &rest vars) body)
208 rtoy 1.2.4.3.2.8 `(lambda (,state ,input ,unput)
209     (declare (ignorable ,state ,input ,unput)
210 rtoy 1.2.4.1 (optimize (ext:inhibit-warnings 3)))
211 rtoy 1.2.4.3.2.18 (let ((,input `(the (values (or (unsigned-byte 21) null)
212 rtoy 1.2.4.3.2.1 kernel:index)
213 rtoy 1.2.4.1 ,,input))
214     ,@(loop for var in vars collect `(,var (gensym))))
215     ,body)))
216     (output ((code state output &rest vars) body)
217 rtoy 1.2.4.3.2.8 `(lambda (,',tmp ,state ,output)
218     (declare (ignorable ,state ,output)
219 rtoy 1.2.4.1 (optimize (ext:inhibit-warnings 3)))
220     (let ((,code ',code)
221     ,@(loop for var in vars collect `(,var (gensym))))
222 rtoy 1.2.4.3.2.18 `(let ((,',code (the (unsigned-byte 21) ,,',tmp)))
223 rtoy 1.2.4.1 (declare (ignorable ,',code))
224     ,,body)))))
225 rtoy 1.2.4.3.2.1 (%intern-ef (make-external-format ,name
226     (make-efx :octets-to-code ,input
227     :code-to-octets ,output
228     :min ,(min min max) :max ,(max min max))
229     t
230     #() '())))))
231 rtoy 1.1
232     (defun load-external-format-aliases ()
233 rtoy 1.2.4.3.2.1 (let ((*package* (find-package "KEYWORD"))
234     (unix::*filename-encoding* :iso8859-1))
235 rtoy 1.2.4.3.2.19 (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil :external-format :iso8859-1)
236 rtoy 1.1 (when stm
237 rtoy 1.2.4.3.2.1 (do ((alias (read stm nil stm) (read stm nil stm))
238     (value (read stm nil stm) (read stm nil stm)))
239     ((or (eq alias stm) (eq value stm))
240     (unless (eq alias stm)
241     (warn "External-format aliases file ends early.")))
242     (if (and (keywordp alias) (keywordp value))
243     (setf (gethash alias *external-format-aliases*) value)
244     (warn "Bad entry in external-format aliases file: ~S => ~S."
245     alias value)))))))
246 rtoy 1.1
247 rtoy 1.2.4.1 (defun %find-external-format (name)
248 rtoy 1.2.4.3.2.5 ;; avoid loading files, etc., early in the boot sequence
249     (when (or (eq name :iso8859-1)
250     (and (eq name :default) (eq *default-external-format* :iso8859-1)))
251     (return-from %find-external-format
252     (gethash :iso8859-1 *external-formats*)))
253    
254     (when (zerop (hash-table-count *external-format-aliases*))
255     (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
256     (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
257     (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
258     (load-external-format-aliases))
259 rtoy 1.1
260     (do ((tmp (gethash name *external-format-aliases*)
261     (gethash tmp *external-format-aliases*))
262     (cnt 0 (1+ cnt)))
263     ((or (null tmp) (= cnt 50))
264     (unless (null tmp)
265     (error "External-format aliasing depth exceeded.")))
266     (setq name tmp))
267    
268     (or (gethash name *external-formats*)
269 rtoy 1.2 (and (let ((*package* (find-package "STREAM"))
270 rtoy 1.2.4.3.2.1 (lisp::*enable-package-locked-errors* nil)
271     (*default-external-format* :iso8859-1)
272 rtoy 1.2.4.3.2.9 (unix::*filename-encoding* :iso8859-1)
273 rtoy 1.2.4.3.2.19 (s (open (format nil "ext-formats:~(~A~).lisp" name) :if-does-not-exist nil
274     :external-format :iso8859-1)))
275 rtoy 1.2.4.3.2.9 (when s
276     (null (nth-value 1 (ext:compile-from-stream s)))))
277 rtoy 1.2.4.1 (gethash name *external-formats*))))
278    
279     (defun %composed-ef-name (a b)
280     (if (consp a) (append a (list b)) (list a b)))
281    
282 rtoy 1.2.4.3.2.1 (defun %compose-external-formats (a b)
283 rtoy 1.2.4.1 (when (ef-composingp a)
284     (error "~S is a Composing-External-Format." (ef-name a)))
285     (unless (ef-composingp b)
286     (error "~S is not a Composing-External-Format." (ef-name b)))
287 rtoy 1.2.4.3 (make-external-format
288     (%composed-ef-name (ef-name a) (ef-name b))
289     (make-efx
290 rtoy 1.2.4.3.2.8 :octets-to-code (lambda (state input unput)
291     (funcall (ef-octets-to-code b) state
292     (funcall (ef-octets-to-code a)
293     state input unput)
294 rtoy 1.2.4.3 unput))
295 rtoy 1.2.4.3.2.8 :code-to-octets (lambda (code state output)
296     (funcall (ef-code-to-octets b) code state
297 rtoy 1.2.4.3 `(lambda (x)
298     ,(funcall (ef-code-to-octets a)
299     'x state output))))
300 rtoy 1.2.4.3.2.1 :cache (make-array +ef-max+ :initial-element nil)
301     :min (* (ef-min-octets a) (ef-min-octets b))
302     :max (* (ef-max-octets a) (ef-max-octets b)))
303 rtoy 1.2.4.3 nil #() '()))
304 rtoy 1.2.4.1
305     (defun find-external-format (name &optional (error-p t))
306     (when (external-format-p name)
307     (return-from find-external-format name))
308    
309     (or (if (consp name) (every #'keywordp name) (keywordp name))
310     (error "~S is not a valid external format name." name))
311 rtoy 1.1
312 rtoy 1.2.4.1 (when (eq name :default)
313     (setq name *default-external-format*))
314    
315     (when (and (consp name) (not (cdr name)))
316     (setq name (car name)))
317    
318 rtoy 1.2.4.3.2.5 (flet ((not-found ()
319     (when (equal *default-external-format* name)
320     (setq *default-external-format* :iso8859-1))
321     (if error-p (error "External format ~S not found." name) nil)))
322     (if (consp name)
323     (let ((efs (mapcar #'%find-external-format name)))
324     (if (member nil efs)
325     (not-found)
326     (let ((name (reduce #'%composed-ef-name (mapcar #'ef-name efs))))
327     (or (gethash name *external-formats*)
328     (%intern-ef (reduce #'%compose-external-formats efs))))))
329     (or (%find-external-format name) (not-found)))))
330 rtoy 1.1
331 rtoy 1.2.4.3.2.1 (defun flush-external-formats ()
332     (maphash (lambda (name ef)
333     (declare (ignore name))
334     (fill (ef-cache ef) nil))
335     *external-formats*))
336    
337 rtoy 1.2.4.3.2.5 (defvar *.table-inverse.* (make-hash-table :test 'eq :size 7))
338    
339     (defun invert-table (table)
340     (declare (type (or (simple-array (unsigned-byte 31) *)
341     (simple-array (unsigned-byte 16) *))
342     table)
343     (optimize (speed 3) (space 0) (safety 0) (debug 0)
344     (ext:inhibit-warnings 3)))
345     (or (gethash table *.table-inverse.*)
346 rtoy 1.2.4.3.2.18 (let* ((mbits (if (= (array-total-size table) 128) 7 8))
347     (lbits (cond ((> (array-total-size table) 256) 3)
348     ((< (array-total-size table) 100) 6)
349     (t 5)))
350     (hvec (make-array (1+ (ash #x110000 (- 0 mbits lbits)))
351     :element-type '(unsigned-byte 16)
352     :initial-element #xFFFF))
353     (mvec (make-array 0 :element-type '(unsigned-byte 16)))
354     (lvec (make-array 0 :element-type '(unsigned-byte 16)))
355 rtoy 1.2.4.3.2.5 (width (array-dimension table 0))
356     (power (1- (array-rank table)))
357 rtoy 1.2.4.3.2.18 (base (if (= width 94) 1 0))
358     hx mx lx)
359     (assert (and (< power 2) (<= width 256)))
360 rtoy 1.2.4.3.2.5 (dotimes (i (array-total-size table))
361     (declare (type (integer 0 (#.array-dimension-limit)) i))
362     (let ((tmp i) (val (row-major-aref table i)) (z 0))
363     (declare (type (integer 0 (#.array-dimension-limit)) tmp)
364 rtoy 1.2.4.3.2.18 (type (unsigned-byte 16) z))
365     (unless (= val #xFFFE)
366     (when (plusp power)
367 rtoy 1.2.4.3.2.5 (multiple-value-bind (x y) (floor tmp width)
368     (setq tmp x)
369     (setq z (logior z (ash (the (integer 0 255) (+ y base))
370     (the (integer 0 24)
371 rtoy 1.2.4.3.2.18 (* 8 power)))))))
372     (setq hx (ash val (- 0 mbits lbits)))
373     (when (= (aref hvec hx) #xFFFF)
374     (setf (aref hvec hx) (length mvec))
375     (let ((tmp (make-array (+ (length mvec) (ash 1 mbits))
376     :element-type '(unsigned-byte 16)
377     :initial-element #xFFFF)))
378     (replace tmp mvec)
379     (setq mvec tmp)))
380     (setq mx (logand (ash val (- lbits)) (lognot (ash -1 mbits))))
381     (when (= (aref mvec (+ hx mx)) #xFFFF)
382     (setf (aref mvec (+ hx mx)) (length lvec))
383     (let ((tmp (make-array (+ (length lvec) (ash 1 lbits))
384     :element-type '(unsigned-byte 16)
385     :initial-element #xFFFF)))
386     (replace tmp lvec)
387     (setq lvec tmp)))
388     (setq lx (logand val (lognot (ash -1 lbits))))
389     (setf (aref lvec (+ (aref mvec (+ hx mx)) lx))
390     (logior z (+ tmp base))))))
391     (setf (gethash table *.table-inverse.*)
392     (lisp::make-ntrie16 :split (logior (ash (1- mbits) 4) (1- lbits))
393     :hvec hvec :mvec mvec :lvec lvec)))))
394    
395     (declaim (inline get-inverse))
396     (defun get-inverse (ntrie code)
397 rtoy 1.2.4.3.2.23 (declare (type lisp::ntrie16 ntrie) (type lisp:codepoint code))
398 rtoy 1.2.4.3.2.18 (let ((n (lisp::qref ntrie code)))
399     (and n (let ((m (aref (lisp::ntrie16-lvec ntrie) n)))
400     (if (= m #xFFFF) nil m)))))
401 rtoy 1.2.4.3.2.5
402    
403 rtoy 1.1 (define-condition void-external-format (error)
404     ()
405     (:report
406     (lambda (condition stream)
407     (declare (ignore condition))
408     (format stream "Attempting I/O through void external-format."))))
409    
410 rtoy 1.2.4.3.2.1 (define-external-format :void (:size 0) ()
411 rtoy 1.1 (octets-to-code (state input unput)
412 rtoy 1.2.4.1 `(error 'void-external-format))
413 rtoy 1.1 (code-to-octets (code state output)
414 rtoy 1.2.4.1 `(error 'void-external-format)))
415 rtoy 1.1
416 rtoy 1.2.4.3.2.1 (define-external-format :iso8859-1 (:size 1) ()
417 rtoy 1.1 (octets-to-code (state input unput)
418 rtoy 1.2.4.1 `(values ,input 1))
419 rtoy 1.1 (code-to-octets (code state output)
420 rtoy 1.2.4.1 `(,output (if (> ,code 255) #x3F ,code))))
421 rtoy 1.1
422 rtoy 1.2.4.3.2.1 ;;; OCTETS-TO-CODEPOINT, CODEPOINT-TO-OCTETS -- Semi-Public
423     ;;;
424     ;;; Normally you'd want to use OCTETS-TO-CHAR and CHAR-TO-OCTETS instead of
425     ;;; these, but that limits you to Lisp's idea of a character - either Latin-1
426     ;;; in 8 bit Lisp images, or the Unicode BMP in 16 bit images. If you want
427     ;;; to read or write texts containing characters not supported by your Lisp,
428     ;;; these macros can be used instead.
429 rtoy 1.1 (defmacro octets-to-codepoint (external-format state count input unput)
430 rtoy 1.2.4.3.2.8 (let ((tmp1 (gensym)) (tmp2 (gensym))
431     (ef (find-external-format external-format)))
432     `(multiple-value-bind (,tmp1 ,tmp2)
433     ,(funcall (ef-octets-to-code ef) state input unput)
434     (setf ,count (the kernel:index ,tmp2))
435 rtoy 1.2.4.3.2.18 (the (or (unsigned-byte 21) null) ,tmp1))))
436 rtoy 1.1
437     (defmacro codepoint-to-octets (external-format code state output)
438 rtoy 1.2.4.3.2.8 (let ((ef (find-external-format external-format)))
439     (funcall (ef-code-to-octets ef) code state output)))
440 rtoy 1.2.4.1
441    
442    
443     (defvar *ef-base* +ef-max+)
444     (defvar *ef-extensions* '())
445    
446     (defun ensure-cache (ef id reqd)
447     (let ((base (or (getf *ef-extensions* id)
448     (setf (getf *ef-extensions* id)
449     (prog1 *ef-base* (incf *ef-base* reqd))))))
450     (when (< (length (ef-cache ef)) (+ base reqd))
451 rtoy 1.2.4.3 (setf (efx-cache (ef-efx ef))
452 rtoy 1.2.4.1 (adjust-array (ef-cache ef) (+ base reqd) :initial-element nil)))
453     base))
454    
455 rtoy 1.2.4.3.2.1 ;;; DEF-EF-MACRO -- Public
456     ;;;
457     ;;;
458 rtoy 1.2.4.1 (defmacro def-ef-macro (name (ef id reqd idx) body)
459 rtoy 1.2.4.3.2.8 (let ((tmp1 (gensym))
460     (tmp2 (gensym))
461     (%name (intern (format nil "%~A" name) (symbol-package name))))
462     `(progn
463     (defun ,%name (,ef)
464     (let* ((,tmp1 (find-external-format ,ef))
465     (,tmp2 ,(if (eq id 'lisp::lisp)
466     idx
467     `(+ (ensure-cache ,tmp1 ',id ,reqd) ,idx))))
468     (funcall (or (aref (ef-cache ,tmp1) ,tmp2)
469     (setf (aref (ef-cache ,tmp1) ,tmp2)
470     (let ((*compile-print* nil)
471     ;; Set default format when we compile so we
472 rtoy 1.2.4.3.2.18 ;; can see compiler messages. If we don't,
473 rtoy 1.2.4.3.2.8 ;; we run into a problem that we might be
474     ;; changing the default format while we're
475     ;; compiling, and we don't know how to output
476     ;; the compiler messages.
477     (*default-external-format* :iso8859-1))
478     (compile nil `(lambda (%slots%)
479     (declare (ignorable %slots%))
480     ,,body)))))
481     (ef-slots ,tmp1))))
482     (declaim (inline ,name))
483     (defun ,name (,tmp1)
484     (let ((,tmp2 (load-time-value (cons nil nil))))
485     (when (eq ,tmp1 :default)
486     (setq ,tmp1 *default-external-format*))
487     (if (eq ,tmp1 (car ,tmp2))
488     (cdr ,tmp2)
489     (setf (car ,tmp2) ,tmp1
490     (cdr ,tmp2) (,%name ,tmp1))))))))
491 rtoy 1.1
492    
493    
494 rtoy 1.2.4.3.2.1 ;;; OCTETS-TO-CHAR, CHAR-TO-OCTETS -- Public
495     ;;;
496     ;;; Read and write one character through an external-format
497     ;;;
498 rtoy 1.1 (defmacro octets-to-char (external-format state count input unput)
499 rtoy 1.2.4.3.2.17 (let ((s (gensym "STATE-"))
500     (code (gensym "CODE-")))
501 rtoy 1.2.4.3.2.15 `(let ((,s ,state))
502     (when (null ,s)
503     ;; Need our own state variable to hold our state and the
504     ;; state for the external format.
505     (setq ,s (setf ,state (cons nil nil))))
506     (if (car ,s)
507     ;; Return the trailing surrgate. Must set count to 0 to
508     ;; tell the stream code we didn't consume any octets!
509     (prog1 (the character (car ,s))
510     (setf (car ,s) nil)
511     (setf ,count 0))
512 rtoy 1.2.4.3.2.17 (let ((,code (octets-to-codepoint ,external-format
513 rtoy 1.2.4.3.2.15 (cdr ,s) ,count ,input ,unput)))
514 rtoy 1.2.4.3.2.17 (declare (type (unsigned-byte 31) ,code))
515 rtoy 1.2.4.3.2.21 (cond ((or (lisp::surrogatep ,code)
516 rtoy 1.2.4.3.2.17 (> ,code #x10FFFF))
517 rtoy 1.2.4.3.2.15 #-(and unicode (not unicode-bootstrap)) #\?
518     #+(and unicode (not unicode-bootstrap)) #\U+FFFD)
519     #+unicode
520 rtoy 1.2.4.3.2.17 ((> ,code #xFFFF)
521 rtoy 1.2.4.3.2.15 (multiple-value-bind (hi lo)
522 rtoy 1.2.4.3.2.17 (lisp::surrogates ,code)
523 rtoy 1.2.4.3.2.15 (setf (car ,state) lo)
524     hi))
525 rtoy 1.2.4.3.2.17 (t (code-char ,code))))))))
526 rtoy 1.2.4.3.2.15
527     ;; This doesn't handle surrogate code units correctly. It just
528     ;; outputs the surrogate value to the external format. External
529     ;; formats almost never allow surrogate code points (except UTF-16).
530 rtoy 1.1 (defmacro char-to-octets (external-format char state output)
531     `(codepoint-to-octets ,external-format (char-code ,char) ,state ,output))
532    
533 rtoy 1.2.4.3.2.1
534 rtoy 1.2.4.1 (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
535 rtoy 1.2.4.3.2.16 `(lambda (string start end buffer &aux (ptr 0) (state nil) (code 0) (c 0) widep)
536 rtoy 1.2.4.1 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
537     (type simple-string string)
538 rtoy 1.2.4.3.2.1 (type kernel:index start end ptr)
539 rtoy 1.2.4.1 (type (simple-array (unsigned-byte 8) (*)) buffer)
540 rtoy 1.2.4.3.2.23 (type lisp:codepoint code c)
541 rtoy 1.2.4.3.2.16 (type (or null fixnum) widep)
542 rtoy 1.2.4.1 (ignorable state))
543 rtoy 1.2.4.3.2.16 (loop with i of-type kernel:index = start
544 rtoy 1.2.4.3.2.14 while (< i end)
545     do
546 rtoy 1.2.4.3.2.16 (multiple-value-bind (c widep)
547 rtoy 1.2.4.3.2.14 (lisp::codepoint string i end)
548 rtoy 1.2.4.3.2.16 (incf i (if widep 2 1))
549     (codepoint-to-octets ,extfmt c state
550 rtoy 1.2.4.3.2.14 (lambda (b)
551     (when (= ptr (length buffer))
552     (setq buffer (adjust-array buffer (* 2 ptr))))
553     (setf (aref buffer (1- (incf ptr))) b)))))))
554 rtoy 1.2.4.1
555 rtoy 1.2 (defun string-to-octets (string &key (start 0) end (external-format :default)
556     (buffer nil bufferp))
557 rtoy 1.1 (declare (type string string)
558 rtoy 1.2.4.3.2.1 (type kernel:index start)
559     (type (or kernel:index null) end)
560 rtoy 1.2.4.1 (type (or (simple-array (unsigned-byte 8) (*)) null) buffer))
561 rtoy 1.2.4.3.2.1 (let* ((buffer (or buffer (make-array (length string)
562 rtoy 1.2.4.1 :element-type '(unsigned-byte 8)))))
563 rtoy 1.2.4.3.2.1 (multiple-value-bind (buffer ptr)
564     (lisp::with-array-data ((string string) (start start) (end end))
565 rtoy 1.2.4.3.2.8 (funcall (ef-string-to-octets external-format)
566 rtoy 1.2.4.3.2.1 string start end buffer))
567     (values (if bufferp buffer (lisp::shrink-vector buffer ptr)) ptr))))
568 rtoy 1.2.4.1
569     (def-ef-macro ef-octets-to-string (extfmt lisp::lisp +ef-max+ +ef-os+)
570 rtoy 1.2.4.3.2.11 `(lambda (octets ptr end string &aux (pos -1) (count 0) (state nil) (code 0))
571 rtoy 1.2.4.1 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
572     (type (simple-array (unsigned-byte 8) (*)) octets)
573 rtoy 1.2.4.3.2.1 (type kernel:index end count)
574 rtoy 1.2.4.1 (type (integer -1 (#.array-dimension-limit)) ptr pos)
575     (type simple-string string)
576 rtoy 1.2.4.3.2.23 (type lisp:codepoint code)
577 rtoy 1.2.4.1 (ignorable state))
578     (loop until (>= ptr end)
579     do (when (= pos (length string))
580     (setq string (adjust-array string (* 2 pos))))
581 rtoy 1.2.4.3.2.11 (setf code
582     (octets-to-codepoint ,extfmt state count
583 rtoy 1.2.4.3.2.8 (aref octets (incf ptr)) ;;@@ EOF??
584     (lambda (n) (decf ptr n))))
585 rtoy 1.2.4.3.2.11 ;; Convert codepoint to UTF-16 surrogate pairs if needed
586 rtoy 1.2.4.3.2.14 (multiple-value-bind (high low)
587     (surrogates code)
588     (setf (aref string (incf pos)) high)
589     (when low
590     (setf (aref string (incf pos)) low)))
591 rtoy 1.2.4.3 finally (return (values string (1+ pos))))))
592 rtoy 1.1
593 rtoy 1.2 (defun octets-to-string (octets &key (start 0) end (external-format :default)
594     (string nil stringp))
595 rtoy 1.1 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
596 rtoy 1.2.4.3.2.1 (type kernel:index start)
597     (type (or kernel:index null) end)
598 rtoy 1.2.4.1 (type (or simple-string null) string))
599     (multiple-value-bind (string pos)
600 rtoy 1.2.4.3.2.8 (funcall (ef-octets-to-string external-format)
601 rtoy 1.2.4.1 octets (1- start) (1- (or end (length octets)))
602     (or string (make-string (length octets))))
603 rtoy 1.2 (values (if stringp string (lisp::shrink-vector string pos)) pos)))
604 rtoy 1.1
605    
606    
607 rtoy 1.2.4.1 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
608     `(lambda (string start end result &aux (ptr 0) (state nil))
609     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
610     (type simple-string string)
611 rtoy 1.2.4.3.2.1 (type kernel:index start end ptr)
612 rtoy 1.2.4.1 (type simple-base-string result)
613     (ignorable state))
614     (dotimes (i (- end start) (values result ptr))
615 rtoy 1.2.4.3.2.1 (declare (type kernel:index i))
616 rtoy 1.2.4.3.2.8 (char-to-octets ,extfmt (schar string (+ start i)) state
617     (lambda (b)
618     (when (= ptr (length result))
619     (setq result (adjust-array result (* 2 ptr))))
620     (setf (aref result (1- (incf ptr)))
621     (code-char b)))))))
622 rtoy 1.2.4.1
623     (defun string-encode (string external-format &optional (start 0) end)
624 rtoy 1.2.4.3.2.20 "Encode the given String using External-Format and return a new
625     string. The characters of the new string are the octets of the
626     encoded result, with each octet converted to a character via
627     code-char. This is the inverse to String-Decode"
628 rtoy 1.2.4.3.2.8 (when (zerop (length string))
629     (return-from string-encode string))
630 rtoy 1.2.4.1 (multiple-value-bind (result ptr)
631     (lisp::with-array-data ((string string) (start start) (end end))
632 rtoy 1.2.4.3.2.8 (funcall (ef-encode external-format) string start end
633 rtoy 1.2.4.1 (make-string (length string) :element-type 'base-char)))
634     (lisp::shrink-vector result ptr)))
635    
636     (def-ef-macro ef-decode (extfmt lisp::lisp +ef-max+ +ef-de+)
637     `(lambda (string ptr end result &aux (pos -1) (count 0) (state nil))
638     (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
639     (type simple-string string)
640 rtoy 1.2.4.3.2.1 (type kernel:index end count)
641 rtoy 1.2.4.1 (type (integer -1 (#.array-dimension-limit)) ptr pos)
642     (type simple-string result)
643     (ignorable state))
644     (loop until (>= ptr end)
645     ;; increasing size of result shouldn't ever be necessary, unless
646     ;; someone implements an encoding smaller than the source string...
647     do (setf (schar result (incf pos))
648 rtoy 1.2.4.3.2.8 (octets-to-char ,extfmt state count
649     ;; note the need to return NIL for EOF
650     (if (= (1+ ptr) (length string))
651     nil
652     (char-code (char string (incf ptr))))
653     (lambda (n) (decf ptr n))))
654 rtoy 1.2.4.3 finally (return (values result (1+ pos))))))
655 rtoy 1.2.4.1
656     (defun string-decode (string external-format &optional (start 0) end)
657 rtoy 1.2.4.3.2.20 "Decode String using the given External-Format and return the new
658     string. The input string is treated as if it were an array of
659     octets, where the char-code of each character is the octet. This is
660     the inverse of String-Encode."
661 rtoy 1.2.4.3.2.8 (when (zerop (length string))
662     (return-from string-decode string))
663 rtoy 1.2.4.1 (multiple-value-bind (result pos)
664     (lisp::with-array-data ((string string) (start start) (end end))
665 rtoy 1.2.4.3.2.8 (funcall (ef-decode external-format)
666 rtoy 1.2.4.1 string (1- start) (1- end) (make-string (length string))))
667 rtoy 1.2.4.3 (lisp::shrink-vector result pos)))

  ViewVC Help
Powered by ViewVC 1.1.5