/[cmucl]/src/code/extfmts.lisp
ViewVC logotype

Contents of /src/code/extfmts.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.32 - (hide annotations)
Mon Jul 5 15:52:47 2010 UTC (3 years, 9 months ago) by rtoy
Branch: MAIN
Changes since 1.31: +3 -3 lines
extfmts.lisp:
o Revert previous incompatible change to STRING-DECODE and
  STRING-ENCODE.  Change the keyword parameters back to optional
  parameters, and make the error parameter the last one.

fd-stream.lisp:
o Update use of STRING-ENCODE.
1 rtoy 1.1 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
2     ;;;
3     ;;; **********************************************************************
4     ;;; This code was written by Paul Foley and has been placed in the public
5     ;;; domain.
6     ;;;
7     (ext:file-comment
8 rtoy 1.32 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/extfmts.lisp,v 1.32 2010/07/05 15:52:47 rtoy Exp $")
9 rtoy 1.1 ;;;
10     ;;; **********************************************************************
11     ;;;
12     ;;; Implementation of external-formats
13    
14     (in-package "STREAM")
15    
16 rtoy 1.23 (intl:textdomain "cmucl")
17    
18 rtoy 1.1 (export '(string-to-octets octets-to-string *default-external-format*
19 rtoy 1.15 string-encode string-decode set-system-external-format
20     +replacement-character-code+))
21 rtoy 1.1
22     (defvar *default-external-format* :iso8859-1)
23    
24 rtoy 1.3 (defvar *external-formats* (make-hash-table :test 'equal))
25 rtoy 1.1 (defvar *external-format-aliases* (make-hash-table))
26    
27 rtoy 1.15 (vm::defenum (:prefix "+EF-" :suffix "+" :start 1)
28     str ; string length
29     cin ; input a character
30     cout ; output a character
31     sin ; input string
32     sout ; output string
33     os ; octets to string
34     so ; string to octets
35     en ; encode
36     de ; decode
37     flush ; flush state
38 rtoy 1.20 copy-state ; copy state
39 rtoy 1.15 max)
40 rtoy 1.6
41     ;; Unicode replacement character U+FFFD
42     (defconstant +replacement-character-code+ #xFFFD)
43 rtoy 1.3
44 rtoy 1.1 (define-condition external-format-not-implemented (error)
45     ()
46     (:report
47     (lambda (condition stream)
48     (declare (ignore condition))
49 rtoy 1.25 (format stream (intl:gettext "Attempting unimplemented external-format I/O.")))))
50 rtoy 1.1
51     (defun %efni (a b c d)
52     (declare (ignore a b c d))
53     (error 'external-format-not-implemented))
54    
55 rtoy 1.5 (defstruct efx
56 rtoy 1.15 ;;
57     ;; Function to read a sequence of octets from a stream and convert
58     ;; them a code point.
59 rtoy 1.5 (octets-to-code #'%efni :type function :read-only t)
60 rtoy 1.15 ;;
61     ;; Function to convert a codepoint to a sequence of octets and write
62     ;; them to an output stream.
63 rtoy 1.5 (code-to-octets #'%efni :type function :read-only t)
64 rtoy 1.15 ;;
65     ;; Function (or NIL) to force any state in the external format to be
66     ;; flushed to the output stream. A NIL value means the external
67     ;; format does not need to do anything special.
68     (flush-state nil :type (or null function) :read-only t)
69 rtoy 1.16 ;;
70     ;; Function to copy the state of the external-format. If NIL, then
71     ;; there is no state to be copied.
72     (copy-state nil :type (or null function) :read-only t)
73 rtoy 1.6 (cache nil :type (or null simple-vector))
74 rtoy 1.15 ;;
75     ;; Minimum number of octets needed to form a codepoint
76 rtoy 1.6 (min 1 :type kernel:index :read-only t)
77 rtoy 1.15 ;;
78 rtoy 1.17 ;; Maximum number of octets needed to form a codepoint.
79 rtoy 1.6 (max 1 :type kernel:index :read-only t))
80 rtoy 1.5
81 rtoy 1.1 (defstruct (external-format
82     (:conc-name ef-)
83     (:print-function %print-external-format)
84 rtoy 1.5 (:constructor make-external-format (name efx composingp
85     &optional slots slotd)))
86 rtoy 1.3 (name (ext:required-argument) :type (or keyword cons) :read-only t)
87 rtoy 1.5 (efx (ext:required-argument) :type efx :read-only t)
88 rtoy 1.3 (composingp (ext:required-argument) :type boolean :read-only t)
89 rtoy 1.1 (slots #() :type simple-vector :read-only t)
90 rtoy 1.5 (slotd nil :type list :read-only t))
91 rtoy 1.1
92     (defun %print-external-format (ef stream depth)
93     (declare (ignore depth))
94     (print-unreadable-object (ef stream :type t :identity t)
95     (princ (ef-name ef) stream)))
96    
97 rtoy 1.6 (defun %intern-ef (ef)
98 rtoy 1.3 (setf (gethash (ef-name ef) *external-formats*) ef))
99    
100 rtoy 1.16 (declaim (inline ef-octets-to-code ef-code-to-octets ef-flush-state ef-copy-state
101 rtoy 1.15 ef-cache ef-min-octets ef-max-octets))
102 rtoy 1.5
103     (defun ef-octets-to-code (ef)
104     (efx-octets-to-code (ef-efx ef)))
105    
106     (defun ef-code-to-octets (ef)
107     (efx-code-to-octets (ef-efx ef)))
108    
109 rtoy 1.15 (defun ef-flush-state (ef)
110     (efx-flush-state (ef-efx ef)))
111    
112 rtoy 1.16 (defun ef-copy-state (ef)
113     (efx-copy-state (ef-efx ef)))
114    
115 rtoy 1.5 (defun ef-cache (ef)
116     (efx-cache (ef-efx ef)))
117    
118 rtoy 1.6 (defun ef-min-octets (ef)
119     (efx-min (ef-efx ef)))
120    
121     (defun ef-max-octets (ef)
122     (efx-max (ef-efx ef)))
123    
124     (eval-when (:compile-toplevel :load-toplevel :execute)
125     (defun %merge-slots (old new)
126     (let* ((pos (length old))
127     (tmp (mapcar (lambda (x)
128     (let* ((name (if (consp x) (first x) x))
129     (init (if (consp x) (second x) nil))
130     (list (if (consp x) (nthcdr 2 x) nil))
131     (prev (assoc name old))
132     (posn (if prev (second prev) (1- (incf pos)))))
133     (list name posn init (getf list :type t))))
134     new)))
135     (delete-duplicates (stable-sort (append old tmp) #'< :key #'second)
136     :key #'second))))
137    
138     ;;; DEFINE-EXTERNAL-FORMAT -- Public
139     ;;;
140 rtoy 1.16 ;;; name (&key min max size) (&rest slots) octets-to-code code-to-octets
141     ;;; flush-state copy-state
142     ;;;
143 rtoy 1.6 ;;; Define a new external format. Min/Max/Size are the minimum and
144     ;;; maximum number of octets that make up a character (:size N is just
145     ;;; shorthand for :min N :max N). Slots is a list of slot descriptions
146     ;;; similar to defstruct.
147     ;;;
148     ;;; name (base) (&rest slots)
149     ;;; Define an external format based on a previously-defined external
150     ;;; format, Base. The slot names used in Slots must match those in Base.
151     ;;;
152 rtoy 1.31 ;;; octets-to-code (state input unput error &rest vars)
153 rtoy 1.6 ;;; Defines a form to be used by the external format to convert
154     ;;; octets to a code point. State is a form that can be used by the
155 rtoy 1.16 ;;; body to access the state of the stream. Input is a form that
156     ;;; can be used to read one octet from the input stream. (It can be
157     ;;; called as many times as needed.) Similarly, Unput is a form to
158 rtoy 1.31 ;;; put back one octet to the input stream. Error is an error
159     ;;; handler. The default is NIL to indicate that the code should do
160     ;;; its default handling. Otherwise, it should be a function or
161     ;;; symbol to indicate how errors are handled. Vars is a list of
162     ;;; vars that need to be defined for any symbols used within the
163     ;;; form.
164     ;;;
165     ;;; The error handler is a function of 3 arguments: a format message
166     ;;; string, the offending octet (or NIL) and the number of octets
167     ;;; read for this encoding. If the function returns, it should
168     ;;; return the codepoint to be used in place of the erroneous
169     ;;; sequence.
170 rtoy 1.6 ;;;
171     ;;; This should return two values: the code and the number of octets
172     ;;; read to form the code.
173     ;;;
174 rtoy 1.31 ;;; code-to-octets (code state output error &rest vars)
175 rtoy 1.6 ;;; Defines a form to be used by the external format to convert a
176     ;;; code point to octets for output. Code is the code point to be
177     ;;; converted. State is a form to access the current value of the
178     ;;; stream's state variable. Output is a form that writes one octet
179 rtoy 1.31 ;;; to the output stream. Error is the error handler. A value of
180     ;;; NIL means the external format should use its default method.
181     ;;; Otherwise, it should be a symbol or function that will e called
182     ;;; to handle the error.
183     ;;;
184     ;;; The error function takes 2 arguments: a format message string
185     ;;; and the offending codepoint. If the function returns, it should
186     ;;; be the desired replacement codepoint.
187 rtoy 1.6 ;;;
188 rtoy 1.30 ;;; flush-state (state output error &rest vars)
189 rtoy 1.15 ;;; Defines a form to be used by the external format to flush out
190     ;;; any state when an output stream is closed. Similar to
191 rtoy 1.31 ;;; CODE-TO-OCTETS, but there is no code. Error is similar to the
192     ;;; error parameter for code-to-octets.
193 rtoy 1.15 ;;;
194 rtoy 1.16 ;;; copy-state (state &rest vars)
195     ;;; Defines a form to copy any state needed by the external format.
196     ;;; This should probably be a deep copy so that if the original
197     ;;; state is modified, the copy is not.
198     ;;;
199 rtoy 1.6 ;;; Note: external-formats work on code-points, not
200     ;;; characters, so that the entire 31 bit ISO-10646 range can be
201     ;;; used internally regardless of the size of a character recognized
202     ;;; by Lisp and external formats can be useful to people who want to
203     ;;; process characters outside the Lisp range (see
204     ;;; CODEPOINT-TO-OCTETS, OCTETS-TO-CODEPOINT)
205     ;;;
206     (defmacro define-external-format (name (&rest args) (&rest slots)
207 rtoy 1.16 &optional octets-to-code code-to-octets
208     flush-state copy-state)
209 rtoy 1.6 (when (and (oddp (length args)) (not (= (length args) 1)))
210 rtoy 1.25 (warn (intl:gettext "Nonsensical argument (~S) to DEFINE-EXTERNAL-FORMAT.") args))
211 rtoy 1.6 (let* ((tmp (gensym))
212     (min (if (evenp (length args))
213     (or (getf args :min) (getf args :size) 1)
214     1))
215     (max (if (evenp (length args))
216     (or (getf args :max) (getf args :size) 6)
217     6))
218     (base (if (= (length args) 1)
219     (find-external-format (first args))
220     nil))
221     (bslotd (if base (ef-slotd base) nil))
222     (slotd (%merge-slots bslotd slots))
223     (slotb (loop for slot in slotd
224     collect `(,(first slot)
225     `(the ,',(fourth slot)
226     ;; IDENTITY is here to protect against SETF
227     (identity (svref %slots% ,',(second slot))))))))
228 rtoy 1.26 `(macrolet ((octets-to-code ((state input unput error &rest vars) body)
229 rtoy 1.28 `(lambda (,state ,input ,unput ,error)
230 rtoy 1.26 (declare (ignorable ,state ,input ,unput ,error)
231 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
232 rtoy 1.6 (let (,@',slotb
233     (,input `(the (or (unsigned-byte 8) null) ,,input))
234 rtoy 1.3 ,@(loop for var in vars collect `(,var (gensym))))
235     ,body)))
236 rtoy 1.26 (code-to-octets ((code state output error &rest vars) body)
237 rtoy 1.28 `(lambda (,',tmp ,state ,output ,error)
238 rtoy 1.26 (declare (ignorable ,state ,output ,error)
239 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
240 rtoy 1.6 (let (,@',slotb
241     (,code ',code)
242 rtoy 1.3 ,@(loop for var in vars collect `(,var (gensym))))
243 rtoy 1.16 `(let ((,',code (the lisp:codepoint ,,',tmp)))
244 rtoy 1.3 (declare (ignorable ,',code))
245 rtoy 1.15 ,,body))))
246 rtoy 1.30 (flush-state ((state output error &rest vars) body)
247     `(lambda (,state ,output ,error)
248     (declare (ignorable ,state ,output ,error))
249 rtoy 1.15 (let (,@',slotb
250     ,@(loop for var in vars collect `(,var (gensym))))
251 rtoy 1.16 ,body)))
252     (copy-state ((state &rest vars) body)
253     `(lambda (,state)
254     (declare (ignorable ,state))
255     (let (,@',slotb
256     ,@(loop for var in vars collect `(,var (gensym))))
257 rtoy 1.15 ,body))))
258 rtoy 1.6 (%intern-ef (make-external-format ,name
259     ,(if base
260     `(ef-efx (find-external-format ,(ef-name base)))
261     `(make-efx :octets-to-code ,octets-to-code
262     :code-to-octets ,code-to-octets
263 rtoy 1.16 :flush-state ,flush-state
264     :copy-state ,copy-state
265 rtoy 1.6 :cache (make-array +ef-max+
266     :initial-element nil)
267     :min ,(min min max) :max ,(max min max)))
268     nil
269     (let* ,(loop for x in slotd
270     collect (list (first x) (third x)))
271     (vector ,@(mapcar #'first slotd)))
272     ',slotd)))))
273 rtoy 1.3
274 rtoy 1.6 ;;; DEFINE-COMPOSING-EXTERNAL-FORMAT -- Public
275     ;;;
276     ;;; A composing-external-format differs from an (ordinary) external-format
277     ;;; in that it translates characters (really codepoints, of course) into
278     ;;; other characters, rather than translating between characters and binary
279     ;;; octets. They have to be composed with a non-composing external-format
280     ;;; to be of any use.
281     ;;;
282 rtoy 1.16 ;;;
283     ;;; name (&key min max size) input output
284     ;;; Defines a new composing external format. The parameters Min,
285     ;;; Max, and Size are the same as for defining an external format.
286     ;;; The parameters input and output are forms to handle input and
287     ;;; output.
288     ;;;
289     ;;; input (state input unput &rest vars)
290     ;;; Defines a form to be used by the composing external format when
291     ;;; reading input to transform a codepoint (or sequence of
292     ;;; codepoints) to another. State is a form that can be used by the
293     ;;; body to access the state of the external format. Input is a
294     ;;; form that can be used to read one code point from the input
295     ;;; stream. (Input returns two values, the codepoint and the number
296     ;;; of octets read.) It may be called as many times as needed.
297     ;;; This returns two values: the codepoint of the character (or NIL)
298     ;;; and the number of octets read. Similarly, Unput is a form to
299     ;;; put back one octet to the input stream. Vars is a list of vars
300     ;;; that need to be defined for any symbols used within the form.
301     ;;;
302     ;;; This should return two values: the code and the number of octets
303     ;;; read to form the code.
304     ;;;
305     ;;; output (code state output &rest vars)
306     ;;; Defines a form to be used by the composing external format to
307     ;;; convert a code point to octets for output. Code is the code
308     ;;; point to be converted. State is a form to access the current
309     ;;; value of the stream's state variable. Output is a form that
310     ;;; writes one octet to the output stream.
311    
312 rtoy 1.6 (defmacro define-composing-external-format (name (&key min max size)
313     input output)
314     (let ((tmp (gensym))
315     (min (or min size 1))
316     (max (or max size 1)))
317 rtoy 1.3 `(macrolet ((input ((state input unput &rest vars) body)
318 rtoy 1.6 `(lambda (,state ,input ,unput)
319     (declare (ignorable ,state ,input ,unput)
320 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
321 rtoy 1.16 (let ((,input `(the (values (or lisp:codepoint null)
322 rtoy 1.6 kernel:index)
323 rtoy 1.3 ,,input))
324     ,@(loop for var in vars collect `(,var (gensym))))
325     ,body)))
326     (output ((code state output &rest vars) body)
327 rtoy 1.6 `(lambda (,',tmp ,state ,output)
328     (declare (ignorable ,state ,output)
329 rtoy 1.3 (optimize (ext:inhibit-warnings 3)))
330     (let ((,code ',code)
331     ,@(loop for var in vars collect `(,var (gensym))))
332 rtoy 1.16 `(let ((,',code (the lisp:codepoint ,,',tmp)))
333 rtoy 1.3 (declare (ignorable ,',code))
334     ,,body)))))
335 rtoy 1.6 (%intern-ef (make-external-format ,name
336     (make-efx :octets-to-code ,input
337     :code-to-octets ,output
338     :min ,(min min max) :max ,(max min max))
339     t
340     #() '())))))
341 rtoy 1.1
342     (defun load-external-format-aliases ()
343 rtoy 1.6 (let ((*package* (find-package "KEYWORD"))
344     (unix::*filename-encoding* :iso8859-1))
345 rtoy 1.11 (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil
346     :external-format :iso8859-1)
347 rtoy 1.1 (when stm
348 rtoy 1.6 (do ((alias (read stm nil stm) (read stm nil stm))
349     (value (read stm nil stm) (read stm nil stm)))
350     ((or (eq alias stm) (eq value stm))
351     (unless (eq alias stm)
352 rtoy 1.25 (warn (intl:gettext "External-format aliases file ends early."))))
353 rtoy 1.11 (if (and (keywordp alias) (or (keywordp value)
354     (and (consp value)
355     (every #'keywordp value))))
356 rtoy 1.6 (setf (gethash alias *external-format-aliases*) value)
357 rtoy 1.25 (warn (intl:gettext "Bad entry in external-format aliases file: ~S => ~S.")
358 rtoy 1.6 alias value)))))))
359 rtoy 1.1
360 rtoy 1.3 (defun %find-external-format (name)
361 rtoy 1.6 ;; avoid loading files, etc., early in the boot sequence
362     (when (or (eq name :iso8859-1)
363     (and (eq name :default) (eq *default-external-format* :iso8859-1)))
364     (return-from %find-external-format
365     (gethash :iso8859-1 *external-formats*)))
366    
367 rtoy 1.1 (when (zerop (hash-table-count *external-format-aliases*))
368     (setf (gethash :latin1 *external-format-aliases*) :iso8859-1)
369     (setf (gethash :latin-1 *external-format-aliases*) :iso8859-1)
370     (setf (gethash :iso-8859-1 *external-format-aliases*) :iso8859-1)
371     (load-external-format-aliases))
372    
373     (do ((tmp (gethash name *external-format-aliases*)
374     (gethash tmp *external-format-aliases*))
375     (cnt 0 (1+ cnt)))
376     ((or (null tmp) (= cnt 50))
377     (unless (null tmp)
378 rtoy 1.25 (error (intl:gettext "External-format aliasing depth exceeded."))))
379 rtoy 1.1 (setq name tmp))
380    
381     (or (gethash name *external-formats*)
382 rtoy 1.11 (and (consp name) (find-external-format name))
383 rtoy 1.22 (and (with-standard-io-syntax
384     ;; Use standard IO syntax so that changes by the user
385     ;; don't mess up compiling the external format.
386     (let ((*package* (find-package "STREAM"))
387     (lisp::*enable-package-locked-errors* nil)
388     (s (open (format nil "ext-formats:~(~A~).lisp" name)
389     :if-does-not-exist nil :external-format :iso8859-1)))
390     (when s
391     (null (nth-value 1 (ext:compile-from-stream s))))))
392 rtoy 1.3 (gethash name *external-formats*))))
393    
394     (defun %composed-ef-name (a b)
395     (if (consp a) (append a (list b)) (list a b)))
396    
397 rtoy 1.6 (defun %compose-external-formats (a b)
398 rtoy 1.3 (when (ef-composingp a)
399 rtoy 1.25 (error (intl:gettext "~S is a Composing-External-Format.") (ef-name a)))
400 rtoy 1.3 (unless (ef-composingp b)
401 rtoy 1.25 (error (intl:gettext "~S is not a Composing-External-Format.") (ef-name b)))
402 rtoy 1.5 (make-external-format
403     (%composed-ef-name (ef-name a) (ef-name b))
404     (make-efx
405 rtoy 1.29 :octets-to-code (lambda (state input unput error)
406 rtoy 1.11 (let ((nstate (gensym "STATE-")))
407     `(let ((,nstate ,state))
408     (when (null ,nstate)
409     (setq ,nstate (setf ,state (cons nil nil))))
410     ,(funcall (ef-octets-to-code b) `(car ,nstate)
411     (funcall (ef-octets-to-code a)
412 rtoy 1.29 `(cdr ,nstate) input unput error)
413 rtoy 1.11 unput))))
414 rtoy 1.29 :code-to-octets (lambda (code state output error)
415 rtoy 1.11 (let ((nstate (gensym "STATE-")))
416     `(let ((,nstate ,state))
417     (when (null ,nstate)
418     (setq ,nstate (setf ,state (cons nil nil))))
419     ,(funcall (ef-code-to-octets b) code `(car ,nstate)
420     `(lambda (x)
421     ,(funcall (ef-code-to-octets a)
422 rtoy 1.29 'x `(cdr ,nstate) output error))))))
423 rtoy 1.6 :cache (make-array +ef-max+ :initial-element nil)
424     :min (* (ef-min-octets a) (ef-min-octets b))
425     :max (* (ef-max-octets a) (ef-max-octets b)))
426 rtoy 1.5 nil #() '()))
427 rtoy 1.3
428     (defun find-external-format (name &optional (error-p t))
429     (when (external-format-p name)
430     (return-from find-external-format name))
431    
432     (or (if (consp name) (every #'keywordp name) (keywordp name))
433 rtoy 1.25 (error (intl:gettext "~S is not a valid external format name.") name))
434 rtoy 1.1
435 rtoy 1.3 (when (eq name :default)
436     (setq name *default-external-format*))
437    
438     (when (and (consp name) (not (cdr name)))
439     (setq name (car name)))
440    
441 rtoy 1.6 (flet ((not-found ()
442     (when (equal *default-external-format* name)
443     (setq *default-external-format* :iso8859-1))
444 rtoy 1.25 (if error-p (error (intl:gettext "External format ~S not found.") name) nil)))
445 rtoy 1.6 (if (consp name)
446     (let ((efs (mapcar #'%find-external-format name)))
447     (if (member nil efs)
448     (not-found)
449     (let ((name (reduce #'%composed-ef-name (mapcar #'ef-name efs))))
450     (or (gethash name *external-formats*)
451     (%intern-ef (reduce #'%compose-external-formats efs))))))
452     (or (%find-external-format name) (not-found)))))
453    
454     (defun flush-external-formats ()
455     (maphash (lambda (name ef)
456     (declare (ignore name))
457     (fill (ef-cache ef) nil))
458     *external-formats*))
459    
460     (defvar *.table-inverse.* (make-hash-table :test 'eq :size 7))
461    
462     (defun invert-table (table)
463     (declare (type (or (simple-array (unsigned-byte 31) *)
464     (simple-array (unsigned-byte 16) *))
465     table)
466     (optimize (speed 3) (space 0) (safety 0) (debug 0)
467     (ext:inhibit-warnings 3)))
468     (or (gethash table *.table-inverse.*)
469     (let* ((mbits (if (= (array-total-size table) 128) 7 8))
470     (lbits (cond ((> (array-total-size table) 256) 3)
471     ((< (array-total-size table) 100) 6)
472     (t 5)))
473     (hvec (make-array (1+ (ash #x110000 (- 0 mbits lbits)))
474     :element-type '(unsigned-byte 16)
475     :initial-element #xFFFF))
476     (mvec (make-array 0 :element-type '(unsigned-byte 16)))
477     (lvec (make-array 0 :element-type '(unsigned-byte 16)))
478     (width (array-dimension table 0))
479     (power (1- (array-rank table)))
480     (base (if (= width 94) 1 0))
481     hx mx lx)
482     (assert (and (< power 2) (<= width 256)))
483     (dotimes (i (array-total-size table))
484     (declare (type (integer 0 (#.array-dimension-limit)) i))
485     (let ((tmp i) (val (row-major-aref table i)) (z 0))
486     (declare (type (integer 0 (#.array-dimension-limit)) tmp)
487     (type (unsigned-byte 16) z))
488     (unless (= val #xFFFE)
489     (when (plusp power)
490     (multiple-value-bind (x y) (floor tmp width)
491     (setq tmp x)
492     (setq z (logior z (ash (the (integer 0 255) (+ y base))
493     (the (integer 0 24)
494     (* 8 power)))))))
495     (setq hx (ash val (- 0 mbits lbits)))
496     (when (= (aref hvec hx) #xFFFF)
497     (setf (aref hvec hx) (length mvec))
498     (let ((tmp (make-array (+ (length mvec) (ash 1 mbits))
499     :element-type '(unsigned-byte 16)
500     :initial-element #xFFFF)))
501     (replace tmp mvec)
502     (setq mvec tmp)))
503     (setq mx (logand (ash val (- lbits)) (lognot (ash -1 mbits))))
504 rtoy 1.7 (when (= (aref mvec (+ (aref hvec hx) mx)) #xFFFF)
505     (setf (aref mvec (+ (aref hvec hx) mx)) (length lvec))
506 rtoy 1.6 (let ((tmp (make-array (+ (length lvec) (ash 1 lbits))
507     :element-type '(unsigned-byte 16)
508     :initial-element #xFFFF)))
509     (replace tmp lvec)
510     (setq lvec tmp)))
511     (setq lx (logand val (lognot (ash -1 lbits))))
512 rtoy 1.7 (setf (aref lvec (+ (aref mvec (+ (aref hvec hx) mx)) lx))
513 rtoy 1.6 (logior z (+ tmp base))))))
514     (setf (gethash table *.table-inverse.*)
515     (lisp::make-ntrie16 :split (logior (ash (1- mbits) 4) (1- lbits))
516     :hvec hvec :mvec mvec :lvec lvec)))))
517    
518     (declaim (inline get-inverse))
519     (defun get-inverse (ntrie code)
520 rtoy 1.11 (declare (type lisp::ntrie16 ntrie) (type (integer 0 #x10FFFF) code))
521 rtoy 1.6 (let ((n (lisp::qref ntrie code)))
522     (and n (let ((m (aref (lisp::ntrie16-lvec ntrie) n)))
523     (if (= m #xFFFF) nil m)))))
524    
525 rtoy 1.1
526     (define-condition void-external-format (error)
527     ()
528     (:report
529     (lambda (condition stream)
530     (declare (ignore condition))
531 rtoy 1.25 (format stream (intl:gettext "Attempting I/O through void external-format.")))))
532 rtoy 1.1
533 rtoy 1.6 (define-external-format :void (:size 0) ()
534 rtoy 1.26 (octets-to-code (state input unput error)
535 rtoy 1.3 `(error 'void-external-format))
536 rtoy 1.26 (code-to-octets (code state output error)
537 rtoy 1.3 `(error 'void-external-format)))
538 rtoy 1.1
539 rtoy 1.6 (define-external-format :iso8859-1 (:size 1) ()
540 rtoy 1.26 (octets-to-code (state input unput error)
541 rtoy 1.3 `(values ,input 1))
542 rtoy 1.26 (code-to-octets (code state output error)
543 rtoy 1.27 `(,output (if (> ,code 255)
544     (if ,error
545     (funcall ,error "Cannot output codepoint #x~X to ISO8859-1 stream"
546     ,code 1)
547     #x3F)
548     ,code))))
549 rtoy 1.1
550 rtoy 1.6 ;;; OCTETS-TO-CODEPOINT, CODEPOINT-TO-OCTETS -- Semi-Public
551     ;;;
552     ;;; Normally you'd want to use OCTETS-TO-CHAR and CHAR-TO-OCTETS instead of
553     ;;; these, but that limits you to Lisp's idea of a character - either Latin-1
554     ;;; in 8 bit Lisp images, or the Unicode BMP in 16 bit images. If you want
555     ;;; to read or write texts containing characters not supported by your Lisp,
556     ;;; these macros can be used instead.
557 rtoy 1.26 (defmacro octets-to-codepoint (external-format state count input unput &optional error)
558 rtoy 1.6 (let ((tmp1 (gensym)) (tmp2 (gensym))
559     (ef (find-external-format external-format)))
560     `(multiple-value-bind (,tmp1 ,tmp2)
561 rtoy 1.27 ,(funcall (ef-octets-to-code ef) state input unput error)
562 rtoy 1.6 (setf ,count (the kernel:index ,tmp2))
563 rtoy 1.16 (the (or lisp:codepoint null) ,tmp1))))
564 rtoy 1.1
565 rtoy 1.26 (defmacro codepoint-to-octets (external-format code state output &optional error)
566 rtoy 1.6 (let ((ef (find-external-format external-format)))
567 rtoy 1.27 (funcall (ef-code-to-octets ef) code state output error)))
568 rtoy 1.3
569    
570    
571     (defvar *ef-base* +ef-max+)
572     (defvar *ef-extensions* '())
573    
574     (defun ensure-cache (ef id reqd)
575     (let ((base (or (getf *ef-extensions* id)
576     (setf (getf *ef-extensions* id)
577     (prog1 *ef-base* (incf *ef-base* reqd))))))
578     (when (< (length (ef-cache ef)) (+ base reqd))
579 rtoy 1.5 (setf (efx-cache (ef-efx ef))
580 rtoy 1.3 (adjust-array (ef-cache ef) (+ base reqd) :initial-element nil)))
581     base))
582    
583 rtoy 1.6 ;;; DEF-EF-MACRO -- Public
584     ;;;
585     ;;;
586 rtoy 1.3 (defmacro def-ef-macro (name (ef id reqd idx) body)
587 rtoy 1.11 (let* ((tmp1 (gensym))
588     (tmp2 (gensym))
589     (blknm (nth-value 1 (lisp::valid-function-name-p name)))
590     (%name (intern (format nil "%~A" name) #|(symbol-package blknm)|#)))
591 rtoy 1.6 `(progn
592     (defun ,%name (,ef)
593     (let* ((,tmp1 (find-external-format ,ef))
594     (,tmp2 ,(if (eq id 'lisp::lisp)
595     idx
596     `(+ (ensure-cache ,tmp1 ',id ,reqd) ,idx))))
597     (funcall (or (aref (ef-cache ,tmp1) ,tmp2)
598     (setf (aref (ef-cache ,tmp1) ,tmp2)
599 rtoy 1.20 (let ((*compile-print* nil)
600     ;; Set default format when we compile so we
601     ;; can see compiler messages. If we don't,
602     ;; we run into a problem that we might be
603     ;; changing the default format while we're
604     ;; compiling, and we don't know how to output
605     ;; the compiler messages.
606     #|(*default-external-format* :iso8859-1)|#)
607     (compile nil `(lambda (%slots%)
608     (declare (ignorable %slots%))
609     (block ,',blknm
610     ,,body))))))
611 rtoy 1.6 (ef-slots ,tmp1))))
612     (declaim (inline ,name))
613     (defun ,name (,tmp1)
614     (let ((,tmp2 (load-time-value (cons nil nil))))
615     (when (eq ,tmp1 :default)
616     (setq ,tmp1 *default-external-format*))
617     (if (eq ,tmp1 (car ,tmp2))
618     (cdr ,tmp2)
619     (setf (car ,tmp2) ,tmp1
620     (cdr ,tmp2) (,%name ,tmp1))))))))
621 rtoy 1.1
622    
623    
624 rtoy 1.6 ;;; OCTETS-TO-CHAR, CHAR-TO-OCTETS -- Public
625     ;;;
626     ;;; Read and write one character through an external-format
627     ;;;
628 rtoy 1.26 (defmacro octets-to-char (external-format state count input unput &optional error)
629 rtoy 1.11 (let ((nstate (gensym)))
630     `(let ((,nstate ,state))
631     (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
632     (if (car ,nstate)
633 rtoy 1.13 ;; Return the trailing surrgate. Must set count to 0 to
634     ;; tell the stream code we didn't consume any octets!
635 rtoy 1.11 (prog1 (the character (car ,nstate))
636     (setf (car ,nstate) nil ,count 0))
637     (let ((code (octets-to-codepoint ,external-format
638 rtoy 1.26 (cdr ,nstate) ,count ,input ,unput ,error)))
639 rtoy 1.16 (declare (type lisp:codepoint code))
640 rtoy 1.11 ;;@@ on non-Unicode builds, limit to 8-bit chars
641     ;;@@ if unicode-bootstrap, can't use #\u+fffd
642 rtoy 1.16 (cond ((or (lisp::surrogatep code) (> code #x10FFFF))
643 rtoy 1.27 (if ,error
644     (funcall ,error "Cannot output codepoint #x~X" code)
645     #-(and unicode (not unicode-bootstrap)) #\?
646     #+(and unicode (not unicode-bootstrap)) #\U+FFFD))
647 rtoy 1.13 #+unicode
648 rtoy 1.11 ((> code #xFFFF)
649     (multiple-value-bind (hi lo) (surrogates code)
650     (setf (car ,nstate) lo)
651 rtoy 1.6 hi))
652 rtoy 1.11 (t (code-char code))))))))
653 rtoy 1.6
654 rtoy 1.26 (defmacro char-to-octets (external-format char state output &optional error)
655 rtoy 1.11 (let ((nchar (gensym))
656     (nstate (gensym))
657     (wryte (gensym))
658     (ch (gensym)))
659     `(let ((,nchar ,char)
660     (,nstate ,state))
661     (when (null ,nstate) (setq ,nstate (setf ,state (cons nil nil))))
662 rtoy 1.14 (if (lisp::surrogatep (char-code ,nchar) :high)
663 rtoy 1.11 (setf (car ,nstate) ,nchar)
664     (flet ((,wryte (,ch)
665     (codepoint-to-octets ,external-format ,ch (cdr ,nstate)
666 rtoy 1.26 ,output ,error)))
667 rtoy 1.11 (declare (dynamic-extent #',wryte))
668     (if (car ,nstate)
669 rtoy 1.14 (prog1
670     ;; Invalid surrogate sequences get replaced with
671     ;; the replacement character.
672     (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
673     (surrogates-to-codepoint (car ,nstate) ,nchar)
674 rtoy 1.27 (if ,error
675     (funcall ,error "Cannot convert invalid surrogate #x~X to character"
676     ,nchar)
677     +replacement-character-code+)))
678 rtoy 1.11 (setf (car ,nstate) nil))
679 rtoy 1.14 ;; A lone trailing (low) surrogate gets replaced with
680     ;; the replacement character.
681     (,wryte (if (lisp::surrogatep (char-code ,nchar) :low)
682 rtoy 1.27 (if ,error
683     (funcall ,error "Cannot convert lone trailing surrogate #x~X to character"
684     ,nchar)
685     +replacement-character-code+)
686 rtoy 1.14 (char-code ,nchar)))))))))
687 rtoy 1.6
688 rtoy 1.30 (defmacro flush-state (external-format state output &optional error)
689 rtoy 1.15 (let* ((ef (find-external-format external-format))
690     (f (ef-flush-state ef)))
691     (when f
692 rtoy 1.30 (funcall f state output error))))
693 rtoy 1.15
694 rtoy 1.16 (defmacro copy-state (external-format state)
695     (let* ((ef (find-external-format external-format))
696     (f (ef-copy-state ef)))
697     (when f
698     (funcall f state))))
699    
700 rtoy 1.3 (def-ef-macro ef-string-to-octets (extfmt lisp::lisp +ef-max+ +ef-so+)
701 rtoy 1.28 `(lambda (string start end buffer error &aux (ptr 0) (state nil))
702 rtoy 1.3 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
703     (type simple-string string)
704 rtoy 1.6 (type kernel:index start end ptr)
705 rtoy 1.3 (type (simple-array (unsigned-byte 8) (*)) buffer)
706     (ignorable state))
707 rtoy 1.11 (dotimes (i (- end start) (values buffer ptr))
708     (declare (type kernel:index i))
709     (char-to-octets ,extfmt (schar string (+ start i)) state
710     (lambda (b)
711     (when (= ptr (length buffer))
712     (setq buffer (adjust-array buffer (* 2 ptr))))
713 rtoy 1.27 (setf (aref buffer (1- (incf ptr))) b))
714     error))))
715 rtoy 1.3
716 rtoy 1.2 (defun string-to-octets (string &key (start 0) end (external-format :default)
717 rtoy 1.27 (buffer nil bufferp)
718     error)
719 rtoy 1.24 "Convert String to octets using the specified External-format. The
720 rtoy 1.16 string is bounded by Start (defaulting to 0) and End (defaulting to
721     the end of the string. If Buffer is given, the octets are stored
722     there. If not, a new buffer is created."
723 rtoy 1.1 (declare (type string string)
724 rtoy 1.6 (type kernel:index start)
725     (type (or kernel:index null) end)
726 rtoy 1.3 (type (or (simple-array (unsigned-byte 8) (*)) null) buffer))
727 rtoy 1.6 (let* ((buffer (or buffer (make-array (length string)
728 rtoy 1.3 :element-type '(unsigned-byte 8)))))
729 rtoy 1.6 (multiple-value-bind (buffer ptr)
730     (lisp::with-array-data ((string string) (start start) (end end))
731     (funcall (ef-string-to-octets external-format)
732 rtoy 1.27 string start end buffer error))
733 rtoy 1.6 (values (if bufferp buffer (lisp::shrink-vector buffer ptr)) ptr))))
734 rtoy 1.3
735     (def-ef-macro ef-octets-to-string (extfmt lisp::lisp +ef-max+ +ef-os+)
736 rtoy 1.28 `(lambda (octets ptr end state string s-start s-end error
737 rtoy 1.27 &aux (pos s-start) (count 0) (last-octet 0))
738 rtoy 1.20 (declare (optimize (speed 3) (safety 0) #|(space 0) (debug 0)|#)
739 rtoy 1.3 (type (simple-array (unsigned-byte 8) (*)) octets)
740 rtoy 1.20 (type kernel:index pos end count last-octet s-start s-end)
741 rtoy 1.18 (type (integer -1 (#.array-dimension-limit)) ptr)
742 rtoy 1.3 (type simple-string string)
743     (ignorable state))
744 rtoy 1.18 (catch 'end-of-octets
745 rtoy 1.20 (loop while (< pos s-end)
746     do (setf (schar string pos)
747     (octets-to-char ,extfmt state count
748     (if (>= ptr end)
749     (throw 'end-of-octets nil)
750     (aref octets (incf ptr)))
751 rtoy 1.27 (lambda (n) (decf ptr n))
752     error))
753 rtoy 1.18 (incf pos)
754     (incf last-octet count)))
755 rtoy 1.20 (values string pos last-octet state)))
756 rtoy 1.1
757 rtoy 1.2 (defun octets-to-string (octets &key (start 0) end (external-format :default)
758 rtoy 1.20 (string nil stringp)
759     (s-start 0) (s-end nil s-end-p)
760 rtoy 1.27 (state nil)
761     error)
762 rtoy 1.24 "Octets-to-string converts an array of octets in Octets to a string
763 rtoy 1.16 according to the specified External-format. The array of octets is
764     bounded by Start (defaulting ot 0) and End (defaulting to the end of
765 rtoy 1.20 the array. If String is not given, a new string is created. If
766     String is given, the converted octets are stored in String, starting
767     at S-Start (defaulting to the 0) and ending at S-End (defaulting to
768     the length of String). If the string is not large enough to hold
769     all of characters, then some octets will not be converted. A State
770     may also be specified; this is used as the state of the external
771     format.
772    
773     Four values are returned: the string, the number of characters read,
774     the number of octets actually consumed and the new state of the
775     external format."
776 rtoy 1.1 (declare (type (simple-array (unsigned-byte 8) (*)) octets)
777 rtoy 1.20 (type kernel:index start s-start)
778 rtoy 1.6 (type (or kernel:index null) end)
779 rtoy 1.3 (type (or simple-string null) string))
780 rtoy 1.20 (let ((s-end (if s-end-p
781     s-end
782     (if stringp
783     (length string)
784     (length octets)))))
785     (multiple-value-bind (string pos last-octet new-state)
786     (funcall (ef-octets-to-string external-format)
787     octets (1- start) (1- (or end (length octets)))
788     state
789     (or string (make-string (length octets)))
790 rtoy 1.27 s-start s-end
791     error)
792 rtoy 1.20 (values (if stringp string (lisp::shrink-vector string pos)) pos last-octet new-state))))
793 rtoy 1.1
794    
795    
796 rtoy 1.3 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
797 rtoy 1.28 `(lambda (string start end result error &aux (ptr 0) (state nil))
798 rtoy 1.3 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
799     (type simple-string string)
800 rtoy 1.6 (type kernel:index start end ptr)
801 rtoy 1.3 (type simple-base-string result)
802     (ignorable state))
803 rtoy 1.11 (dotimes (i (- end start) (values result ptr))
804     (declare (type kernel:index i))
805     (char-to-octets ,extfmt (schar string (+ start i)) state
806     (lambda (b)
807     (when (= ptr (length result))
808     (setq result (adjust-array result (* 2 ptr))))
809     (setf (aref result (1- (incf ptr)))
810 rtoy 1.27 (code-char b)))
811     error))))
812 rtoy 1.3
813 rtoy 1.32 (defun string-encode (string external-format &optional (start 0) end error)
814 rtoy 1.24 "Encode the given String using External-Format and return a new
815 rtoy 1.6 string. The characters of the new string are the octets of the
816     encoded result, with each octet converted to a character via
817     code-char. This is the inverse to String-Decode"
818     (when (zerop (length string))
819     (return-from string-encode string))
820 rtoy 1.3 (multiple-value-bind (result ptr)
821     (lisp::with-array-data ((string string) (start start) (end end))
822 rtoy 1.6 (funcall (ef-encode external-format) string start end
823 rtoy 1.27 (make-string (length string) :element-type 'base-char)
824     error))
825 rtoy 1.3 (lisp::shrink-vector result ptr)))
826    
827     (def-ef-macro ef-decode (extfmt lisp::lisp +ef-max+ +ef-de+)
828 rtoy 1.28 `(lambda (string ptr end result error &aux (pos -1) (count 0) (state nil))
829 rtoy 1.3 (declare #|(optimize (speed 3) (safety 0) (space 0) (debug 0))|#
830     (type simple-string string)
831 rtoy 1.6 (type kernel:index end count)
832 rtoy 1.3 (type (integer -1 (#.array-dimension-limit)) ptr pos)
833     (type simple-string result)
834     (ignorable state))
835     (loop until (>= ptr end)
836     ;; increasing size of result shouldn't ever be necessary, unless
837     ;; someone implements an encoding smaller than the source string...
838 rtoy 1.11 do (setf (schar result (incf pos))
839     (octets-to-char ,extfmt state count
840     ;; note the need to return NIL for EOF
841     (if (= (1+ ptr) (length string))
842     nil
843     (char-code (char string (incf ptr))))
844 rtoy 1.27 (lambda (n) (decf ptr n))
845     error))
846 rtoy 1.5 finally (return (values result (1+ pos))))))
847 rtoy 1.3
848 rtoy 1.32 (defun string-decode (string external-format &optional (start 0) end error)
849 rtoy 1.24 "Decode String using the given External-Format and return the new
850 rtoy 1.6 string. The input string is treated as if it were an array of
851     octets, where the char-code of each character is the octet. This is
852     the inverse of String-Encode."
853     (when (zerop (length string))
854     (return-from string-decode string))
855 rtoy 1.3 (multiple-value-bind (result pos)
856     (lisp::with-array-data ((string string) (start start) (end end))
857 rtoy 1.6 (funcall (ef-decode external-format)
858 rtoy 1.27 string (1- start) (1- end) (make-string (length string))
859     error))
860 rtoy 1.5 (lisp::shrink-vector result pos)))
861 rtoy 1.11
862    
863     (defun set-system-external-format (terminal &optional filenames)
864 rtoy 1.24 "Change the external format of the standard streams to Terminal.
865 rtoy 1.19 The standard streams are sys::*stdin*, sys::*stdout*, and
866     sys::*stderr*, which are normally the input and/or output streams
867     for *standard-input* and *standard-output*. Also sets sys::*tty*
868     (normally *terminal-io* to the given external format. If the
869     optional argument Filenames is gvien, then the filename encoding is
870     set to the specified format."
871 rtoy 1.11 (unless (find-external-format terminal)
872 rtoy 1.25 (error (intl:gettext "Can't find external-format ~S.") terminal))
873 rtoy 1.11 (setf (stream-external-format sys:*stdin*) terminal
874     (stream-external-format sys:*stdout*) terminal
875     (stream-external-format sys:*stderr*) terminal)
876     (when (lisp::fd-stream-p sys:*tty*)
877     (setf (stream-external-format sys:*tty*) terminal))
878     (when filenames
879     (unless (find-external-format filenames)
880 rtoy 1.25 (error (intl:gettext "Can't find external-format ~S.") filenames))
881 rtoy 1.11 (when (and unix::*filename-encoding*
882     (not (eq unix::*filename-encoding* filenames)))
883 rtoy 1.25 (cerror (intl:gettext "Change it anyway.")
884     (intl:gettext "The external-format for encoding filenames is already set."))
885 rtoy 1.11 (setq unix::*filename-encoding* filenames)))
886     t)
887    
888    
889     ;; Despite its name, this doesn't actually compile anything at all. What it
890     ;; does is expand into a lambda expression that can be compiled by the file
891     ;; compiler.
892     (defmacro precompile-ef-slot (ef slot)
893     (let* ((ef (find-external-format ef)))
894     ;; if there's no lambda expression available, flush it and regenerate
895     (unless (and (aref (ef-cache ef) slot)
896     (function-lambda-expression (aref (ef-cache ef) slot)))
897     (setf (aref (ef-cache ef) slot) nil)
898     (ecase slot
899     (#.+ef-cin+ (lisp::%ef-cin ef))
900     (#.+ef-cout+ (lisp::%ef-cout ef))
901     (#.+ef-sout+ (lisp::%ef-sout ef))
902     (#.+ef-os+ (%ef-octets-to-string ef))
903     (#.+ef-so+ (%ef-string-to-octets ef))
904     (#.+ef-en+ (%ef-encode ef))
905     (#.+ef-de+ (%ef-decode ef))))
906     `(setf (aref (ef-cache (find-external-format ,(ef-name ef))) ,slot)
907     ,(subst (ef-name ef) ef
908     (function-lambda-expression (aref (ef-cache ef) slot))))))

  ViewVC Help
Powered by ViewVC 1.1.5