/[cmucl]/src/code/char.lisp
ViewVC logotype

Contents of /src/code/char.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.21 - (show annotations)
Tue Apr 20 17:57:43 2010 UTC (4 years ago) by rtoy
Branch: MAIN
CVS Tags: sparc-tramp-assem-base, release-20b-pre1, release-20b-pre2, sparc-tramp-assem-2010-07-19, GIT-CONVERSION, cross-sol-x86-merged, RELEASE_20b, cross-sol-x86-base, snapshot-2010-12, snapshot-2010-11, snapshot-2011-09, snapshot-2011-06, snapshot-2011-07, snapshot-2011-04, snapshot-2011-02, snapshot-2011-03, snapshot-2011-01, snapshot-2010-05, snapshot-2010-07, snapshot-2010-06, snapshot-2010-08, cross-sol-x86-2010-12-20, cross-sparc-branch-base, HEAD
Branch point for: cross-sparc-branch, RELEASE-20B-BRANCH, sparc-tramp-assem-branch, cross-sol-x86-branch
Changes since 1.20: +4 -4 lines
Change uses of _"foo" to (intl:gettext "foo").  This is because slime
may get confused with source locations if the reader macros are
installed.
1 ;;; -*- Log: code.log; Package: Lisp -*-
2 ;;;
3 ;;; **********************************************************************
4 ;;; This code was written as part of the CMU Common Lisp project at
5 ;;; Carnegie Mellon University, and has been placed in the public domain.
6 ;;;
7 (ext:file-comment
8 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/char.lisp,v 1.21 2010/04/20 17:57:43 rtoy Rel $")
9 ;;;
10 ;;; **********************************************************************
11 ;;;
12 ;;; Character functions for Spice Lisp. Part of the standard Spice Lisp
13 ;;; environment.
14 ;;;
15 ;;; This file assumes the use of ASCII codes and the specific character formats
16 ;;; used in Spice Lisp and Vax Common Lisp. It is optimized for performance
17 ;;; rather than for portability and elegance, and may have to be rewritten if
18 ;;; the character representation is changed.
19 ;;;
20 ;;; Written by Guy Steele.
21 ;;; Rewritten by David Dill.
22 ;;; Hacked up for speed by Scott Fahlman.
23 ;;; Font support flushed and type hackery rewritten by Rob MacLachlan.
24 ;;;
25 (in-package "LISP")
26
27 (intl:textdomain "cmucl")
28
29 (export '(char-code-limit standard-char-p graphic-char-p
30 alpha-char-p upper-case-p lower-case-p both-case-p digit-char-p
31 alphanumericp char= char/= char< char> char<= char>= char-equal
32 char-not-equal char-lessp char-greaterp char-not-greaterp
33 char-not-lessp character char-code code-char char-upcase
34 char-titlecase title-case-p
35 char-downcase digit-char char-int char-name name-char
36 codepoint-limit codepoint))
37
38
39 ;;; Compile some trivial character operations via inline expansion:
40 ;;;
41 (declaim (inline standard-char-p graphic-char-p alpha-char-p
42 upper-case-p lower-case-p both-case-p alphanumericp
43 char-int))
44
45 (declaim (maybe-inline digit-char-p digit-weight))
46
47 (defconstant char-code-limit
48 #-unicode 256
49 #+unicode 65536
50 "The upper exclusive bound on values produced by CHAR-CODE.")
51
52 (deftype char-code ()
53 `(integer 0 (,char-code-limit)))
54
55 (defconstant codepoint-limit
56 #x110000
57 "The upper exclusive bound on the value of a Unicode codepoint")
58
59 ;;; The range of a Unicode code point
60 (deftype codepoint ()
61 `(integer 0 (,codepoint-limit)))
62
63
64 (macrolet ((frob (char-names-list)
65 (collect ((results))
66 (dolist (code char-names-list)
67 (destructuring-bind (ccode names)
68 code
69 (dolist (name names)
70 (results (cons name (code-char ccode))))))
71 `(defparameter char-name-alist ',(results)
72 "This is the alist of (character-name . character) for characters with
73 long names. The first name in this list for a given character is used
74 on typeout and is the preferred form for input."))))
75 ;; Note: the char-name listed here should be what string-capitalize
76 ;; would produce. This is needed to match what format ~:C would
77 ;; produce.
78 (frob ((#x00 ("Null" "^@" "NUL"))
79 (#x01 ("^A" "SOH"))
80 (#x02 ("^B" "STX"))
81 (#x03 ("^C" "ETX"))
82 (#x04 ("^D" "EOT"))
83 (#x05 ("^E" "ENQ"))
84 (#x06 ("^F" "ACK"))
85 (#x07 ("Bell" "^g" "BEL"))
86 (#x08 ("Backspace" "^h" "BS"))
87 (#x09 ("Tab" "^i" "HT"))
88 (#x0A ("Newline" "Linefeed" "^j" "LF" "NL" ))
89 (#x0B ("Vt" "^k"))
90 (#x0C ("Page" "^l" "Form" "Formfeed" "FF" "NP"))
91 (#x0D ("Return" "^m" "RET" "CR"))
92 (#x0E ("^N" "SO"))
93 (#x0F ("^O" "SI"))
94 (#x10 ("^P" "DLE"))
95 (#x11 ("^Q" "DC1"))
96 (#x12 ("^R" "DC2"))
97 (#x13 ("^S" "DC3"))
98 (#x14 ("^T" "DC4"))
99 (#x15 ("^U" "NAK"))
100 (#x16 ("^V" "SYN"))
101 (#x17 ("^W" "ETB"))
102 (#x18 ("^X" "CAN"))
103 (#x19 ("^Y" "EM" "EOM"))
104 (#x1A ("^Z" "SUB"))
105 (#x1B ("Escape" "^[" "Altmode" "ESC" "Alt"))
106 (#x1C ("Is4" "FS" "^\\"))
107 (#x1D ("Is3" "GS" "^]"))
108 (#x1E ("Is2" "RS" "^^"))
109 (#x1F ("Is1" "US" "^_"))
110 (#x20 ("Space" "SP" "SPC"))
111 (#x7f ("Rubout" "Delete" "DEL")))))
112
113
114 ;;;; Accessor functions:
115
116 (defun char-code (char)
117 "Returns the integer code of CHAR."
118 (etypecase char
119 (base-char (char-code (truly-the base-char char)))))
120
121
122 (defun char-int (char)
123 "Returns the integer code of CHAR. This is the same as char-code, as
124 CMU Common Lisp does not implement character bits or fonts."
125 (char-code char))
126
127
128 (defun code-char (code)
129 "Returns the character with the code CODE."
130 (declare (type char-code code))
131 (code-char code))
132
133
134 (defun character (object)
135 "Coerces its argument into a character object if possible. Accepts
136 characters, strings and symbols of length 1."
137 (flet ((do-error (control args)
138 (error 'simple-type-error
139 :datum object
140 ;;?? how to express "symbol with name of length 1"?
141 :expected-type '(or character (string 1))
142 :format-control control
143 :format-arguments args)))
144 (typecase object
145 (character object)
146 (string (if (= 1 (length (the string object)))
147 (char object 0)
148 (do-error
149 (intl:gettext "String is not of length one: ~S") (list object))))
150 (symbol (if (= 1 (length (symbol-name object)))
151 (schar (symbol-name object) 0)
152 (do-error
153 (intl:gettext "Symbol name is not of length one: ~S") (list object))))
154 (t (do-error (intl:gettext "~S cannot be coerced to a character.") (list object))))))
155
156
157 (defun char-name (char)
158 "Given a character object, char-name returns the name for that
159 object (a symbol)."
160 (let ((name (car (rassoc char char-name-alist))))
161 (if name
162 name
163 #-unicode nil
164 #+unicode
165 ;; Return the Unicode name of the character,
166 ;; or U+xxxx if it doesn't have a name
167 (let* ((code (char-code char))
168 (name (unicode-name code)))
169 (if name
170 (nstring-capitalize (nsubstitute #\_ #\Space name))
171 (format nil "U+~4,'0X" code))))))
172
173 (defun name-char (name)
174 "Given an argument acceptable to string, name-char returns a character
175 object whose name is that symbol, if one exists, otherwise NIL."
176 (if (and (stringp name) (> (length name) 2) (string-equal name "U+" :end1 2))
177 (code-char (parse-integer name :radix 16 :start 1))
178 (or (cdr (assoc (string name) char-name-alist :test #'string-equal))
179 #-unicode nil
180 #+unicode
181 (let ((code (unicode-name-to-codepoint
182 (nsubstitute #\Space #\_ (string-upcase name)))))
183 (if code
184 (code-char code)
185 nil)))))
186
187
188 ;;;; Predicates:
189
190 (defun standard-char-p (char)
191 "The argument must be a character object. Standard-char-p returns T if the
192 argument is a standard character -- one of the 95 ASCII printing characters
193 or <return>."
194 (declare (character char))
195 (and (typep char 'base-char)
196 (let ((n (char-code (the base-char char))))
197 (or (< 31 n 127)
198 (= n 10)))))
199
200 (defun %standard-char-p (thing)
201 "Return T if and only if THING is a standard-char. Differs from
202 standard-char-p in that THING doesn't have to be a character."
203 (and (characterp thing) (standard-char-p thing)))
204
205 (defun graphic-char-p (char)
206 "The argument must be a character object. Graphic-char-p returns T if the
207 argument is a printing character, otherwise returns NIL."
208 (declare (character char))
209 (and (typep char 'base-char)
210 (let ((m (char-code (the base-char char))))
211 (or (< 31 m 127)
212 #+(and unicode (not unicode-bootstrap))
213 (and (> m 127)
214 (>= (unicode-category m) +unicode-category-graphic+))))))
215
216
217 (defun alpha-char-p (char)
218 "The argument must be a character object. Alpha-char-p returns T if the
219 argument is an alphabetic character; otherwise NIL."
220 (declare (character char))
221 (let ((m (char-code char)))
222 (or (< 64 m 91) (< 96 m 123)
223 #+(and unicode (not unicode-bootstrap))
224 (and (> m 127)
225 (<= +unicode-category-letter+ (unicode-category m)
226 (+ +unicode-category-letter+ #x0F))))))
227
228
229 (defun upper-case-p (char)
230 "The argument must be a character object; upper-case-p returns T if the
231 argument is an upper-case character, NIL otherwise."
232 (declare (character char))
233 (let ((m (char-code char)))
234 (or (< 64 m 91)
235 #+(and unicode (not unicode-bootstrap))
236 (and (> m 127)
237 (= (unicode-category m) +unicode-category-upper+)))))
238
239
240 (defun lower-case-p (char)
241 "The argument must be a character object; lower-case-p returns T if the
242 argument is a lower-case character, NIL otherwise."
243 (declare (character char))
244 (let ((m (char-code char)))
245 (or (< 96 m 123)
246 #+(and unicode (not unicode-bootstrap))
247 (and (> m 127)
248 (= (unicode-category m) +unicode-category-lower+)))))
249
250 (defun title-case-p (char)
251 "The argument must be a character object; title-case-p returns T if the
252 argument is a title-case character, NIL otherwise."
253 (declare (character char))
254 (let ((m (char-code char)))
255 (or (< 64 m 91)
256 #+(and unicode (not unicode-bootstrap))
257 (and (> m 127)
258 (= (unicode-category m) +unicode-category-title+)))))
259
260
261 (defun both-case-p (char)
262 "The argument must be a character object. Both-case-p returns T if the
263 argument is an alphabetic character and if the character exists in
264 both upper and lower case. For ASCII, this is the same as Alpha-char-p."
265 (declare (character char))
266 (let ((m (char-code char)))
267 (or (< 64 m 91) (< 96 m 123)
268 #+(and unicode (not unicode-bootstrap))
269 (and (> m 127)
270 (<= +unicode-category-upper+
271 (unicode-category m)
272 +unicode-category-title+)))))
273
274
275 (defun digit-char-p (char &optional (radix 10.))
276 "If char is a digit in the specified radix, returns the fixnum for
277 which that digit stands, else returns NIL. Radix defaults to 10
278 (decimal)."
279 (declare (character char) (type (integer 2 36) radix))
280 (let ((m (- (char-code char) 48)))
281 (declare (fixnum m))
282 (cond ((<= radix 10.)
283 ;; Special-case decimal and smaller radices.
284 (if (and (>= m 0) (< m radix)) m nil))
285 ;; Digits 0 - 9 are used as is, since radix is larger.
286 ((and (>= m 0) (< m 10)) m)
287 ;; Check for upper case A - Z.
288 ((and (>= (setq m (- m 7)) 10) (< m radix)) m)
289 ;; Also check lower case a - z.
290 ((and (>= (setq m (- m 32)) 10) (< m radix)) m)
291 ;; Else, fail.
292 (t nil))))
293
294
295 (defun alphanumericp (char)
296 "Given a character-object argument, alphanumericp returns T if the
297 argument is either numeric or alphabetic."
298 (declare (character char))
299 (let ((m (char-code char)))
300 ;; Shortcut for ASCII digits and upper and lower case ASCII letters
301 (or (< 47 m 58) (< 64 m 91) (< 96 m 123)
302 #+(and unicode (not unicode-bootstrap))
303 (and (> m 127)
304 (<= +unicode-category-letter+ (unicode-category m)
305 (+ +unicode-category-letter+ #x0F))))))
306
307
308 (defun char= (character &rest more-characters)
309 "Returns T if all of its arguments are the same character."
310 (do ((clist more-characters (cdr clist)))
311 ((atom clist) T)
312 (unless (eq (car clist) character) (return nil))))
313
314
315 (defun char/= (character &rest more-characters)
316 "Returns T if no two of its arguments are the same character."
317 (do* ((head character (car list))
318 (list more-characters (cdr list)))
319 ((atom list) T)
320 (unless (do* ((l list (cdr l))) ;inner loop returns T
321 ((atom l) T) ; iff head /= rest.
322 (if (eq head (car l)) (return nil)))
323 (return nil))))
324
325
326 (defun char< (character &rest more-characters)
327 "Returns T if its arguments are in strictly increasing alphabetic order."
328 (do* ((c character (car list))
329 (list more-characters (cdr list)))
330 ((atom list) T)
331 (unless (< (char-int c)
332 (char-int (car list)))
333 (return nil))))
334
335
336 (defun char> (character &rest more-characters)
337 "Returns T if its arguments are in strictly decreasing alphabetic order."
338 (do* ((c character (car list))
339 (list more-characters (cdr list)))
340 ((atom list) T)
341 (unless (> (char-int c)
342 (char-int (car list)))
343 (return nil))))
344
345
346 (defun char<= (character &rest more-characters)
347 "Returns T if its arguments are in strictly non-decreasing alphabetic order."
348 (do* ((c character (car list))
349 (list more-characters (cdr list)))
350 ((atom list) T)
351 (unless (<= (char-int c)
352 (char-int (car list)))
353 (return nil))))
354
355
356 (defun char>= (character &rest more-characters)
357 "Returns T if its arguments are in strictly non-increasing alphabetic order."
358 (do* ((c character (car list))
359 (list more-characters (cdr list)))
360 ((atom list) T)
361 (unless (>= (char-int c)
362 (char-int (car list)))
363 (return nil))))
364
365
366
367 ;;; Equal-Char-Code is used by the following functions as a version of char-int
368 ;;; which loses case info. We convert to lower case
369
370 (defmacro equal-char-code (character)
371 `(let ((ch (char-code ,character)))
372 ;; Handle ASCII separately for bootstrapping and for unidata missing.
373 (if (< 64 ch 91)
374 (+ ch 32)
375 #-(and unicode (not unicode-bootstrap))
376 ch
377 #+(and unicode (not unicode-bootstrap))
378 (if (> ch 127) (unicode-lower ch) ch))))
379
380
381 (defun char-equal (character &rest more-characters)
382 "Returns T if all of its arguments are the same character.
383 Case is ignored."
384 (do ((clist more-characters (cdr clist)))
385 ((atom clist) T)
386 (unless (= (equal-char-code (car clist))
387 (equal-char-code character))
388 (return nil))))
389
390
391 (defun char-not-equal (character &rest more-characters)
392 "Returns T if no two of its arguments are the same character.
393 Case is ignored."
394 (do* ((head character (car list))
395 (list more-characters (cdr list)))
396 ((atom list) T)
397 (unless (do* ((l list (cdr l)))
398 ((atom l) T)
399 (if (= (equal-char-code head)
400 (equal-char-code (car l)))
401 (return nil)))
402 (return nil))))
403
404
405 (defun char-lessp (character &rest more-characters)
406 "Returns T if its arguments are in strictly increasing alphabetic order.
407 Case is ignored."
408 (do* ((c character (car list))
409 (list more-characters (cdr list)))
410 ((atom list) T)
411 (unless (< (equal-char-code c)
412 (equal-char-code (car list)))
413 (return nil))))
414
415
416 (defun char-greaterp (character &rest more-characters)
417 "Returns T if its arguments are in strictly decreasing alphabetic order.
418 Case is ignored."
419 (do* ((c character (car list))
420 (list more-characters (cdr list)))
421 ((atom list) T)
422 (unless (> (equal-char-code c)
423 (equal-char-code (car list)))
424 (return nil))))
425
426
427 (defun char-not-greaterp (character &rest more-characters)
428 "Returns T if its arguments are in strictly non-decreasing alphabetic order.
429 Case is ignored."
430 (do* ((c character (car list))
431 (list more-characters (cdr list)))
432 ((atom list) T)
433 (unless (<= (equal-char-code c)
434 (equal-char-code (car list)))
435 (return nil))))
436
437
438 (defun char-not-lessp (character &rest more-characters)
439 "Returns T if its arguments are in strictly non-increasing alphabetic order.
440 Case is ignored."
441 (do* ((c character (car list))
442 (list more-characters (cdr list)))
443 ((atom list) T)
444 (unless (>= (equal-char-code c)
445 (equal-char-code (car list)))
446 (return nil))))
447
448
449
450
451 ;;;; Miscellaneous functions:
452
453 (defun char-upcase (char)
454 "Returns CHAR converted to upper-case if that is possible."
455 (declare (character char))
456 #-(and unicode (not unicode-bootstrap))
457 (if (lower-case-p char)
458 (code-char (- (char-code char) 32))
459 char)
460 #+(and unicode (not unicode-bootstrap))
461 (let ((m (char-code char)))
462 (cond ((> m 127) (code-char (unicode-upper m)))
463 ((< 96 m 123) (code-char (- m 32)))
464 (t char))))
465
466 (defun char-titlecase (char)
467 "Returns CHAR converted to title-case if that is possible."
468 (declare (character char))
469 #-(and unicode (not unicode-bootstrap))
470 (if (lower-case-p char)
471 (code-char (- (char-code char) 32))
472 char)
473 #+(and unicode (not unicode-bootstrap))
474 (let ((m (char-code char)))
475 (cond ((> m 127) (code-char (unicode-title m)))
476 ((< 96 m 123) (code-char (- m 32)))
477 (t char))))
478
479 (defun char-downcase (char)
480 "Returns CHAR converted to lower-case if that is possible."
481 (declare (character char))
482 #-(and unicode (not unicode-bootstrap))
483 (if (upper-case-p char)
484 (code-char (+ (char-code char) 32))
485 char)
486 #+(and unicode (not unicode-bootstrap))
487 (let ((m (char-code char)))
488 (cond ((> m 127) (code-char (unicode-lower m)))
489 ((< 64 m 91) (code-char (+ m 32)))
490 (t char))))
491
492 (defun digit-char (weight &optional (radix 10))
493 "All arguments must be integers. Returns a character object that
494 represents a digit of the given weight in the specified radix. Returns
495 NIL if no such character exists."
496 (declare (type (integer 2 36) radix) (type unsigned-byte weight))
497 (and (typep weight 'fixnum)
498 (>= weight 0) (< weight radix) (< weight 36)
499 (code-char (if (< weight 10) (+ 48 weight) (+ 55 weight)))))

  ViewVC Help
Powered by ViewVC 1.1.5