/[cmucl]/src/code/string.lisp
ViewVC logotype

Contents of /src/code/string.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12.30.5 - (show annotations)
Mon Apr 20 19:46:48 2009 UTC (4 years, 11 months ago) by rtoy
Branch: unicode-utf16-extfmt-branch
Changes since 1.12.30.4: +7 -7 lines
NSTRING-UPCASE and NSTRING-DOWNCASE were referencing the unknown
symbols NEWSTRING and NEW-INDEX.  Replace with STRING and INDEX,
respectively.  I think that's what was intended.
1 ;;; -*- Log: code.log; Package: Lisp -*-
2 ;;;
3 ;;; **********************************************************************
4 ;;; This code was written as part of the CMU Common Lisp project at
5 ;;; Carnegie Mellon University, and has been placed in the public domain.
6 ;;;
7 (ext:file-comment
8 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/string.lisp,v 1.12.30.5 2009/04/20 19:46:48 rtoy Exp $")
9 ;;;
10 ;;; **********************************************************************
11 ;;;
12 ;;; Functions to implement strings for CMU Common Lisp
13 ;;; Written by David Dill
14 ;;; Rewritten by Skef Wholey, Bill Chiles and Rob MacLachlan.
15 ;;;
16 ;;; ****************************************************************
17 ;;;
18 (in-package "LISP")
19 (export '(char schar glyph sglyph string
20 string= string-equal string< string> string<= string>= string/=
21 string-lessp string-greaterp string-not-lessp string-not-greaterp
22 string-not-equal
23 string-to-nfd string-to-nfkd string-to-nfc string-to-nfkc
24 make-string
25 string-trim string-left-trim string-right-trim
26 string-upcase
27 string-downcase string-capitalize nstring-upcase nstring-downcase
28 nstring-capitalize))
29
30
31 (defun string (X)
32 "Coerces X into a string. If X is a string, X is returned. If X is a
33 symbol, X's pname is returned. If X is a character then a one element
34 string containing that character is returned. If X cannot be coerced
35 into a string, an error occurs."
36 (cond ((stringp x) x)
37 ((symbolp x) (symbol-name x))
38 ((characterp x)
39 (let ((res (make-string 1)))
40 (setf (schar res 0) x) res))
41 (t
42 (error 'simple-type-error
43 :datum x
44 :expected-type '(or string symbol character)
45 :format-control "~S cannot be coerced to a string."
46 :format-arguments (list x)))))
47
48 ;;; With-One-String is used to set up some string hacking things. The keywords
49 ;;; are parsed, and the string is hacked into a simple-string.
50
51 (eval-when (compile)
52
53 (defmacro with-one-string (string start end cum-offset &rest forms)
54 `(let ((,string (if (stringp ,string) ,string (string ,string))))
55 ;; Optimizer may prove STRING is one.
56 (declare (optimize (ext:inhibit-warnings 3)))
57 (with-array-data ((,string ,string :offset-var ,cum-offset)
58 (,start ,start)
59 (,end (or ,end (length (the vector ,string)))))
60 ,@forms)))
61
62 )
63
64 ;;; With-String is like With-One-String, but doesn't parse keywords.
65
66 (eval-when (compile)
67
68 (defmacro with-string (string &rest forms)
69 `(let ((,string (if (stringp ,string) ,string (string ,string))))
70 (with-array-data ((,string ,string)
71 (start)
72 (end (length (the vector ,string))))
73 ,@forms)))
74
75 )
76
77 ;;; With-Two-Strings is used to set up string comparison operations. The
78 ;;; keywords are parsed, and the strings are hacked into simple-strings.
79
80 (eval-when (compile)
81
82 (defmacro with-two-strings (string1 string2 start1 end1 cum-offset-1
83 start2 end2 &rest forms)
84 `(let ((,string1 (if (stringp ,string1) ,string1 (string ,string1)))
85 (,string2 (if (stringp ,string2) ,string2 (string ,string2))))
86 (with-array-data ((,string1 ,string1 :offset-var ,cum-offset-1)
87 (,start1 ,start1)
88 (,end1 (or ,end1 (length (the vector ,string1)))))
89 (with-array-data ((,string2 ,string2)
90 (,start2 ,start2)
91 (,end2 (or ,end2 (length (the vector ,string2)))))
92 ,@forms))))
93
94 )
95
96
97 (defun char (string index)
98 "Given a string and a non-negative integer index less than the length of
99 the string, returns the character object representing the character at
100 that position in the string."
101 (declare (optimize (safety 1)))
102 (char string index))
103
104 (defun %charset (string index new-el)
105 (declare (optimize (safety 1)))
106 (setf (char string index) new-el))
107
108 (defun schar (string index)
109 "SCHAR returns the character object at an indexed position in a string
110 just as CHAR does, except the string must be a simple-string."
111 (declare (optimize (safety 1)))
112 (schar string index))
113
114 (defun %scharset (string index new-el)
115 (declare (optimize (safety 1)))
116 (setf (schar string index) new-el))
117
118 (defun string=* (string1 string2 start1 end1 start2 end2)
119 (with-two-strings string1 string2 start1 end1 offset1 start2 end2
120 (not (%sp-string-compare string1 start1 end1 string2 start2 end2))))
121
122
123 (defun string/=* (string1 string2 start1 end1 start2 end2)
124 (with-two-strings string1 string2 start1 end1 offset1 start2 end2
125 (let ((comparison (%sp-string-compare string1 start1 end1
126 string2 start2 end2)))
127 (if comparison (- (the fixnum comparison) offset1)))))
128
129 (eval-when (compile eval)
130
131 ;;; Lessp is true if the desired expansion is for string<* or string<=*.
132 ;;; Equalp is true if the desired expansion is for string<=* or string>=*.
133 (defmacro string<>=*-body (lessp equalp)
134 (let ((offset1 (gensym)))
135 `(with-two-strings string1 string2 start1 end1 ,offset1 start2 end2
136 (let ((index (%sp-string-compare string1 start1 end1
137 string2 start2 end2)))
138 (if index
139 (cond ((= (the fixnum index) (the fixnum end1))
140 ,(if lessp
141 `(- (the fixnum index) ,offset1)
142 `nil))
143 ((= (+ (the fixnum index) (- start2 start1))
144 (the fixnum end2))
145 ,(if lessp
146 `nil
147 `(- (the fixnum index) ,offset1)))
148 #-unicode
149 ((,(if lessp 'char< 'char>)
150 (schar string1 index)
151 (schar string2 (+ (the fixnum index) (- start2 start1))))
152 (- (the fixnum index) ,offset1))
153 #-unicode
154 (t nil)
155 #+unicode
156 (t
157 ;; Compare in code point order. See
158 ;; http://icu-project.org/docs/papers/utf16_code_point_order.html
159 (flet ((fixup (code)
160 (if (>= code #xe000)
161 (- code #x800)
162 (+ code #x2000))))
163 (declare (inline fixup))
164 (let* ((c1 (char-code (schar string1 index)))
165 (c2 (char-code (schar string2 (+ (the fixnum index) (- start2 start1))))))
166 (cond ((and (>= c1 #xd800)
167 (>= c2 #xd800))
168 (let ((fix-c1 (fixup c1))
169 (fix-c2 (fixup c2)))
170 (if (,(if lessp '< '>) fix-c1 fix-c2)
171 (- (the fixnum index) ,offset1)
172 nil)))
173 (t
174 (if (,(if lessp '< '>) c1 c2)
175 (- (the fixnum index) ,offset1)
176 nil)))))))
177 ,(if equalp `(- (the fixnum end1) ,offset1) 'nil))))))
178 ) ; eval-when
179
180 (defun string<* (string1 string2 start1 end1 start2 end2)
181 (declare (fixnum start1 start2))
182 (string<>=*-body t nil))
183
184 (defun string>* (string1 string2 start1 end1 start2 end2)
185 (declare (fixnum start1 start2))
186 (string<>=*-body nil nil))
187
188 (defun string<=* (string1 string2 start1 end1 start2 end2)
189 (declare (fixnum start1 start2))
190 (string<>=*-body t t))
191
192 (defun string>=* (string1 string2 start1 end1 start2 end2)
193 (declare (fixnum start1 start2))
194 (string<>=*-body nil t))
195
196
197
198 (defun string< (string1 string2 &key (start1 0) end1 (start2 0) end2)
199 "Given two strings, if the first string is lexicographically less than
200 the second string, returns the longest common prefix (using char=)
201 of the two strings. Otherwise, returns ()."
202 (string<* string1 string2 start1 end1 start2 end2))
203
204 (defun string> (string1 string2 &key (start1 0) end1 (start2 0) end2)
205 "Given two strings, if the first string is lexicographically greater than
206 the second string, returns the longest common prefix (using char=)
207 of the two strings. Otherwise, returns ()."
208 (string>* string1 string2 start1 end1 start2 end2))
209
210
211 (defun string<= (string1 string2 &key (start1 0) end1 (start2 0) end2)
212 "Given two strings, if the first string is lexicographically less than
213 or equal to the second string, returns the longest common prefix
214 (using char=) of the two strings. Otherwise, returns ()."
215 (string<=* string1 string2 start1 end1 start2 end2))
216
217 (defun string>= (string1 string2 &key (start1 0) end1 (start2 0) end2)
218 "Given two strings, if the first string is lexicographically greater
219 than or equal to the second string, returns the longest common prefix
220 (using char=) of the two strings. Otherwise, returns ()."
221 (string>=* string1 string2 start1 end1 start2 end2))
222
223 (defun string= (string1 string2 &key (start1 0) end1 (start2 0) end2)
224 "Given two strings (string1 and string2), and optional integers start1,
225 start2, end1 and end2, compares characters in string1 to characters in
226 string2 (using char=)."
227 (string=* string1 string2 start1 end1 start2 end2))
228
229 (defun string/= (string1 string2 &key (start1 0) end1 (start2 0) end2)
230 "Given two strings, if the first string is not lexicographically equal
231 to the second string, returns the longest common prefix (using char=)
232 of the two strings. Otherwise, returns ()."
233 (string/=* string1 string2 start1 end1 start2 end2))
234
235
236 (eval-when (compile eval)
237
238 ;;; STRING-NOT-EQUAL-LOOP is used to generate character comparison loops for
239 ;;; STRING-EQUAL and STRING-NOT-EQUAL.
240 (defmacro string-not-equal-loop (end end-value
241 &optional (abort-value nil abortp))
242 (declare (fixnum end))
243 (let ((end-test (if (= end 1)
244 `(= index1 (the fixnum end1))
245 `(= index2 (the fixnum end2)))))
246 `(do ((index1 start1 (1+ index1))
247 (index2 start2 (1+ index2)))
248 (,(if abortp
249 end-test
250 `(or ,end-test
251 (not (char-equal (schar string1 index1)
252 (schar string2 index2)))))
253 ,end-value)
254 (declare (fixnum index1 index2))
255 ,@(if abortp
256 `((if (not (char-equal (schar string1 index1)
257 (schar string2 index2)))
258 (return ,abort-value)))))))
259
260 ) ; eval-when
261
262 (defun string-equal (string1 string2 &key (start1 0) end1 (start2 0) end2)
263 "Given two strings (string1 and string2), and optional integers start1,
264 start2, end1 and end2, compares characters in string1 to characters in
265 string2 (using char-equal)."
266 (declare (fixnum start1 start2))
267 (with-two-strings string1 string2 start1 end1 offset1 start2 end2
268 (let ((slen1 (- (the fixnum end1) start1))
269 (slen2 (- (the fixnum end2) start2)))
270 (declare (fixnum slen1 slen2))
271 (if (or (minusp slen1) (minusp slen2))
272 ;;prevent endless looping later.
273 (error "Improper bounds for string comparison."))
274 (if (= slen1 slen2)
275 ;;return () immediately if lengths aren't equal.
276 (string-not-equal-loop 1 t nil)))))
277
278 (defun string-not-equal (string1 string2 &key (start1 0) end1 (start2 0) end2)
279 "Given two strings, if the first string is not lexicographically equal
280 to the second string, returns the longest common prefix (using char-equal)
281 of the two strings. Otherwise, returns ()."
282 (with-two-strings string1 string2 start1 end1 offset1 start2 end2
283 (let ((slen1 (- end1 start1))
284 (slen2 (- end2 start2)))
285 (declare (fixnum slen1 slen2))
286 (if (or (minusp slen1) (minusp slen2))
287 ;;prevent endless looping later.
288 (error "Improper bounds for string comparison."))
289 (cond ((or (minusp slen1) (or (minusp slen2)))
290 (error "Improper substring for comparison."))
291 ((= slen1 slen2)
292 (string-not-equal-loop 1 nil (- index1 offset1)))
293 ((< slen1 slen2)
294 (string-not-equal-loop 1 (- index1 offset1)))
295 (t
296 (string-not-equal-loop 2 (- index1 offset1)))))))
297
298
299
300 (eval-when (compile eval)
301
302 ;;; STRING-LESS-GREATER-EQUAL-TESTS returns a test on the lengths of string1
303 ;;; and string2 and a test on the current characters from string1 and string2
304 ;;; for the following macro.
305 (defun string-less-greater-equal-tests (lessp equalp)
306 (if lessp
307 (if equalp
308 ;; STRING-NOT-GREATERP
309 (values '<= `(not (char-greaterp char1 char2)))
310 ;; STRING-LESSP
311 (values '< `(char-lessp char1 char2)))
312 (if equalp
313 ;; STRING-NOT-LESSP
314 (values '>= `(not (char-lessp char1 char2)))
315 ;; STRING-GREATERP
316 (values '> `(char-greaterp char1 char2)))))
317
318 (defmacro string-less-greater-equal (lessp equalp)
319 (multiple-value-bind (length-test character-test)
320 (string-less-greater-equal-tests lessp equalp)
321 `(with-two-strings string1 string2 start1 end1 offset1 start2 end2
322 (let ((slen1 (- (the fixnum end1) start1))
323 (slen2 (- (the fixnum end2) start2)))
324 (declare (fixnum slen1 slen2))
325 (if (or (minusp slen1) (minusp slen2))
326 ;;prevent endless looping later.
327 (error "Improper bounds for string comparison."))
328 (do ((index1 start1 (1+ index1))
329 (index2 start2 (1+ index2))
330 (char1)
331 (char2))
332 ((or (= index1 (the fixnum end1)) (= index2 (the fixnum end2)))
333 (if (,length-test slen1 slen2) (- index1 offset1)))
334 (declare (fixnum index1 index2))
335 (setq char1 (schar string1 index1))
336 (setq char2 (schar string2 index2))
337 (if (not (char-equal char1 char2))
338 (if ,character-test
339 (return (- index1 offset1))
340 (return ()))))))))
341
342 ) ; eval-when
343
344 (defun string-lessp* (string1 string2 start1 end1 start2 end2)
345 (declare (fixnum start1 start2))
346 (string-less-greater-equal t nil))
347
348 (defun string-greaterp* (string1 string2 start1 end1 start2 end2)
349 (declare (fixnum start1 start2))
350 (string-less-greater-equal nil nil))
351
352 (defun string-not-lessp* (string1 string2 start1 end1 start2 end2)
353 (declare (fixnum start1 start2))
354 (string-less-greater-equal nil t))
355
356 (defun string-not-greaterp* (string1 string2 start1 end1 start2 end2)
357 (declare (fixnum start1 start2))
358 (string-less-greater-equal t t))
359
360 (defun string-lessp (string1 string2 &key (start1 0) end1 (start2 0) end2)
361 "Given two strings, if the first string is lexicographically less than
362 the second string, returns the longest common prefix (using char-equal)
363 of the two strings. Otherwise, returns ()."
364 (string-lessp* string1 string2 start1 end1 start2 end2))
365
366 (defun string-greaterp (string1 string2 &key (start1 0) end1 (start2 0) end2)
367 "Given two strings, if the first string is lexicographically greater than
368 the second string, returns the longest common prefix (using char-equal)
369 of the two strings. Otherwise, returns ()."
370 (string-greaterp* string1 string2 start1 end1 start2 end2))
371
372 (defun string-not-lessp (string1 string2 &key (start1 0) end1 (start2 0) end2)
373 "Given two strings, if the first string is lexicographically greater
374 than or equal to the second string, returns the longest common prefix
375 (using char-equal) of the two strings. Otherwise, returns ()."
376 (string-not-lessp* string1 string2 start1 end1 start2 end2))
377
378 (defun string-not-greaterp (string1 string2 &key (start1 0) end1 (start2 0)
379 end2)
380 "Given two strings, if the first string is lexicographically less than
381 or equal to the second string, returns the longest common prefix
382 (using char-equal) of the two strings. Otherwise, returns ()."
383 (string-not-greaterp* string1 string2 start1 end1 start2 end2))
384
385
386 (defun make-string (count &key element-type ((:initial-element fill-char)))
387 "Given a character count and an optional fill character, makes and returns
388 a new string Count long filled with the fill character."
389 (declare (type fixnum count))
390 (assert (subtypep element-type 'character))
391 (if fill-char
392 (do ((i 0 (1+ i))
393 (string (make-string count)))
394 ((= i count) string)
395 (declare (fixnum i))
396 (setf (schar string i) fill-char))
397 (make-string count)))
398
399 (defun string-upcase (string &key (start 0) end)
400 "Given a string, returns a new string that is a copy of it with
401 all lower case alphabetic characters converted to uppercase."
402 (declare (fixnum start))
403 (let* ((string (if (stringp string) string (string string)))
404 (slen (length string)))
405 (declare (fixnum slen))
406 (with-one-string string start end offset
407 (let ((offset-slen (+ slen offset))
408 (newstring (make-string slen)))
409 (declare (fixnum offset-slen))
410 (do ((index offset (1+ index))
411 (new-index 0 (1+ new-index)))
412 ((= index start))
413 (declare (fixnum index new-index))
414 (setf (schar newstring new-index) (schar string index)))
415 (do ((index start (1+ index))
416 (new-index (- start offset) (1+ new-index)))
417 ((= index (the fixnum end)))
418 (declare (fixnum index new-index))
419 (let ((code (char-code (schar string index))))
420 (when (and (<= #xD800 code #xDBFF) (< (1+ index) (the fixnum end)))
421 (setq code (+ (ash (- code #xD800) 10) #x2400
422 (char-code (schar string (incf index))))))
423 (setq code (unicode-upper code))
424 (if (< code #x10000)
425 (setf (schar newstring new-index) (code-char code))
426 (let* ((tmp (- code #x10000))
427 (hi (logior (ldb (byte 10 10) tmp) #xD800))
428 (lo (logior (ldb (byte 10 0) tmp) #xDC00)))
429 (setf (schar newstring new-index) (code-char hi))
430 ;;@@ WARNING: this may, in theory, need to extend newstring
431 ;; but that never actually occurs as of Unicode 5.1.0,
432 ;; so I'm just going to ignore it for now...
433 (setf (schar newstring (incf new-index)) (code-char lo))))))
434 ;;@@ WARNING: see above
435 (do ((index end (1+ index))
436 (new-index (- (the fixnum end) offset) (1+ new-index)))
437 ((= index offset-slen))
438 (declare (fixnum index new-index))
439 (setf (schar newstring new-index) (schar string index)))
440 newstring))))
441
442 (defun string-downcase (string &key (start 0) end)
443 "Given a string, returns a new string that is a copy of it with
444 all upper case alphabetic characters converted to lowercase."
445 (declare (fixnum start))
446 (let* ((string (if (stringp string) string (string string)))
447 (slen (length string)))
448 (declare (fixnum slen))
449 (with-one-string string start end offset
450 (let ((offset-slen (+ slen offset))
451 (newstring (make-string slen)))
452 (declare (fixnum offset-slen))
453 (do ((index offset (1+ index))
454 (new-index 0 (1+ new-index)))
455 ((= index start))
456 (declare (fixnum index new-index))
457 (setf (schar newstring new-index) (schar string index)))
458 (do ((index start (1+ index))
459 (new-index (- start offset) (1+ new-index)))
460 ((= index (the fixnum end)))
461 (declare (fixnum index new-index))
462 (let ((code (char-code (schar string index))))
463 (when (and (<= #xD800 code #xDBFF) (< (1+ index) (the fixnum end)))
464 (setq code (+ (ash (- code #xD800) 10) #x2400
465 (char-code (schar string (incf index))))))
466 (setq code (unicode-lower code))
467 (if (< code #x10000)
468 (setf (schar newstring new-index) (code-char code))
469 (let* ((tmp (- code #x10000))
470 (hi (logior (ldb (byte 10 10) tmp) #xD800))
471 (lo (logior (ldb (byte 10 0) tmp) #xDC00)))
472 (setf (schar newstring new-index) (code-char hi))
473 ;;@@ WARNING: this may, in theory, need to extend newstring
474 ;; but that never actually occurs as of Unicode 5.1.0,
475 ;; so I'm just going to ignore it for now...
476 (setf (schar newstring (incf new-index)) (code-char lo))))))
477 ;;@@ WARNING: see above
478 (do ((index end (1+ index))
479 (new-index (- (the fixnum end) offset) (1+ new-index)))
480 ((= index offset-slen))
481 (declare (fixnum index new-index))
482 (setf (schar newstring new-index) (schar string index)))
483 newstring))))
484
485 (defun string-capitalize (string &key (start 0) end)
486 "Given a string, returns a copy of the string with the first
487 character of each ``word'' converted to upper-case, and remaining
488 chars in the word converted to lower case. A ``word'' is defined
489 to be a string of case-modifiable characters delimited by
490 non-case-modifiable chars."
491 (declare (fixnum start))
492 (let* ((string (if (stringp string) string (string string)))
493 (slen (length string)))
494 (declare (fixnum slen))
495 (with-one-string string start end offset
496 (let ((offset-slen (+ slen offset))
497 (newstring (make-string slen)))
498 (declare (fixnum offset-slen))
499 (do ((index offset (1+ index))
500 (new-index 0 (1+ new-index)))
501 ((= index start))
502 (declare (fixnum index new-index))
503 (setf (schar newstring new-index) (schar string index)))
504 (do ((index start (1+ index))
505 (new-index (- start offset) (1+ new-index))
506 (newword t)
507 (char ()))
508 ((= index (the fixnum end)))
509 (declare (fixnum index new-index))
510 (setq char (schar string index))
511 (cond ((not (alphanumericp char))
512 (setq newword t))
513 (newword
514 ;;char is first case-modifiable after non-case-modifiable
515 (setq char (char-titlecase char))
516 (setq newword ()))
517 ;;char is case-modifiable, but not first
518 (t (setq char (char-downcase char))))
519 (setf (schar newstring new-index) char))
520 (do ((index end (1+ index))
521 (new-index (- (the fixnum end) offset) (1+ new-index)))
522 ((= index offset-slen))
523 (declare (fixnum index new-index))
524 (setf (schar newstring new-index) (schar string index)))
525 newstring))))
526
527 (defun nstring-upcase (string &key (start 0) end)
528 "Given a string, returns that string with all lower case alphabetic
529 characters converted to uppercase."
530 (declare (fixnum start))
531 (let ((save-header string))
532 (with-one-string string start end offset
533 (do ((index start (1+ index)))
534 ((= index (the fixnum end)))
535 (declare (fixnum index))
536 (let ((code (char-code (schar string index))))
537 (when (and (<= #xD800 code #xDBFF) (< (1+ index) (the fixnum end)))
538 (setq code (+ (ash (- code #xD800) 10) #x2400
539 (char-code (schar string (incf index))))))
540 (setq code (unicode-upper code))
541 (if (< code #x10000)
542 (setf (schar string index) (code-char code))
543 (let* ((tmp (- code #x10000))
544 (hi (logior (ldb (byte 10 10) tmp) #xD800))
545 (lo (logior (ldb (byte 10 0) tmp) #xDC00)))
546 (setf (schar newstring index) (code-char hi))
547 ;;@@ WARNING: this may, in theory, need to extend newstring
548 ;; (which, obviously, we can't do here. Unless
549 ;; STRING is adjustable, maybe)
550 ;; but that never actually occurs as of Unicode 5.1.0,
551 ;; so I'm just going to ignore it for now...
552 (setf (schar string (incf index)) (code-char lo)))))))
553 save-header))
554
555 (defun nstring-downcase (string &key (start 0) end)
556 "Given a string, returns that string with all upper case alphabetic
557 characters converted to lowercase."
558 (declare (fixnum start))
559 (let ((save-header string))
560 (with-one-string string start end offset
561 (do ((index start (1+ index)))
562 ((= index (the fixnum end)))
563 (declare (fixnum index))
564 (let ((code (char-code (schar string index))))
565 (when (and (<= #xD800 code #xDBFF) (< (1+ index) (the fixnum end)))
566 (setq code (+ (ash (- code #xD800) 10) #x2400
567 (char-code (schar string (incf index))))))
568 (setq code (unicode-lower code))
569 (if (< code #x10000)
570 (setf (schar string index) (code-char code))
571 (let* ((tmp (- code #x10000))
572 (hi (logior (ldb (byte 10 10) tmp) #xD800))
573 (lo (logior (ldb (byte 10 0) tmp) #xDC00)))
574 (setf (schar string index) (code-char hi))
575 ;;@@ WARNING: this may, in theory, need to extend newstring
576 ;; (which, obviously, we can't do here. Unless
577 ;; STRING is adjustable, maybe)
578 ;; but that never actually occurs as of Unicode 5.1.0,
579 ;; so I'm just going to ignore it for now...
580 (setf (schar string (incf index)) (code-char lo)))))))
581 save-header))
582
583 (defun nstring-capitalize (string &key (start 0) end)
584 "Given a string, returns that string with the first
585 character of each ``word'' converted to upper-case, and remaining
586 chars in the word converted to lower case. A ``word'' is defined
587 to be a string of case-modifiable characters delimited by
588 non-case-modifiable chars."
589 (declare (fixnum start))
590 (let ((save-header string))
591 (with-one-string string start end offset
592 (do ((index start (1+ index))
593 (newword t)
594 (char ()))
595 ((= index (the fixnum end)))
596 (declare (fixnum index))
597 (setq char (schar string index))
598 (cond ((not (alphanumericp char))
599 (setq newword t))
600 (newword
601 ;;char is first case-modifiable after non-case-modifiable
602 (setf (schar string index) (char-titlecase char))
603 (setq newword ()))
604 (t
605 (setf (schar string index) (char-downcase char))))))
606 save-header))
607
608 (defun string-left-trim (char-bag string)
609 "Given a set of characters (a list or string) and a string, returns
610 a copy of the string with the characters in the set removed from the
611 left end."
612 (with-string string
613 (do ((index start (1+ index)))
614 ((or (= index (the fixnum end))
615 (not (find (schar string index) char-bag)))
616 (subseq (the simple-string string) index end))
617 (declare (fixnum index)))))
618
619 (defun string-right-trim (char-bag string)
620 "Given a set of characters (a list or string) and a string, returns
621 a copy of the string with the characters in the set removed from the
622 right end."
623 (with-string string
624 (do ((index (1- (the fixnum end)) (1- index)))
625 ((or (< index start) (not (find (schar string index) char-bag)))
626 (subseq (the simple-string string) start (1+ index)))
627 (declare (fixnum index)))))
628
629 (defun string-trim (char-bag string)
630 "Given a set of characters (a list or string) and a string, returns a
631 copy of the string with the characters in the set removed from both
632 ends."
633 (with-string string
634 (let* ((left-end (do ((index start (1+ index)))
635 ((or (= index (the fixnum end))
636 (not (find (schar string index) char-bag)))
637 index)
638 (declare (fixnum index))))
639 (right-end (do ((index (1- (the fixnum end)) (1- index)))
640 ((or (< index left-end)
641 (not (find (schar string index) char-bag)))
642 (1+ index))
643 (declare (fixnum index)))))
644 (subseq (the simple-string string) left-end right-end))))
645
646 (declaim (inline %glyph-f %glyph-b))
647 (defun %glyph-f (string index)
648 (declare (optimize (speed 3) (space 0) (safety 0) (debug 0))
649 (type simple-string string) (type kernel:index index))
650 (flet ((xchar (string index)
651 (let ((c (char-code (schar string index))))
652 (declare (type (integer 0 #x10FFFF) c))
653 (cond ((<= #xD800 c #xDBFF)
654 (if (= (1+ index) (length string))
655 (error "String ends with an unpaired surrogate.")
656 (let ((c2 (char-code (schar string (1+ index)))))
657 (if (<= #xDC00 c2 #xDFFF)
658 (+ (ash (- c #xD800) 10) c2 #x2400)
659 (error "Naked high surrogate in string.")))))
660 ((<= #xDC00 c #xDFFF)
661 (error "Naked low surrogate in string."))
662 (t c)))))
663 (let* ((prev 0)
664 (l (length string))
665 (c (xchar string index))
666 (n (+ index (if (> c #xFFFF) 2 1))))
667 (declare (type (integer 0 #x10FFFF) c) (type kernel:index l n))
668 (loop while (< n l) do
669 (let* ((c (xchar string n))
670 (d (the (unsigned-byte 8) (unicode-combining-class c))))
671 (when (or (zerop d) (< d prev))
672 (return))
673 (setq prev d)
674 (incf n (if (> c #xFFFF) 2 1))))
675 n)))
676
677 (defun %glyph-b (string index)
678 (declare (optimize (speed 3) (space 0) (safety 0) (debug 0))
679 (type simple-string string) (type kernel:index index))
680 (flet ((xchar (string index)
681 (let ((c (char-code (schar string index))))
682 (declare (type (integer 0 #x10FFFF) c))
683 (cond ((<= #xDC00 c #xDFFF)
684 (let ((c2 (char-code (schar string (1- index)))))
685 (if (<= #xD800 c2 #xDBFF)
686 (+ (ash (- c2 #xD800) 10) c #x2400)
687 (error "Naked low surrogate in string."))))
688 ((<= #xD800 c #xDBFF)
689 (error "Naked high surrogate in string."))
690 (t c)))))
691 (let ((prev 255)
692 (n (1- index)))
693 (declare (type kernel:index n))
694 (loop while (> n 0) do
695 (let* ((c (xchar string n))
696 (d (the (unsigned-byte 8) (unicode-combining-class c))))
697 (cond ((zerop d) (return))
698 ((> d prev) (incf n (if (> c #xFFFF) 2 1)) (return)))
699 (setq prev d)
700 (decf n (if (> c #xFFFF) 2 1))))
701 n)))
702
703 (defun glyph (string index &key (from-end nil))
704 "GLYPH returns the glyph at the indexed position in a string, and the
705 position of the next glyph (or NIL) as a second value. A glyph is
706 a substring consisting of the character at INDEX followed by all
707 subsequent combining characters."
708 (declare (type simple-string string) (type kernel:index index))
709 #-unicode
710 (char string index)
711 #+unicode
712 (with-array-data ((string string) (start) (end))
713 (declare (ignore start end))
714 (let ((n (if from-end (%glyph-b string index) (%glyph-f string index))))
715 (if from-end
716 (values (subseq string n index) (and (> n 0) n))
717 (values (subseq string index n) (and (< n (length string)) n))))))
718
719 (defun sglyph (string index &key (from-end nil))
720 "SGLYPH returns the glyph at the indexed position, the same as GLYPH,
721 except that the string must be a simple-string"
722 (declare (type simple-string string) (type kernel:index index))
723 #-unicode
724 (schar string index)
725 #+unicode
726 (let ((n (if from-end (%glyph-b string index) (%glyph-f string index))))
727 (if from-end
728 (values (subseq string n index) (and (> n 0) n))
729 (values (subseq string index n) (and (< n (length string)) n)))))
730
731 (defun string-to-nfd (string)
732 ;;@@ Implement me
733 string)
734
735 (defun string-to-nfkd (string)
736 ;;@@ Implement me
737 string)
738
739 (defun string-to-nfc (string)
740 ;;@@ Implement me
741 string)
742
743 (defun string-to-nfkc (string)
744 ;;@@ Implement me
745 string)

  ViewVC Help
Powered by ViewVC 1.1.5