/[cmucl]/src/code/string.lisp
ViewVC logotype

Contents of /src/code/string.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.11.2.1 - (show annotations)
Fri Oct 4 23:13:36 2002 UTC (11 years, 6 months ago) by pmai
Branch: UNICODE-BRANCH
Changes since 1.11: +27 -7 lines
Checked in Brian Spilsbury's experimental Unicode, locales, and dialect
support patchset.  This lives on its own branch, so that people can
play with it and tweak it, without disturbing 18e release engineering
on the main branch.  Bootstrapping has only been tried on LINKAGE_TABLE
x86/Linux builds.  A working cross-compile script is checked in under
bootfiles/19a/boot1-cross-unicode.lisp.  The script still leaves you
with some interactive errors, on the cross compile, which you should
answer with 2.  See the mailing list for more information.
1 ;;; -*- Log: code.log; Package: Lisp -*-
2 ;;;
3 ;;; **********************************************************************
4 ;;; This code was written as part of the CMU Common Lisp project at
5 ;;; Carnegie Mellon University, and has been placed in the public domain.
6 ;;;
7 (ext:file-comment
8 "$Header: /tiger/var/lib/cvsroots/cmucl/src/code/string.lisp,v 1.11.2.1 2002/10/04 23:13:36 pmai Exp $")
9 ;;;
10 ;;; **********************************************************************
11 ;;;
12 ;;; Functions to implement strings for CMU Common Lisp
13 ;;; Written by David Dill
14 ;;; Rewritten by Skef Wholey, Bill Chiles and Rob MacLachlan.
15 ;;;
16 ;;; ****************************************************************
17 ;;;
18 (in-package "LISP")
19 (export '(char schar string
20 string= string-equal string< string> string<= string>= string/=
21 string-lessp string-greaterp string-not-lessp string-not-greaterp
22 string-not-equal
23 make-string
24 string-trim string-left-trim string-right-trim
25 string-upcase
26 string-downcase string-capitalize nstring-upcase nstring-downcase
27 nstring-capitalize))
28
29
30 (defun string (X)
31 "Coerces X into a string. If X is a string, X is returned. If X is a
32 symbol, X's pname is returned. If X is a character then a one element
33 string containing that character is returned. If X cannot be coerced
34 into a string, an error occurs."
35 (cond ((stringp x) x)
36 ((symbolp x) (symbol-name x))
37 ((characterp x)
38 (let ((res (make-string 1)))
39 (setf (schar res 0) x) res))
40 (t
41 (error 'simple-type-error
42 :datum x
43 :expected-type '(or string symbol character)
44 :format-control "~S cannot be coerced to a string."
45 :format-arguments (list x)))))
46
47 #+unicode
48 (defun maybe-simple-base-string (string)
49 "Attempts to turn string into a simple-base-string if possible without data-loss."
50 (etypecase string
51 (simple-base-string
52 string)
53 (string
54 (ext:dovector (elt string (coerce string 'simple-base-string))
55 (unless (base-char-p elt)
56 ; couldn't simple-base-stringify without loss
57 (return string))))))
58
59 ;;; With-One-String is used to set up some string hacking things. The keywords
60 ;;; are parsed, and the string is hacked into a simple-string.
61
62 (eval-when (compile)
63
64 (defmacro with-one-string (string start end cum-offset &rest forms)
65 `(let ((,string (if (stringp ,string) ,string (string ,string))))
66 ;; Optimizer may prove STRING is one.
67 (declare (optimize (ext:inhibit-warnings 3)))
68 (with-array-data ((,string ,string :offset-var ,cum-offset)
69 (,start ,start)
70 (,end (or ,end (length (the vector ,string)))))
71 ,@forms)))
72
73 )
74
75 ;;; With-String is like With-One-String, but doesn't parse keywords.
76
77 (eval-when (compile)
78
79 (defmacro with-string (string &rest forms)
80 `(let ((,string (if (stringp ,string) ,string (string ,string))))
81 (with-array-data ((,string ,string)
82 (start)
83 (end (length (the vector ,string))))
84 ,@forms)))
85
86 )
87
88 ;;; With-Two-Strings is used to set up string comparison operations. The
89 ;;; keywords are parsed, and the strings are hacked into simple-strings.
90
91 (eval-when (compile)
92
93 (defmacro with-two-strings (string1 string2 start1 end1 cum-offset-1
94 start2 end2 &rest forms)
95 `(let ((,string1 (if (stringp ,string1) ,string1 (string ,string1)))
96 (,string2 (if (stringp ,string2) ,string2 (string ,string2))))
97 (with-array-data ((,string1 ,string1 :offset-var ,cum-offset-1)
98 (,start1 ,start1)
99 (,end1 (or ,end1 (length (the vector ,string1)))))
100 (with-array-data ((,string2 ,string2)
101 (,start2 ,start2)
102 (,end2 (or ,end2 (length (the vector ,string2)))))
103 ,@forms))))
104
105 )
106
107
108 (defun char (string index)
109 "Given a string and a non-negative integer index less than the length of
110 the string, returns the character object representing the character at
111 that position in the string."
112 (declare (optimize (safety 1)))
113 (char string index))
114
115 (defun %charset (string index new-el)
116 (declare (optimize (safety 1)))
117 (setf (char string index) new-el))
118
119 (defun schar (string index)
120 "SCHAR returns the character object at an indexed position in a string
121 just as CHAR does, except the string must be a simple-string."
122 (declare (optimize (safety 1)))
123 (schar string index))
124
125 (defun %scharset (string index new-el)
126 (declare (optimize (safety 1)))
127 (setf (schar string index) new-el))
128
129 (defun string=* (string1 string2 start1 end1 start2 end2)
130 (with-two-strings string1 string2 start1 end1 offset1 start2 end2
131 (not (%sp-string-compare string1 start1 end1 string2 start2 end2))))
132
133
134 (defun string/=* (string1 string2 start1 end1 start2 end2)
135 (with-two-strings string1 string2 start1 end1 offset1 start2 end2
136 (let ((comparison (%sp-string-compare string1 start1 end1
137 string2 start2 end2)))
138 (if comparison (- (the fixnum comparison) offset1)))))
139
140 (eval-when (compile eval)
141
142 ;;; Lessp is true if the desired expansion is for string<* or string<=*.
143 ;;; Equalp is true if the desired expansion is for string<=* or string>=*.
144 (defmacro string<>=*-body (lessp equalp)
145 (let ((offset1 (gensym)))
146 `(with-two-strings string1 string2 start1 end1 ,offset1 start2 end2
147 (let ((index (%sp-string-compare string1 start1 end1
148 string2 start2 end2)))
149 (if index
150 (cond ((= (the fixnum index) (the fixnum end1))
151 ,(if lessp
152 `(- (the fixnum index) ,offset1)
153 `nil))
154 ((= (+ (the fixnum index) (- start2 start1))
155 (the fixnum end2))
156 ,(if lessp
157 `nil
158 `(- (the fixnum index) ,offset1)))
159 ((,(if lessp 'char< 'char>)
160 (schar string1 index)
161 (schar string2 (+ (the fixnum index) (- start2 start1))))
162 (- (the fixnum index) ,offset1))
163 (t nil))
164 ,(if equalp `(- (the fixnum end1) ,offset1) 'nil))))))
165 ) ; eval-when
166
167 (defun string<* (string1 string2 start1 end1 start2 end2)
168 (declare (fixnum start1 start2))
169 (string<>=*-body t nil))
170
171 (defun string>* (string1 string2 start1 end1 start2 end2)
172 (declare (fixnum start1 start2))
173 (string<>=*-body nil nil))
174
175 (defun string<=* (string1 string2 start1 end1 start2 end2)
176 (declare (fixnum start1 start2))
177 (string<>=*-body t t))
178
179 (defun string>=* (string1 string2 start1 end1 start2 end2)
180 (declare (fixnum start1 start2))
181 (string<>=*-body nil t))
182
183
184
185 (defun string< (string1 string2 &key (start1 0) end1 (start2 0) end2)
186 "Given two strings, if the first string is lexicographically less than
187 the second string, returns the longest common prefix (using char=)
188 of the two strings. Otherwise, returns ()."
189 (string<* string1 string2 start1 end1 start2 end2))
190
191 (defun string> (string1 string2 &key (start1 0) end1 (start2 0) end2)
192 "Given two strings, if the first string is lexicographically greater than
193 the second string, returns the longest common prefix (using char=)
194 of the two strings. Otherwise, returns ()."
195 (string>* string1 string2 start1 end1 start2 end2))
196
197
198 (defun string<= (string1 string2 &key (start1 0) end1 (start2 0) end2)
199 "Given two strings, if the first string is lexicographically less than
200 or equal to the second string, returns the longest common prefix
201 (using char=) of the two strings. Otherwise, returns ()."
202 (string<=* string1 string2 start1 end1 start2 end2))
203
204 (defun string>= (string1 string2 &key (start1 0) end1 (start2 0) end2)
205 "Given two strings, if the first string is lexicographically greater
206 than or equal to the second string, returns the longest common prefix
207 (using char=) of the two strings. Otherwise, returns ()."
208 (string>=* string1 string2 start1 end1 start2 end2))
209
210 (defun string= (string1 string2 &key (start1 0) end1 (start2 0) end2)
211 "Given two strings (string1 and string2), and optional integers start1,
212 start2, end1 and end2, compares characters in string1 to characters in
213 string2 (using char=)."
214 (string=* string1 string2 start1 end1 start2 end2))
215
216 (defun string/= (string1 string2 &key (start1 0) end1 (start2 0) end2)
217 "Given two strings, if the first string is not lexicographically equal
218 to the second string, returns the longest common prefix (using char=)
219 of the two strings. Otherwise, returns ()."
220 (string/=* string1 string2 start1 end1 start2 end2))
221
222
223 (eval-when (compile eval)
224
225 ;;; STRING-NOT-EQUAL-LOOP is used to generate character comparison loops for
226 ;;; STRING-EQUAL and STRING-NOT-EQUAL.
227 (defmacro string-not-equal-loop (end end-value
228 &optional (abort-value nil abortp))
229 (declare (fixnum end))
230 (let ((end-test (if (= end 1)
231 `(= index1 (the fixnum end1))
232 `(= index2 (the fixnum end2)))))
233 `(do ((index1 start1 (1+ index1))
234 (index2 start2 (1+ index2)))
235 (,(if abortp
236 end-test
237 `(or ,end-test
238 (not (char-equal (schar string1 index1)
239 (schar string2 index2)))))
240 ,end-value)
241 (declare (fixnum index1 index2))
242 ,@(if abortp
243 `((if (not (char-equal (schar string1 index1)
244 (schar string2 index2)))
245 (return ,abort-value)))))))
246
247 ) ; eval-when
248
249 (defun string-equal (string1 string2 &key (start1 0) end1 (start2 0) end2)
250 "Given two strings (string1 and string2), and optional integers start1,
251 start2, end1 and end2, compares characters in string1 to characters in
252 string2 (using char-equal)."
253 (declare (fixnum start1 start2))
254 (with-two-strings string1 string2 start1 end1 offset1 start2 end2
255 (let ((slen1 (- (the fixnum end1) start1))
256 (slen2 (- (the fixnum end2) start2)))
257 (declare (fixnum slen1 slen2))
258 (if (or (minusp slen1) (minusp slen2))
259 ;;prevent endless looping later.
260 (error "Improper bounds for string comparison."))
261 (if (= slen1 slen2)
262 ;;return () immediately if lengths aren't equal.
263 (string-not-equal-loop 1 t nil)))))
264
265 (defun string-not-equal (string1 string2 &key (start1 0) end1 (start2 0) end2)
266 "Given two strings, if the first string is not lexicographically equal
267 to the second string, returns the longest common prefix (using char-equal)
268 of the two strings. Otherwise, returns ()."
269 (with-two-strings string1 string2 start1 end1 offset1 start2 end2
270 (let ((slen1 (- end1 start1))
271 (slen2 (- end2 start2)))
272 (declare (fixnum slen1 slen2))
273 (if (or (minusp slen1) (minusp slen2))
274 ;;prevent endless looping later.
275 (error "Improper bounds for string comparison."))
276 (cond ((or (minusp slen1) (or (minusp slen2)))
277 (error "Improper substring for comparison."))
278 ((= slen1 slen2)
279 (string-not-equal-loop 1 nil (- index1 offset1)))
280 ((< slen1 slen2)
281 (string-not-equal-loop 1 (- index1 offset1)))
282 (t
283 (string-not-equal-loop 2 (- index1 offset1)))))))
284
285
286
287 (eval-when (compile eval)
288
289 ;;; STRING-LESS-GREATER-EQUAL-TESTS returns a test on the lengths of string1
290 ;;; and string2 and a test on the current characters from string1 and string2
291 ;;; for the following macro.
292 (defun string-less-greater-equal-tests (lessp equalp)
293 (if lessp
294 (if equalp
295 ;; STRING-NOT-GREATERP
296 (values '<= `(not (char-greaterp char1 char2)))
297 ;; STRING-LESSP
298 (values '< `(char-lessp char1 char2)))
299 (if equalp
300 ;; STRING-NOT-LESSP
301 (values '>= `(not (char-lessp char1 char2)))
302 ;; STRING-GREATERP
303 (values '> `(char-greaterp char1 char2)))))
304
305 (defmacro string-less-greater-equal (lessp equalp)
306 (multiple-value-bind (length-test character-test)
307 (string-less-greater-equal-tests lessp equalp)
308 `(with-two-strings string1 string2 start1 end1 offset1 start2 end2
309 (let ((slen1 (- (the fixnum end1) start1))
310 (slen2 (- (the fixnum end2) start2)))
311 (declare (fixnum slen1 slen2))
312 (if (or (minusp slen1) (minusp slen2))
313 ;;prevent endless looping later.
314 (error "Improper bounds for string comparison."))
315 (do ((index1 start1 (1+ index1))
316 (index2 start2 (1+ index2))
317 (char1)
318 (char2))
319 ((or (= index1 (the fixnum end1)) (= index2 (the fixnum end2)))
320 (if (,length-test slen1 slen2) (- index1 offset1)))
321 (declare (fixnum index1 index2))
322 (setq char1 (schar string1 index1))
323 (setq char2 (schar string2 index2))
324 (if (not (char-equal char1 char2))
325 (if ,character-test
326 (return (- index1 offset1))
327 (return ()))))))))
328
329 ) ; eval-when
330
331 (defun string-lessp* (string1 string2 start1 end1 start2 end2)
332 (declare (fixnum start1 start2))
333 (string-less-greater-equal t nil))
334
335 (defun string-greaterp* (string1 string2 start1 end1 start2 end2)
336 (declare (fixnum start1 start2))
337 (string-less-greater-equal nil nil))
338
339 (defun string-not-lessp* (string1 string2 start1 end1 start2 end2)
340 (declare (fixnum start1 start2))
341 (string-less-greater-equal nil t))
342
343 (defun string-not-greaterp* (string1 string2 start1 end1 start2 end2)
344 (declare (fixnum start1 start2))
345 (string-less-greater-equal t t))
346
347 (defun string-lessp (string1 string2 &key (start1 0) end1 (start2 0) end2)
348 "Given two strings, if the first string is lexicographically less than
349 the second string, returns the longest common prefix (using char-equal)
350 of the two strings. Otherwise, returns ()."
351 (string-lessp* string1 string2 start1 end1 start2 end2))
352
353 (defun string-greaterp (string1 string2 &key (start1 0) end1 (start2 0) end2)
354 "Given two strings, if the first string is lexicographically greater than
355 the second string, returns the longest common prefix (using char-equal)
356 of the two strings. Otherwise, returns ()."
357 (string-greaterp* string1 string2 start1 end1 start2 end2))
358
359 (defun string-not-lessp (string1 string2 &key (start1 0) end1 (start2 0) end2)
360 "Given two strings, if the first string is lexicographically greater
361 than or equal to the second string, returns the longest common prefix
362 (using char-equal) of the two strings. Otherwise, returns ()."
363 (string-not-lessp* string1 string2 start1 end1 start2 end2))
364
365 (defun string-not-greaterp (string1 string2 &key (start1 0) end1 (start2 0)
366 end2)
367 "Given two strings, if the first string is lexicographically less than
368 or equal to the second string, returns the longest common prefix
369 (using char-equal) of the two strings. Otherwise, returns ()."
370 (string-not-greaterp* string1 string2 start1 end1 start2 end2))
371
372
373 (defun make-string (count &key (element-type 'character)
374 ((:initial-element fill-char)))
375 "Given a character count and an optional fill character, makes and returns
376 a new string Count long filled with the fill character."
377 (declare (fixnum count)
378 #-unicode
379 (ignore element-type))
380 (if fill-char
381 (do ((i 0 (1+ i))
382 #-unicode
383 (string (make-string count))
384 #+unicode
385 (string (make-string count :element-type element-type)))
386 ((= i count) string)
387 (declare (fixnum i))
388 (setf (schar string i) fill-char))
389 #-unicode
390 (make-string count)
391 #+unicode
392 (make-string count :element-type element-type)))
393
394 (defun string-upcase (string &key (start 0) end)
395 "Given a string, returns a new string that is a copy of it with
396 all lower case alphabetic characters converted to uppercase."
397 (declare (fixnum start))
398 (let* ((string (if (stringp string) string (string string)))
399 (slen (length string)))
400 (declare (fixnum slen))
401 (with-one-string string start end offset
402 (let ((offset-slen (+ slen offset))
403 (newstring (make-string slen :element-type (array-element-type string))))
404 (declare (fixnum offset-slen))
405 (do ((index offset (1+ index))
406 (new-index 0 (1+ new-index)))
407 ((= index start))
408 (declare (fixnum index new-index))
409 (setf (schar newstring new-index) (schar string index)))
410 (do ((index start (1+ index))
411 (new-index (- start offset) (1+ new-index)))
412 ((= index (the fixnum end)))
413 (declare (fixnum index new-index))
414 (setf (schar newstring new-index)
415 (char-upcase (schar string index))))
416 (do ((index end (1+ index))
417 (new-index (- (the fixnum end) offset) (1+ new-index)))
418 ((= index offset-slen))
419 (declare (fixnum index new-index))
420 (setf (schar newstring new-index) (schar string index)))
421 newstring))))
422
423 (defun string-downcase (string &key (start 0) end)
424 "Given a string, returns a new string that is a copy of it with
425 all upper case alphabetic characters converted to lowercase."
426 (declare (fixnum start))
427 (let* ((string (if (stringp string) string (string string)))
428 (slen (length string)))
429 (declare (fixnum slen))
430 (with-one-string string start end offset
431 (let ((offset-slen (+ slen offset))
432 (newstring (make-string slen :element-type (array-element-type string))))
433 (declare (fixnum offset-slen))
434 (do ((index offset (1+ index))
435 (new-index 0 (1+ new-index)))
436 ((= index start))
437 (declare (fixnum index new-index))
438 (setf (schar newstring new-index) (schar string index)))
439 (do ((index start (1+ index))
440 (new-index (- start offset) (1+ new-index)))
441 ((= index (the fixnum end)))
442 (declare (fixnum index new-index))
443 (setf (schar newstring new-index)
444 (char-downcase (schar string index))))
445 (do ((index end (1+ index))
446 (new-index (- (the fixnum end) offset) (1+ new-index)))
447 ((= index offset-slen))
448 (declare (fixnum index new-index))
449 (setf (schar newstring new-index) (schar string index)))
450 newstring))))
451
452 (defun string-capitalize (string &key (start 0) end)
453 "Given a string, returns a copy of the string with the first
454 character of each ``word'' converted to upper-case, and remaining
455 chars in the word converted to lower case. A ``word'' is defined
456 to be a string of case-modifiable characters delimited by
457 non-case-modifiable chars."
458 (declare (fixnum start))
459 (let* ((string (if (stringp string) string (string string)))
460 (slen (length string)))
461 (declare (fixnum slen))
462 (with-one-string string start end offset
463 (let ((offset-slen (+ slen offset))
464 (newstring (make-string slen :element-type (array-element-type string))))
465 (declare (fixnum offset-slen))
466 (do ((index offset (1+ index))
467 (new-index 0 (1+ new-index)))
468 ((= index start))
469 (declare (fixnum index new-index))
470 (setf (schar newstring new-index) (schar string index)))
471 (do ((index start (1+ index))
472 (new-index (- start offset) (1+ new-index))
473 (newword t)
474 (char ()))
475 ((= index (the fixnum end)))
476 (declare (fixnum index new-index))
477 (setq char (schar string index))
478 (cond ((not (alphanumericp char))
479 (setq newword t))
480 (newword
481 ;;char is first case-modifiable after non-case-modifiable
482 (setq char (char-upcase char))
483 (setq newword ()))
484 ;;char is case-modifiable, but not first
485 (t (setq char (char-downcase char))))
486 (setf (schar newstring new-index) char))
487 (do ((index end (1+ index))
488 (new-index (- (the fixnum end) offset) (1+ new-index)))
489 ((= index offset-slen))
490 (declare (fixnum index new-index))
491 (setf (schar newstring new-index) (schar string index)))
492 newstring))))
493
494 (defun nstring-upcase (string &key (start 0) end)
495 "Given a string, returns that string with all lower case alphabetic
496 characters converted to uppercase."
497 (declare (fixnum start))
498 (let ((save-header string))
499 (with-one-string string start end offset
500 (do ((index start (1+ index)))
501 ((= index (the fixnum end)))
502 (declare (fixnum index))
503 (setf (schar string index) (char-upcase (schar string index)))))
504 save-header))
505
506 (defun nstring-downcase (string &key (start 0) end)
507 "Given a string, returns that string with all upper case alphabetic
508 characters converted to lowercase."
509 (declare (fixnum start))
510 (let ((save-header string))
511 (with-one-string string start end offset
512 (do ((index start (1+ index)))
513 ((= index (the fixnum end)))
514 (declare (fixnum index))
515 (setf (schar string index) (char-downcase (schar string index)))))
516 save-header))
517
518 (defun nstring-capitalize (string &key (start 0) end)
519 "Given a string, returns that string with the first
520 character of each ``word'' converted to upper-case, and remaining
521 chars in the word converted to lower case. A ``word'' is defined
522 to be a string of case-modifiable characters delimited by
523 non-case-modifiable chars."
524 (declare (fixnum start))
525 (let ((save-header string))
526 (with-one-string string start end offset
527 (do ((index start (1+ index))
528 (newword t)
529 (char ()))
530 ((= index (the fixnum end)))
531 (declare (fixnum index))
532 (setq char (schar string index))
533 (cond ((not (alphanumericp char))
534 (setq newword t))
535 (newword
536 ;;char is first case-modifiable after non-case-modifiable
537 (setf (schar string index) (char-upcase char))
538 (setq newword ()))
539 (t
540 (setf (schar string index) (char-downcase char))))))
541 save-header))
542
543 (defun string-left-trim (char-bag string)
544 "Given a set of characters (a list or string) and a string, returns
545 a copy of the string with the characters in the set removed from the
546 left end."
547 (with-string string
548 (do ((index start (1+ index)))
549 ((or (= index (the fixnum end))
550 (not (find (schar string index) char-bag)))
551 (subseq (the simple-string string) index end))
552 (declare (fixnum index)))))
553
554 (defun string-right-trim (char-bag string)
555 "Given a set of characters (a list or string) and a string, returns
556 a copy of the string with the characters in the set removed from the
557 right end."
558 (with-string string
559 (do ((index (1- (the fixnum end)) (1- index)))
560 ((or (< index start) (not (find (schar string index) char-bag)))
561 (subseq (the simple-string string) start (1+ index)))
562 (declare (fixnum index)))))
563
564 (defun string-trim (char-bag string)
565 "Given a set of characters (a list or string) and a string, returns a
566 copy of the string with the characters in the set removed from both
567 ends."
568 (with-string string
569 (let* ((left-end (do ((index start (1+ index)))
570 ((or (= index (the fixnum end))
571 (not (find (schar string index) char-bag)))
572 index)
573 (declare (fixnum index))))
574 (right-end (do ((index (1- (the fixnum end)) (1- index)))
575 ((or (< index left-end)
576 (not (find (schar string index) char-bag)))
577 (1+ index))
578 (declare (fixnum index)))))
579 (subseq (the simple-string string) left-end right-end))))

  ViewVC Help
Powered by ViewVC 1.1.5