/[cmucl]/src/code/string.lisp
ViewVC logotype

Diff of /src/code/string.lisp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.12.30.7 by rtoy, Wed Apr 22 17:05:51 2009 UTC revision 1.12.30.8 by rtoy, Thu Apr 23 15:10:08 2009 UTC
# Line 28  Line 28 
28            nstring-capitalize))            nstring-capitalize))
29    
30    
31  (declaim (inline codepoint-from-surrogates codepoint surrogates))  (declaim (inline surrogates-to-codepoint codepoint surrogates))
32    
33  (defun codepoint-from-surrogates (high low)  (defun surrogates-to-codepoint (hi lo)
34    "Return the codepoint given the high and low surrogate values"    "Convert the given Hi and Lo surrogate characters to the
35    (declare (type (integer #xD800 #xDBFF) high)    corresponding codepoint value"
36             (type (integer #xDC00 #xDFFF) low))    (declare (type character hi lo))
37    (+ (ash (- high #xD800) 10)    (+ (ash (- (the (integer #xD800 #xDBFF) (char-code hi)) #xD800) 10)
38       low       (the (integer #xDC00 #xDFFF) (char-code lo)) #x2400))
      #x2400))  
39    
40  (defun codepoint (string i &optional (end (length string)))  (defun codepoint (string i &optional (end (length string)))
41    "Return the codepoint value from String at position I.  If that    "Return the codepoint value from String at position I.  If that
# Line 50  Line 49 
49      (cond ((and (<= #xD800 code #xDBFF) (< (1+ i) end))      (cond ((and (<= #xD800 code #xDBFF) (< (1+ i) end))
50             (let ((tmp (char-code (schar string (1+ i)))))             (let ((tmp (char-code (schar string (1+ i)))))
51               (if (<= #xDC00 tmp #xDFFF)               (if (<= #xDC00 tmp #xDFFF)
52                   (values (codepoint-from-surrogates code tmp) +1)                   (values (surrogates-to-codepoint (code-char code) (code-char tmp)) +1)
53                   (values code nil))))                   (values code nil))))
54            ((and (<= #xDC00 code #xDFFF) (> i 0))            ((and (<= #xDC00 code #xDFFF) (> i 0))
55             (let ((tmp (char-code (schar string (1- i)))))             (let ((tmp (char-code (schar string (1- i)))))
56               (if (<= #xD800 tmp #xDBFF)               (if (<= #xD800 tmp #xDBFF)
57                   (values (codepoint-from-surrogates tmp code) -1)                   (values (surrogates-to-codepoint (code-char tmp) (code-char code)) -1)
58                   (values code nil))))                   (values code nil))))
59            (t (values code nil)))))            (t (values code nil)))))
60    
61  (defun surrogates (codepoint)  (defun surrogates (codepoint)
62    "Return the high and low surrogate values for Codepoint.  If    "Return the high and low surrogate characters for Codepoint.  If
63    Codepoint is in the BMP, just return the Codepoint itself"    Codepoint is in the BMP, the first return value is the corresponding
64      character and the second is NIL."
65    (declare (type (integer 0 #x10FFFF) codepoint))    (declare (type (integer 0 #x10FFFF) codepoint))
66    (if (< codepoint #x10000)    (if (< codepoint #x10000)
67        (values codepoint nil)        (values (code-char codepoint) nil)
68        (let* ((tmp (- codepoint #x10000))        (let* ((tmp (- codepoint #x10000))
69               (hi (logior (ldb (byte 10 10) tmp) #xD800))               (hi (logior (ldb (byte 10 10) tmp) #xD800))
70               (lo (logior (ldb (byte 10 0) tmp) #xDC00)))               (lo (logior (ldb (byte 10 0) tmp) #xDC00)))
71          (values hi lo))))          (values (code-char hi) (code-char lo)))))
72    
73    
74  (defun string (X)  (defun string (X)
# Line 469  Line 469 
469              ;;  but that never actually occurs as of Unicode 5.1.0,              ;;  but that never actually occurs as of Unicode 5.1.0,
470              ;;  so I'm just going to ignore it for now...              ;;  so I'm just going to ignore it for now...
471              (multiple-value-bind (hi lo) (surrogates code)              (multiple-value-bind (hi lo) (surrogates code)
472                (setf (schar newstring new-index) (code-char hi))                (setf (schar newstring new-index) hi)
473                (when lo                (when lo
474                  (setf (schar newstring (incf new-index)) (code-char lo))))))                  (setf (schar newstring (incf new-index)) lo)))))
475          ;;@@ WARNING: see above          ;;@@ WARNING: see above
476          (do ((index end (1+ index))          (do ((index end (1+ index))
477               (new-index (- (the fixnum end) offset) (1+ new-index)))               (new-index (- (the fixnum end) offset) (1+ new-index)))
# Line 510  Line 510 
510              ;;  but that never actually occurs as of Unicode 5.1.0,              ;;  but that never actually occurs as of Unicode 5.1.0,
511              ;;  so I'm just going to ignore it for now...              ;;  so I'm just going to ignore it for now...
512              (multiple-value-bind (hi lo) (surrogates code)              (multiple-value-bind (hi lo) (surrogates code)
513                (setf (schar newstring new-index) (code-char hi))                (setf (schar newstring new-index) hi)
514                (when lo                (when lo
515                  (setf (schar newstring (incf new-index)) (code-char lo))))))                  (setf (schar newstring (incf new-index)) lo)))))
516          ;;@@ WARNING: see above          ;;@@ WARNING: see above
517          (do ((index end (1+ index))          (do ((index end (1+ index))
518               (new-index (- (the fixnum end) offset) (1+ new-index)))               (new-index (- (the fixnum end) offset) (1+ new-index)))
# Line 584  Line 584 
584            ;;  but that never actually occurs as of Unicode 5.1.0,            ;;  but that never actually occurs as of Unicode 5.1.0,
585            ;;  so I'm just going to ignore it for now...            ;;  so I'm just going to ignore it for now...
586            (multiple-value-bind (hi lo) (surrogates code)            (multiple-value-bind (hi lo) (surrogates code)
587              (setf (schar string index) (code-char hi))              (setf (schar string index) hi)
588              (when lo              (when lo
589                (setf (schar string (incf index)) (code-char lo)))))))                (setf (schar string (incf index)) lo))))))
590      save-header))      save-header))
591    
592  (defun nstring-downcase (string &key (start 0) end)  (defun nstring-downcase (string &key (start 0) end)
# Line 608  Line 608 
608            ;;  but that never actually occurs as of Unicode 5.1.0,            ;;  but that never actually occurs as of Unicode 5.1.0,
609            ;;  so I'm just going to ignore it for now...            ;;  so I'm just going to ignore it for now...
610            (multiple-value-bind (hi lo) (surrogates code)            (multiple-value-bind (hi lo) (surrogates code)
611              (setf (schar string index) (code-char hi))              (setf (schar string index) hi)
612              (when lo              (when lo
613                (setf (schar string (incf index)) (code-char lo)))))))                (setf (schar string (incf index)) lo))))))
614      save-header))      save-header))
615    
616  (defun nstring-capitalize (string &key (start 0) end)  (defun nstring-capitalize (string &key (start 0) end)

Legend:
Removed from v.1.12.30.7  
changed lines
  Added in v.1.12.30.8

  ViewVC Help
Powered by ViewVC 1.1.5