diff --git a/src/code/exports.lisp b/src/code/exports.lisp
index 6c7bbedc5f61666fc30eb1058e29e59192415369..249bc32abe9d94f54ca7eb5fa96e666188ecacf6 100644
--- a/src/code/exports.lisp
+++ b/src/code/exports.lisp
@@ -913,7 +913,17 @@
   (:export "STRING-TO-NFC" "STRING-TO-NFD"
 	   "STRING-TO-NFKC" "STRING-TO-NFKD"
 	   "UNICODE-COMPLETE" "UNICODE-COMPLETE-NAME"
-	   "LOAD-ALL-UNICODE-DATA"))
+	   "UNICODE-FULL-CASE-LOWER"
+	   "UNICODE-FULL-CASE-UPPER"
+	   "UNICODE-FULL-CASE-TITLE"
+	   "UNICODE-CATEGORY"
+	   "+UNICODE-CATEGORY-LOWER+"
+	   "+UNICODE-CATEGORY-OTHER+"
+	   "+UNICODE-CATEGORY-GRAPHIC+"
+	   "+UNICODE-CATEGORY-UPPER+"
+	   "+UNICODE-CATEGORY-TITLE+"
+	   "LOAD-ALL-UNICODE-DATA"
+	   "SURROGATES"))
 
 (defpackage "EVAL"
   (:export "*EVAL-STACK-TRACE*" "*INTERNAL-APPLY-NODE-TRACE*"
@@ -2432,4 +2442,14 @@
 	   "LAST-FWRAPPER"
 	   "DO-FWRAPPERS"))
 
+(defpackage "UNICODE"
+  (:use "COMMON-LISP")
+  (:shadow "STRING-CAPITALIZE"
+	   "STRING-DOWNCASE"
+	   "STRING-UPCASE")
+  (:export "STRING-CAPITALIZE"
+	   "STRING-DOWNCASE"
+	   "STRING-UPCASE"
+	   "STRING-NEXT-WORD-BREAK"))
+
 
diff --git a/src/code/string.lisp b/src/code/string.lisp
index 5176edf58f170cd09475cce93f2171ec813d6b70..8005cda87102a2120c676ad6580f3cb20802b4d9 100644
--- a/src/code/string.lisp
+++ b/src/code/string.lisp
@@ -30,7 +30,7 @@
 	  nstring-capitalize))
 
 #+unicode
-(export '(string-to-nfd string-to-nfkd string-to-nfkc))
+(export '(string-to-nfd string-to-nfkd string-to-nfkc surrogates))
 
 (declaim (inline surrogatep surrogates-to-codepoint codepoint surrogates))
 
@@ -159,7 +159,7 @@
 ;;; With-One-String is used to set up some string hacking things.  The keywords
 ;;; are parsed, and the string is hacked into a simple-string.
 
-(eval-when (compile)
+(eval-when (compile load eval)
 
 (defmacro with-one-string (string start end cum-offset &rest forms)
   `(let ((,string (if (stringp ,string) ,string (string ,string))))
@@ -594,7 +594,9 @@
 	(setf (schar string i) fill-char))
       (make-string count)))
 
-(defun string-upcase-simple (string &key (start 0) end)
+(defun string-upcase (string &key (start 0) end)
+  _N"Given a string, returns a new string that is a copy of it with all
+  lower case alphabetic characters converted to uppercase."
   (declare (fixnum start))
   (let* ((string (if (stringp string) string (string string)))
 	 (slen (length string)))
@@ -634,54 +636,9 @@
 	  (setf (schar newstring new-index) (schar string index)))
 	newstring))))
 
-#+unicode
-(defun string-upcase-full (string &key (start 0) end)
-  (declare (fixnum start))
-  (let* ((string (if (stringp string) string (string string)))
-	 (slen (length string)))
-    (declare (fixnum slen))
-    (with-output-to-string (s)
-      (with-one-string string start end offset
-        (let ((offset-slen (+ slen offset)))
-	  (declare (fixnum offset-slen))
-	  (write-string string s :start offset :end start)
-	  (do ((index start (1+ index)))
-	      ((= index (the fixnum end)))
-	    (declare (fixnum index))
-	    (multiple-value-bind (code wide)
-		(codepoint string index)
-	      (when wide (incf index))
-	      ;; Handle ASCII specially because this is called early in
-	      ;; initialization, before unidata is available.
-	      (cond ((< 96 code 123)
-		     (write-char (code-char (decf code 32)) s))
-		    ((> code 127)
-		     (write-string (unicode-full-case-upper code) s))
-		    (t
-		     (multiple-value-bind (hi lo)
-			 (surrogates code)
-		       (write-char hi s)
-		       (when lo
-			 (write-char lo s)))))))
-	  (write-string string s :start end :end offset-slen))))))
-
-(defun string-upcase (string &key (start 0) end #+unicode (casing :simple))
-  #-unicode
+(defun string-downcase (string &key (start 0) end)
   _N"Given a string, returns a new string that is a copy of it with all
-  lower case alphabetic characters converted to uppercase."
-  #+unicode
-  _N"Given a string, returns a new string that is a copy of it with all
-  lower case alphabetic characters converted to uppercase.  Casing is
-  :simple or :full for simple or full case conversion, respectively."
-  (declare (fixnum start))
-  #-unicode
-  (string-upcase-simple string :start start :end end)
-  #+unicode
-  (if (eq casing :simple)
-      (string-upcase-simple string :start start :end end)
-      (string-upcase-full string :start start :end end)))
-
-(defun string-downcase-simple (string &key (start 0) end)
+  upper case alphabetic characters converted to lowercase."
   (declare (fixnum start))
   (let* ((string (if (stringp string) string (string string)))
 	 (slen (length string)))
@@ -720,54 +677,12 @@
 	  (setf (schar newstring new-index) (schar string index)))
 	newstring))))
 
-#+unicode
-(defun string-downcase-full (string &key (start 0) end)
-  (declare (fixnum start))
-  (let* ((string (if (stringp string) string (string string)))
-	 (slen (length string)))
-    (declare (fixnum slen))
-    (with-output-to-string (s)
-      (with-one-string string start end offset
-        (let ((offset-slen (+ slen offset)))
-	  (declare (fixnum offset-slen))
-	  (write-string string s :start offset :end start)
-	  (do ((index start (1+ index)))
-	      ((= index (the fixnum end)))
-	    (declare (fixnum index))
-	    (multiple-value-bind (code wide)
-		(codepoint string index)
-	      (when wide (incf index))
-	      ;; Handle ASCII specially because this is called early in
-	      ;; initialization, before unidata is available.
-	      (cond ((< 64 code 91)
-		     (write-char (code-char (incf code 32)) s))
-		    ((> code 127)
-		     (write-string (unicode-full-case-lower code) s))
-		    (t
-		     (multiple-value-bind (hi lo)
-			 (surrogates code)
-		       (write-char hi s)
-		       (when lo
-			 (write-char lo s)))))))
-	  (write-string string s :start end :end offset-slen))))))
-
-(defun string-downcase (string &key (start 0) end #+unicode (casing :simple))
-  #-unicode
-  _N"Given a string, returns a new string that is a copy of it with all
-  upper case alphabetic characters converted to lowercase."
-  #+unicode
-  _N"Given a string, returns a new string that is a copy of it with all
-  upper case alphabetic characters converted to lowercase.  Casing is
-  :simple or :full for simple or full case conversion, respectively."
-  (declare (fixnum start))
-  #-unicode
-  (string-downcase-simple string :start start :end end)
-  #+unicode
-  (if (eq casing :simple)
-      (string-downcase-simple string :start start :end end)
-      (string-downcase-full string :start start :end end)))
-
-(defun string-capitalize-simple (string &key (start 0) end)
+(defun string-capitalize (string &key (start 0) end)
+  _N"Given a string, returns a copy of the string with the first
+  character of each ``word'' converted to upper-case, and remaining
+  chars in the word converted to lower case. A ``word'' is defined
+  to be a string of case-modifiable characters delimited by
+  non-case-modifiable chars."
   (declare (fixnum start))
   (let* ((string (if (stringp string) string (string string)))
 	 (slen (length string)))
@@ -804,74 +719,6 @@
 	  (setf (schar newstring new-index) (schar string index)))
 	newstring))))
 
-#+unicode
-(defun string-capitalize-full (string &key (start 0) end)
-  (declare (fixnum start))
-  (let* ((string (if (stringp string) string (string string)))
-	 (slen (length string)))
-    (declare (fixnum slen))
-    (with-output-to-string (s)
-      (with-one-string string start end offset
-        (let ((offset-slen (+ slen offset)))
-	  (declare (fixnum offset-slen))
-	  (write-string string s :start offset :end start)
-	  (flet ((alphanump (m)
-		   (or (< 47 m 58) (< 64 m 91) (< 96 m 123)
-		       #+(and unicode (not unicode-bootstrap))
-		       (and (> m 127)
-			    (<= +unicode-category-letter+ (unicode-category m)
-				(+ +unicode-category-letter+ #x0F))))))
-	    (do ((index start (1+ index))
-		 (newword t))
-		((= index (the fixnum end)))
-	      (declare (fixnum index))
-	      (multiple-value-bind (code wide)
-		  (codepoint string index)
-		(when wide (incf index))
-		(cond ((not (alphanump code))
-		       (multiple-value-bind (hi lo)
-			   (surrogates code)
-			 (write-char hi s)
-			 (when lo (write-char lo s)))
-		       (setq newword t))
-		      (newword
-		       ;;char is first case-modifiable after non-case-modifiable
-		       (write-string (unicode-full-case-title code) s)
-		       (setq newword ()))
-		      ;;char is case-modifiable, but not first
-		      (t
-		       (write-string (unicode-full-case-lower code) s))))))
-	  (write-string string s :start end :end offset-slen))))))
-
-(defun string-capitalize (string &key (start 0) end
-				 #+unicode (casing :simple)
-				 #+unicode unicode-word-break)
-  #-unicode
-  _N"Given a string, returns a copy of the string with the first
-  character of each ``word'' converted to upper-case, and remaining
-  chars in the word converted to lower case. A ``word'' is defined
-  to be a string of case-modifiable characters delimited by
-  non-case-modifiable chars."
-  #+unicode
-  _N"Given a string, returns a copy of the string with the first
-  character of each ``word'' converted to upper-case, and remaining
-  chars in the word converted to lower case. Casing is :simple or
-  :full for simple or full case conversion, respectively.  If
-  Unicode-Word-Break is non-Nil, then the Unicode word-breaking
-  algorithm is used to determine the word boundaries.  Otherwise, A
-  ``word'' is defined to be a string of case-modifiable characters
-  delimited by non-case-modifiable chars.  "
-  
-  (declare (fixnum start))
-  #-unicode
-  (string-capitalize-simple string :start start :end end)
-  #+unicode
-  (if unicode-word-break
-      (string-capitalize-unicode string :start start :end end :casing casing)
-      (if (eq casing :simple)
-	  (string-capitalize-simple string :start start :end end)
-	  (string-capitalize-full string :start start :end end))))
-
 (defun nstring-upcase (string &key (start 0) end)
   "Given a string, returns that string with all lower case alphabetic
   characters converted to uppercase."
@@ -1389,322 +1236,6 @@
 (defun string-to-nfc (string)
   (if (simple-string-p string) string (coerce string 'simple-string)))
 
-
-;;;
-;;; This is a Lisp translation of the Scheme code from William
-;;; D. Clinger that implements the word-breaking algorithm.  This is
-;;; used with permission.
-;;;
-;;; This version is modified from the original at
-;;; http://www.ccs.neu.edu/home/will/R6RS/ to conform to CMUCL's
-;;; implementation of the word break properties.
-;;;
-;;;
-;;; Copyright statement and original comments:
-;;;
-;;;--------------------------------------------------------------------------------
-
-;; Copyright 2006 William D Clinger.
-;;
-;; Permission to copy this software, in whole or in part, to use this
-;; software for any lawful purpose, and to redistribute this software
-;; is granted subject to the restriction that all copies made of this
-;; software must include this copyright and permission notice in full.
-;;
-;; I also request that you send me a copy of any improvements that you
-;; make to this software so that they may be incorporated within it to
-;; the benefit of the Scheme community.
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; Word-breaking as defined by Unicode Standard Annex #29.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Implementation notes.
-;;
-;; The string-foldcase, string-downcase, and string-titlecase
-;; procedures rely on the notion of a word, which is defined
-;; by Unicode Standard Annex 29.
-;;
-;; The string-foldcase and string-downcase procedures rely on
-;; word boundaries only when they encounter a Greek sigma, so
-;; their performance should not be greatly affected by the
-;; performance of the word-breaking algorithm.
-;;
-;; The string-titlecase procedure must find all word boundaries,
-;; but it is typically used on short strings (titles).
-;;
-;; Hence the performance of the word-breaking algorithm should
-;; not matter too much for this reference implementation.
-;; Word-breaking is more generally useful, however, so I tried
-;; to make this implementation reasonably efficient.
-;;
-;; Word boundaries are defined by 14 different rules in
-;; Unicode Standard Annex #29, and by GraphemeBreakProperty.txt
-;; and WordBreakProperty.txt.  See also WordBreakTest.html.
-;;
-;; My original implementation of those specifications failed
-;; 6 of the 494 tests in auxiliary/WordBreakTest.txt, but it
-;; appeared to me that those tests were inconsistent with the
-;; word-breaking rules in UAX #29.  John Cowan forwarded my
-;; bug report to the Unicode experts, and Mark Davis responded
-;; on 29 May 2007:
-;;
-;;   Thanks for following up on this. I think you have found a problem in the
-;;   formulation of word break, not the test. The intention was to break after a
-;;   Sep character, as is done in Sentence break. So my previous suggestion was
-;;   incorrect; instead, what we need is a new rule:
-;; 
-;;   *Break after paragraph separators.*
-;;    WB3a. Sep �
-;;   I'll make a propose to the UTC for this.
-;;
-;; Here is Will's translation of those rules (including WB3a)
-;; into a finite state machine that searches forward within a
-;; string, looking for the next position at which a word break
-;; is allowed.  The current state consists of an index i into
-;; the string and a summary of the left context whose rightmost
-;; character is at index i.  The left context is usually
-;; determined by the character at index i, but there are three
-;; complications:
-;;
-;;     Extend and Format characters are ignored unless they
-;;         follow a separator or the beginning of the text.
-;;     ALetter followed by MidLetter is treated specially.
-;;     Numeric followed by MidNum is treated specially.
-;;
-;; In the implementation below, the left context ending at i
-;; is encoded by the following symbols:
-;;
-;;     CR
-;;     Sep (excluding CR)
-;;     ALetter
-;;     MidLetter
-;;     ALetterMidLetter (ALetter followed by MidLetter)
-;;     Numeric
-;;     MidNum
-;;     NumericMidNum (Numeric followed by MidNum)
-;;     Katakana
-;;     ExtendNumLet
-;;     other (none of the above)
-;;
-;; Given a string s and an exact integer i (which need not be
-;; a valid index into s), returns the index of the next character
-;; that is not part of the word containing the character at i,
-;; or the length of s if the word containing the character at i
-;; extends through the end of s.  If i is negative or a valid
-;; index into s, then the returned value will be greater than i.
-;;
-;;;--------------------------------------------------------------------------------
-
-(defun string-next-word-break (s i)
-  (let ((n (length s)))
-    (labels
-	((char-word-break-category (c)
-	   ;; Map our unicode word break property into what this
-	   ;; algorithm wants.
-	   (let ((cat (lisp::unicode-word-break c)))
-	     (case cat
-	       ((:lf :cr :newline)
-		:sep)
-	       ((:extend :format)
-		:extend-or-format)
-	       (otherwise cat))))
-	 (left-context (i)
-	   ;; Given a valid index i into s, returns the left context
-	   ;; at i.
-	   (multiple-value-bind (c widep)
-	       (lisp::codepoint s i n)
-	     (let* ((back
-		     ;; If we're at a regular character or a leading
-		     ;; surrogate, decrementing by 1 gets us the to
-		     ;; previous character.  But for a trailing
-		     ;; surrogate, we need to decrement by 2!
-		     (if (eql widep -1)
-			 2
-			 1))
-		    (cat (char-word-break-category c)))
-	       (case cat
-		 ((:sep)
-		  (if (= c (char-code #\return)) :cr cat))
-		 ((:midletter :midnumlet)
-		  (let ((i-1 (- i back)))
-		    (if (and (<= 0 i-1)
-			     (eq (left-context i-1) :aletter))
-			:aletter-midletter
-			cat)))
-		 ((:midnum :midnumlet)
-		  (let ((i-1 (- i back)))
-		    (if (and (<= 0 i-1)
-			     (eq (left-context i-1) :numeric))
-			:numeric-midnum
-			cat)))
-		 ((:extendorformat)
-		  (if (< 0 i)
-		      (left-context (- i back))
-		      :other))
-		 (otherwise cat)))))
-
-	 (index-of-previous-non-ignored (j)
-	   ;; Returns the index of the last non-Extend, non-Format
-	   ;; character within (substring s 0 j).  Should not be
-	   ;; called unless such a character exists.
-
-	   (let* ((j1 (- j 1)))
-	     (multiple-value-bind (c widep)
-		 (lisp::codepoint s j1)
-	       (when (eql widep -1)
-		 ;; Back up one more if we're at the trailing
-		 ;; surrogate.
-		 (decf j1))
-	       (let ((cat (char-word-break-category c)))
-		 (case cat
-		   ((:extend-or-format)
-		    (index-of-previous-non-ignored j1))
-		   (otherwise j1))))))
-
-	 (lookup (j context)
-	   ;; Given j and the context to the left of (not including) j,
-	   ;; returns the index at the start of the next word
-	   ;; (or before which a word break is permitted).
-
-	   (if (>= j n)
-	       (case context
-		 ((:aletter-midletter :numeric-midnum)
-		  (let ((j (index-of-previous-non-ignored n)))
-		    (if (< i j) j n)))
-		 (otherwise n))
-	       (multiple-value-bind (c widep)
-		   (lisp::codepoint s j)
-		 (let* ((next-j
-			 ;; The next character is either 1 or 2 code
-			 ;; units away.  For a leading surrogate, it's
-			 ;; 2; Otherwise just 1.
-			 (if (eql widep 1)
-			     2
-			     1))
-			(cat (char-word-break-category c)))
-		   (case cat
-		     ((:extend-or-format)
-		      (case context
-			((:cr :sep) j)
-			(otherwise (lookup (+ j next-j) context))))
-		     (otherwise
-		      (case context
-			((:cr)
-			 (if (= c (char-code #\linefeed))
-			     ;; Rule WB3:  Don't break CRLF, continue looking
-			     (lookup (+ j next-j) cat)
-			     j))
-			((:aletter)
-			 (case cat
-			   ((:aletter :numeric :extendnumlet)
-			    ;; Rules WB5, WB9, ?
-			    (lookup (+ j next-j) cat))
-			   ((:midletter :midnumlet)
-			    ;; Rule WB6, need to keep looking
-			    (lookup (+ j next-j) :aletter-midletter))
-			   (otherwise j)))
-			((:aletter-midletter)
-			 (case cat
-			   ((:aletter)
-			    ;; Rule WB7
-			    (lookup (+ j next-j) cat))
-			   (otherwise
-			    ;; Rule WB6 and WB7 were extended, but the
-			    ;; region didn't end with :aletter.  So
-			    ;; backup and break at that point.
-			    (let ((j2 (index-of-previous-non-ignored j)))
-			      (if (< i j2) j2 j)))))
-			((:numeric)
-			 (case cat
-			   ((:numeric :aletter :extendnumlet)
-			    ;; Rules WB8, WB10, ?
-			    (lookup (+ j next-j) cat))
-			   ((:midnum :midnumlet)
-			    ;; Rules WB11, need to keep looking
-			    (lookup (+ j next-j) :numeric-midnum))
-			   (otherwise j)))
-			((:numeric-midnum)
-			 (case cat
-			   ((:numeric)
-			    ;; Rule WB11, keep looking
-			    (lookup (+ j next-j) cat))
-			   (otherwise
-			    ;; Rule WB11, WB12 were extended, but the
-			    ;; region didn't end with :numeric, so
-			    ;; backup and break at that point.
-			    (let ((j2 (index-of-previous-non-ignored j)))
-			      (if (< i j2) j2 j)))))
-			((:midletter :midnum :midnumlet)
-			 ;; Rule WB14
-			 j)
-			((:katakana)
-			 (case cat
-			   ((:katakana :extendnumlet)
-			    ;; Rule WB13, WB13a
-			    (lookup (+ j next-j) cat))
-			   (otherwise j)))
-			((:extendnumlet)
-			 (case cat
-			   ((:extendnumlet :aletter :numeric :katakana)
-			    ;; Rule WB13a, WB13b
-			    (lookup (+ j next-j) cat))
-			   (otherwise j)))
-			((:regional_indicator)
-			 (case cat
-			   ((:regional_indicator)
-			    ;; Rule WB13c
-			    (lookup (+ j next-j) cat))
-			   (otherwise j)))
-			(otherwise j)))))))))
-      (declare (notinline lookup left-context))
-      (cond ((< i 0)
-	     ;; Rule WB1
-	     0)
-	    ((<= n i)
-	     ;; Rule WB2
-	     n)
-	    (t
-	     (multiple-value-bind (c widep)
-		 (lisp::codepoint s i)
-	       (declare (ignore c))
-	       (lookup (+ i (if (eql widep 1) 2 1)) (left-context i))))))))
-
-(defun string-capitalize-unicode (string &key (start 0) end (casing :simple))
-  (declare (type (member :simple :full) casing))
-  (let* ((string (if (stringp string) string (string string)))
-	 (slen (length string)))
-    (declare (fixnum slen))
-    (with-output-to-string (result)
-      (lisp::with-one-string string start end offset
-        (let ((offset-slen (+ slen offset)))
-	  (declare (fixnum offset-slen))
-
-	  (write-string string result :start 0 :end start)
-	  (let ((upper (ecase casing
-			 (:simple
-			  #'(lambda (ch)
-			      (multiple-value-bind (hi lo)
-				  (lisp::surrogates (lisp::unicode-upper ch))
-				(write-char hi result)
-				(when lo (write-char lo result)))))
-			 (:full
-			  #'(lambda (ch)
-			      (write-string (lisp::unicode-full-case-title ch) result))))))
-	    (do ((start start next)
-		 (next (string-next-word-break string start)
-		       (string-next-word-break string next)))
-		((or (= start next)
-		     (>= start end)))
-	      ;; Convert the first character of the word to upper
-	      ;; case, and then make the rest of the word lowercase.
-	      (funcall upper (lisp::codepoint string start))
-	      (write-string (string-downcase string :start (1+ start) :end next :casing casing)
-			    result :start (1+ start) :end next)))
-	  (write-string string result :start end :end offset-slen))))))
-
 
 ;; Some utilities
 (defun codepoints-string (seq)
diff --git a/src/code/unicode.lisp b/src/code/unicode.lisp
new file mode 100644
index 0000000000000000000000000000000000000000..98fe8805bbf17a0acfca464bed6b1e417ad8c5b3
--- /dev/null
+++ b/src/code/unicode.lisp
@@ -0,0 +1,519 @@
+;;; -*- Log: code.log; Package: Unicode -*-
+;;;
+;;; **********************************************************************
+;;; This code was written as part of the CMU Common Lisp project at
+;;; Carnegie Mellon University, and has been placed in the public domain.
+;;;
+(ext:file-comment
+  "$Header: src/code/unicode.lisp $")
+;;;
+;;; **********************************************************************
+;;;
+;;; Functions to process Unicode strings for CMU Common Lisp
+;;; Written by Paul Foley and Raymond Toy.
+;;;
+;;; ****************************************************************
+;;;
+(in-package "UNICODE")
+(intl:textdomain "cmucl")
+
+;; An example where this differs from cl:string-upcase differ:
+;; #\Latin_Small_Letter_Sharp_S
+(defun string-upcase-full (string &key (start 0) end)
+  _N"Given a string, returns a new string that is a copy of it with
+  all lower case alphabetic characters converted to uppercase using
+  full case conversion."
+  (declare (fixnum start)) (let* ((string (if
+  (stringp string) string (string string)))
+	 (slen (length string)))
+    (declare (fixnum slen))
+    (with-output-to-string (s)
+      (lisp::with-one-string string start end offset
+        (let ((offset-slen (+ slen offset)))
+	  (declare (fixnum offset-slen))
+	  (write-string string s :start offset :end start)
+	  (do ((index start (1+ index)))
+	      ((= index (the fixnum end)))
+	    (declare (fixnum index))
+	    (multiple-value-bind (code wide)
+		(lisp:codepoint string index)
+	      (when wide (incf index))
+	      ;; Handle ASCII specially because this is called early in
+	      ;; initialization, before unidata is available.
+	      (cond ((< 96 code 123)
+		     (write-char (code-char (decf code 32)) s))
+		    ((> code 127)
+		     (write-string (lisp:unicode-full-case-upper code) s))
+		    (t
+		     (multiple-value-bind (hi lo)
+			 (surrogates code)
+		       (write-char hi s)
+		       (when lo
+			 (write-char lo s)))))))
+	  (write-string string s :start end :end offset-slen))))))
+
+(defun string-upcase (string &key (start 0) end (casing :full))
+  _N"Given a string, returns a new string that is a copy of it with
+  all lower case alphabetic characters converted to uppercase.  Casing
+  is :simple or :full for simple or full case conversion,
+  respectively."
+  (declare (fixnum start))
+  (if (eq casing :simple)
+      (cl:string-upcase string :start start :end end)
+      (string-upcase-full string :start start :end end)))
+
+
+;; An example this differs from cl:string-downcase:
+;; #\Latin_Capital_Letter_I_With_Dot_Above.
+(defun string-downcase-full (string &key (start 0) end)
+  _N"Given a string, returns a new string that is a copy of it with
+  all uppercase alphabetic characters converted to lowercase using
+  full case conversion.."
+  (declare (fixnum start))
+  (let* ((string (if (stringp string) string (string string)))
+	 (slen (length string)))
+    (declare (fixnum slen))
+    (with-output-to-string (s)
+      (lisp::with-one-string string start end offset
+        (let ((offset-slen (+ slen offset)))
+	  (declare (fixnum offset-slen))
+	  (write-string string s :start offset :end start)
+	  (do ((index start (1+ index)))
+	      ((= index (the fixnum end)))
+	    (declare (fixnum index))
+	    (multiple-value-bind (code wide)
+		(lisp:codepoint string index)
+	      (when wide (incf index))
+	      ;; Handle ASCII specially because this is called early in
+	      ;; initialization, before unidata is available.
+	      (cond ((< 64 code 91)
+		     (write-char (code-char (incf code 32)) s))
+		    ((> code 127)
+		     (write-string (lisp:unicode-full-case-lower code) s))
+		    (t
+		     ;; Handle codes below 64
+		     (multiple-value-bind (hi lo)
+			 (surrogates code)
+		       (write-char hi s)
+		       (when lo
+			 (write-char lo s)))))))
+	  (write-string string s :start end :end offset-slen))))))
+
+(defun string-downcase (string &key (start 0) end (casing :full))
+  _N"Given a string, returns a new string that is a copy of it with all
+  uppercase alphabetic characters converted to lowercase.  Casing is
+  :simple or :full for simple or full case conversion, respectively."
+
+  (declare (fixnum start))
+  (if (eq casing :simple)
+      (cl:string-downcase string :start start :end end)
+      (string-downcase-full string :start start :end end)))
+
+
+;;;
+;;; This is a Lisp translation of the Scheme code from William
+;;; D. Clinger that implements the word-breaking algorithm.  This is
+;;; used with permission.
+;;;
+;;; This version is modified from the original at
+;;; http://www.ccs.neu.edu/home/will/R6RS/ to conform to CMUCL's
+;;; implementation of the word break properties.
+;;;
+;;;
+;;; Copyright statement and original comments:
+;;;
+;;;--------------------------------------------------------------------------------
+
+;; Copyright 2006 William D Clinger.
+;;
+;; Permission to copy this software, in whole or in part, to use this
+;; software for any lawful purpose, and to redistribute this software
+;; is granted subject to the restriction that all copies made of this
+;; software must include this copyright and permission notice in full.
+;;
+;; I also request that you send me a copy of any improvements that you
+;; make to this software so that they may be incorporated within it to
+;; the benefit of the Scheme community.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Word-breaking as defined by Unicode Standard Annex #29.
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Implementation notes.
+;;
+;; The string-foldcase, string-downcase, and string-titlecase
+;; procedures rely on the notion of a word, which is defined
+;; by Unicode Standard Annex 29.
+;;
+;; The string-foldcase and string-downcase procedures rely on
+;; word boundaries only when they encounter a Greek sigma, so
+;; their performance should not be greatly affected by the
+;; performance of the word-breaking algorithm.
+;;
+;; The string-titlecase procedure must find all word boundaries,
+;; but it is typically used on short strings (titles).
+;;
+;; Hence the performance of the word-breaking algorithm should
+;; not matter too much for this reference implementation.
+;; Word-breaking is more generally useful, however, so I tried
+;; to make this implementation reasonably efficient.
+;;
+;; Word boundaries are defined by 14 different rules in
+;; Unicode Standard Annex #29, and by GraphemeBreakProperty.txt
+;; and WordBreakProperty.txt.  See also WordBreakTest.html.
+;;
+;; My original implementation of those specifications failed
+;; 6 of the 494 tests in auxiliary/WordBreakTest.txt, but it
+;; appeared to me that those tests were inconsistent with the
+;; word-breaking rules in UAX #29.  John Cowan forwarded my
+;; bug report to the Unicode experts, and Mark Davis responded
+;; on 29 May 2007:
+;;
+;;   Thanks for following up on this. I think you have found a problem in the
+;;   formulation of word break, not the test. The intention was to break after a
+;;   Sep character, as is done in Sentence break. So my previous suggestion was
+;;   incorrect; instead, what we need is a new rule:
+;; 
+;;   *Break after paragraph separators.*
+;;    WB3a. Sep �
+;;   I'll make a propose to the UTC for this.
+;;
+;; Here is Will's translation of those rules (including WB3a)
+;; into a finite state machine that searches forward within a
+;; string, looking for the next position at which a word break
+;; is allowed.  The current state consists of an index i into
+;; the string and a summary of the left context whose rightmost
+;; character is at index i.  The left context is usually
+;; determined by the character at index i, but there are three
+;; complications:
+;;
+;;     Extend and Format characters are ignored unless they
+;;         follow a separator or the beginning of the text.
+;;     ALetter followed by MidLetter is treated specially.
+;;     Numeric followed by MidNum is treated specially.
+;;
+;; In the implementation below, the left context ending at i
+;; is encoded by the following symbols:
+;;
+;;     CR
+;;     Sep (excluding CR)
+;;     ALetter
+;;     MidLetter
+;;     ALetterMidLetter (ALetter followed by MidLetter)
+;;     Numeric
+;;     MidNum
+;;     NumericMidNum (Numeric followed by MidNum)
+;;     Katakana
+;;     ExtendNumLet
+;;     other (none of the above)
+;;
+;; Given a string s and an exact integer i (which need not be
+;; a valid index into s), returns the index of the next character
+;; that is not part of the word containing the character at i,
+;; or the length of s if the word containing the character at i
+;; extends through the end of s.  If i is negative or a valid
+;; index into s, then the returned value will be greater than i.
+;;
+;;;--------------------------------------------------------------------------------
+
+(defun string-next-word-break (s i)
+  _N"Given a string, S, and a starting index, return the index of the
+  next character that is not part of the word containing the character
+  at the index, or the length of S if the word containing the
+  character extends to the end of S.  If the index is negative or
+  valid index into S, the returned value will be strictly greater than
+  the index."
+  (let ((n (length s)))
+    (labels
+	((char-word-break-category (c)
+	   ;; Map our unicode word break property into what this
+	   ;; algorithm wants.
+	   (let ((cat (lisp::unicode-word-break c)))
+	     (case cat
+	       ((:lf :cr :newline)
+		:sep)
+	       ((:extend :format)
+		:extend-or-format)
+	       (otherwise cat))))
+	 (left-context (i)
+	   ;; Given a valid index i into s, returns the left context
+	   ;; at i.
+	   (multiple-value-bind (c widep)
+	       (lisp:codepoint s i n)
+	     (let* ((back
+		     ;; If we're at a regular character or a leading
+		     ;; surrogate, decrementing by 1 gets us the to
+		     ;; previous character.  But for a trailing
+		     ;; surrogate, we need to decrement by 2!
+		     (if (eql widep -1)
+			 2
+			 1))
+		    (cat (char-word-break-category c)))
+	       (case cat
+		 ((:sep)
+		  (if (= c (char-code #\return)) :cr cat))
+		 ((:midletter :midnumlet)
+		  (let ((i-1 (- i back)))
+		    (if (and (<= 0 i-1)
+			     (eq (left-context i-1) :aletter))
+			:aletter-midletter
+			cat)))
+		 ((:midnum :midnumlet)
+		  (let ((i-1 (- i back)))
+		    (if (and (<= 0 i-1)
+			     (eq (left-context i-1) :numeric))
+			:numeric-midnum
+			cat)))
+		 ((:extendorformat)
+		  (if (< 0 i)
+		      (left-context (- i back))
+		      :other))
+		 (otherwise cat)))))
+
+	 (index-of-previous-non-ignored (j)
+	   ;; Returns the index of the last non-Extend, non-Format
+	   ;; character within (substring s 0 j).  Should not be
+	   ;; called unless such a character exists.
+
+	   (let* ((j1 (- j 1)))
+	     (multiple-value-bind (c widep)
+		 (lisp:codepoint s j1)
+	       (when (eql widep -1)
+		 ;; Back up one more if we're at the trailing
+		 ;; surrogate.
+		 (decf j1))
+	       (let ((cat (char-word-break-category c)))
+		 (case cat
+		   ((:extend-or-format)
+		    (index-of-previous-non-ignored j1))
+		   (otherwise j1))))))
+
+	 (lookup (j context)
+	   ;; Given j and the context to the left of (not including) j,
+	   ;; returns the index at the start of the next word
+	   ;; (or before which a word break is permitted).
+
+	   (if (>= j n)
+	       (case context
+		 ((:aletter-midletter :numeric-midnum)
+		  (let ((j (index-of-previous-non-ignored n)))
+		    (if (< i j) j n)))
+		 (otherwise n))
+	       (multiple-value-bind (c widep)
+		   (lisp:codepoint s j)
+		 (let* ((next-j
+			 ;; The next character is either 1 or 2 code
+			 ;; units away.  For a leading surrogate, it's
+			 ;; 2; Otherwise just 1.
+			 (if (eql widep 1)
+			     2
+			     1))
+			(cat (char-word-break-category c)))
+		   (case cat
+		     ((:extend-or-format)
+		      (case context
+			((:cr :sep) j)
+			(otherwise (lookup (+ j next-j) context))))
+		     (otherwise
+		      (case context
+			((:cr)
+			 (if (= c (char-code #\linefeed))
+			     ;; Rule WB3:  Don't break CRLF, continue looking
+			     (lookup (+ j next-j) cat)
+			     j))
+			((:aletter)
+			 (case cat
+			   ((:aletter :numeric :extendnumlet)
+			    ;; Rules WB5, WB9, ?
+			    (lookup (+ j next-j) cat))
+			   ((:midletter :midnumlet)
+			    ;; Rule WB6, need to keep looking
+			    (lookup (+ j next-j) :aletter-midletter))
+			   (otherwise j)))
+			((:aletter-midletter)
+			 (case cat
+			   ((:aletter)
+			    ;; Rule WB7
+			    (lookup (+ j next-j) cat))
+			   (otherwise
+			    ;; Rule WB6 and WB7 were extended, but the
+			    ;; region didn't end with :aletter.  So
+			    ;; backup and break at that point.
+			    (let ((j2 (index-of-previous-non-ignored j)))
+			      (if (< i j2) j2 j)))))
+			((:numeric)
+			 (case cat
+			   ((:numeric :aletter :extendnumlet)
+			    ;; Rules WB8, WB10, ?
+			    (lookup (+ j next-j) cat))
+			   ((:midnum :midnumlet)
+			    ;; Rules WB11, need to keep looking
+			    (lookup (+ j next-j) :numeric-midnum))
+			   (otherwise j)))
+			((:numeric-midnum)
+			 (case cat
+			   ((:numeric)
+			    ;; Rule WB11, keep looking
+			    (lookup (+ j next-j) cat))
+			   (otherwise
+			    ;; Rule WB11, WB12 were extended, but the
+			    ;; region didn't end with :numeric, so
+			    ;; backup and break at that point.
+			    (let ((j2 (index-of-previous-non-ignored j)))
+			      (if (< i j2) j2 j)))))
+			((:midletter :midnum :midnumlet)
+			 ;; Rule WB14
+			 j)
+			((:katakana)
+			 (case cat
+			   ((:katakana :extendnumlet)
+			    ;; Rule WB13, WB13a
+			    (lookup (+ j next-j) cat))
+			   (otherwise j)))
+			((:extendnumlet)
+			 (case cat
+			   ((:extendnumlet :aletter :numeric :katakana)
+			    ;; Rule WB13a, WB13b
+			    (lookup (+ j next-j) cat))
+			   (otherwise j)))
+			((:regional_indicator)
+			 (case cat
+			   ((:regional_indicator)
+			    ;; Rule WB13c
+			    (lookup (+ j next-j) cat))
+			   (otherwise j)))
+			(otherwise j)))))))))
+      (declare (notinline lookup left-context))
+      (cond ((< i 0)
+	     ;; Rule WB1
+	     0)
+	    ((<= n i)
+	     ;; Rule WB2
+	     n)
+	    (t
+	     (multiple-value-bind (c widep)
+		 (lisp:codepoint s i)
+	       (declare (ignore c))
+	       (lookup (+ i (if (eql widep 1) 2 1)) (left-context i))))))))
+
+(defun string-capitalize-unicode (string &key (start 0) end (casing :simple))
+  "Capitalize String using the Unicode word-break algorithm to find
+  the words in String.  The beginning is capitalized depending on the
+  value of Casing"
+  (declare (type (member :simple :full :title) casing))
+  (let* ((string (if (stringp string) string (string string)))
+	 (slen (length string)))
+    (declare (fixnum slen))
+    (with-output-to-string (result)
+      (lisp::with-one-string string start end offset
+        (let ((offset-slen (+ slen offset)))
+	  (declare (fixnum offset-slen))
+
+	  (write-string string result :start 0 :end start)
+	  (let ((upper (ecase casing
+			 (:simple
+			  #'(lambda (ch)
+			      (multiple-value-bind (hi lo)
+				  (lisp::surrogates (lisp::unicode-upper ch))
+				(write-char hi result)
+				(when lo (write-char lo result)))))
+			 (:full
+			  #'(lambda (ch)
+			      (write-string (lisp::unicode-full-case-upper ch) result)))
+			 (:title
+			  #'(lambda (ch)
+			      (write-string (lisp::unicode-full-case-title ch) result))))))
+	    (do ((start start next)
+		 (next (string-next-word-break string start)
+		       (string-next-word-break string next)))
+		((or (= start next)
+		     (>= start end)))
+	      ;; Convert the first character of the word to upper
+	      ;; case, and then make the rest of the word lowercase.
+	      (funcall upper (lisp:codepoint string start))
+	      (write-string (string-downcase string :start (1+ start)
+						    :end next
+						    :casing casing)
+			    result
+			    :start (1+ start)
+			    :end next)))
+	  (write-string string result :start end :end offset-slen))))))
+
+(defun string-capitalize-full (string &key (start 0) end (casing :full))
+  "Capitalize String using the Common Lisp word-break algorithm to find
+  the words in String.  The beginning is capitalized depending on the
+  value of Casing"
+  (declare (fixnum start)
+	   (type (member :simple :full :title) casing))
+  (let* ((string (if (stringp string) string (string string)))
+	 (slen (length string)))
+    (declare (fixnum slen))
+    (with-output-to-string (s)
+      (lisp::with-one-string string start end offset
+        (let ((offset-slen (+ slen offset)))
+	  (declare (fixnum offset-slen))
+	  (write-string string s :start offset :end start)
+	  (flet ((alphanump (m)
+		   (or (< 47 m 58) (< 64 m 91) (< 96 m 123)
+		       #+(and unicode (not unicode-bootstrap))
+		       (and (> m 127)
+			    (<= lisp::+unicode-category-letter+
+				(lisp::unicode-category m)
+				(+ lisp::+unicode-category-letter+ #x0F)))))
+		 (upper (ch)
+		   (ecase casing
+		     (:simple
+		      #'(lambda (ch)
+			  (multiple-value-bind (hi lo)
+			      (lisp::surrogates (lisp::unicode-upper ch))
+			    (write-char hi s)
+			    (when lo (write-char lo s)))))
+		     (:full
+		      #'(lambda (ch)
+			  (write-string (lisp::unicode-full-case-upper ch) s)))
+		     (:title
+		      #'(lambda (ch)
+			  (write-string (lisp::unicode-full-case-title ch) s))))))
+	    (do ((index start (1+ index))
+		 (newword t))
+		((= index (the fixnum end)))
+	      (declare (fixnum index))
+	      (multiple-value-bind (code wide)
+		  (lisp:codepoint string index)
+		(when wide (incf index))
+		(cond ((not (alphanump code))
+		       (multiple-value-bind (hi lo)
+			   (surrogates code)
+			 (write-char hi s)
+			 (when lo (write-char lo s)))
+		       (setq newword t))
+		      (newword
+		       ;; Char is first case-modifiable after non-case-modifiable
+		       (funcall upper code)
+		       (setq newword ()))
+		      (t
+		       ;; char is case-modifiable, but not first
+		       (write-string (lisp:unicode-full-case-lower code) s))))))
+	  (write-string string s :start end :end offset-slen))))))
+
+(defun string-capitalize (string &key (start 0) end
+				 (casing :title)
+				 (unicode-word-break t))
+  _N"Given a string, returns a copy of the string with the first
+  character of each ``word'' converted to upper-case, and remaining
+  chars in the word converted to lower case. Casing is :simple, :full
+  or :title for simple, full or title case conversion, respectively.  If
+  Unicode-Word-Break is non-Nil, then the Unicode word-breaking
+  algorithm is used to determine the word boundaries.  Otherwise, A
+  ``word'' is defined to be a string of case-modifiable characters
+  delimited by non-case-modifiable chars.  "
+
+  (declare (fixnum start)
+	   (type (member :simple :full :title) casing))
+  (if unicode-word-break
+      (string-capitalize-unicode string :start start :end end :casing casing)
+      (if (eq casing :simple)
+	  (cl:string-capitalize string :start start :end end)
+	  (string-capitalize-full string :start start :end end :casing casing))))
diff --git a/src/code/unidata.lisp b/src/code/unidata.lisp
index 55e3a28b73ce5de1e4a794ff6d9afbe7dc5b51ee..351810075c8b5c0eb8503082f7723ec15444fff2 100644
--- a/src/code/unidata.lisp
+++ b/src/code/unidata.lisp
@@ -18,6 +18,15 @@
 
 (export '(string-to-nfd string-to-nfkc string-to-nfkd string-to-nfc
 	  unicode-complete unicode-complete-name
+	  unicode-full-case-lower
+	  unicode-full-case-upper
+	  unicode-full-case-title
+	  unicode-category
+	  +unicode-category-lower+
+	  +unicode-category-other+
+	  +unicode-category-graphic+
+	  +unicode-category-upper+
+	  +unicode-category-title+
 	  load-all-unicode-data))
 
 (defvar *unidata-path* #p"ext-formats:unidata.bin")
diff --git a/src/compiler/fndb.lisp b/src/compiler/fndb.lisp
index 52299153e1b9927248a724b797313369798eccd8..052b4c1a2581e0ca8b2bdef9e07ba928cf3a6c57 100644
--- a/src/compiler/fndb.lisp
+++ b/src/compiler/fndb.lisp
@@ -851,13 +851,11 @@
   (sequence stringable) simple-string (flushable))
 
 (defknown (string-upcase string-downcase)
-  (stringable &key (:start index) (:end sequence-end) #+unicode (:casing case-conversion-type))
+  (stringable &key (:start index) (:end sequence-end))
   simple-string (flushable))
 
 (defknown (string-capitalize)
-  (stringable &key (:start index) (:end sequence-end)
-	      #+unicode (:casing case-conversion-type)
-	      #+unicode (:unicode-word-break boolean))
+  (stringable &key (:start index) (:end sequence-end))
   simple-string (flushable))
 
 (defknown (nstring-upcase nstring-downcase nstring-capitalize)
diff --git a/src/tools/worldbuild.lisp b/src/tools/worldbuild.lisp
index 9d0dbb68418c15691b4d0eca1b8803978633e1c0..4b5f36439fd07312314c86308c8c752b1c242b3f 100644
--- a/src/tools/worldbuild.lisp
+++ b/src/tools/worldbuild.lisp
@@ -186,6 +186,8 @@
     ,@(when (c:backend-featurep :mp)
 	    '("target:code/multi-proc"))
     "target:code/intl-tramp"
+    ,@(when (c::backend-featurep :unicode)
+	'("target:code/unicode"))
     ))
 
 (setf *genesis-core-name* "target:lisp/kernel.core")
diff --git a/src/tools/worldcom.lisp b/src/tools/worldcom.lisp
index f8ac65ac06ad3b7d00d20ee778af33997a4d7fcd..150d33cc1d71c26e82236eb92f232bd55d197fa5 100644
--- a/src/tools/worldcom.lisp
+++ b/src/tools/worldcom.lisp
@@ -324,6 +324,8 @@
 (comf "target:code/intl-tramp")
 (comf "target:code/intl")
 
+(when (c:backend-featurep :unicode)
+  (comf "target:code/unicode"))
 ); let *byte-compile-top-level*
 
 ); with-compiler-log-file