Fix ticket:68 by adding {{{BYTE-BASH-COPY}}}
authorRaymond Toy <toy.raymond@gmail.com>
Sun, 23 Dec 2012 18:38:36 +0000 (10:38 -0800)
committerRaymond Toy <toy.raymond@gmail.com>
Sun, 23 Dec 2012 18:38:36 +0000 (10:38 -0800)
code/bit-bash.lisp::
 Add {{{BYTE-BASH-COPY}}} for copying bytes

code/exports.lisp::
 Add {{{BYTE-BASH-COPY}}}

compiler/generic/vm-fndb.lisp::
 Add {{{BYTE-BASH-COPY}}}

compiler/generic/vm-tran.lisp::
 Call {{{BYTE-BASH-COPY}}} in the deftransform for {{{REPLACE}}}.

src/code/bit-bash.lisp
src/code/exports.lisp
src/compiler/generic/vm-fndb.lisp
src/compiler/generic/vm-tran.lisp

index 459925a..8f94a97 100644 (file)
@@ -27,6 +27,9 @@
 (defconstant unit-bits vm:word-bits
   "The number of bits to process at a time.")
 
+(defconstant unit-bytes vm:word-bytes
+  "The number of bytes to process at a time.")
+
 (defconstant max-bits (1- (ash 1 vm:word-bits))
   "The maximum number of bits that can be dealt with during a single call.")
 
@@ -40,6 +43,9 @@
 (deftype bit-offset ()
   `(integer 0 (,unit-bits)))
 
+(deftype byte-offset ()
+  `(integer 0 (,unit-bytes)))
+
 (deftype bit-count ()
   `(integer 1 (,unit-bits)))
 
    (do-unary-bit-bash src src-offset dst dst-offset length
                      #'%raw-bits #'%set-raw-bits #'%raw-bits)))
 
+(defun do-unary-byte-bash (src src-offset dst dst-offset length
+                          dst-ref-fn dst-set-fn src-ref-fn)
+  (declare (type offset src-offset dst-offset length)
+          (type function dst-ref-fn dst-set-fn src-ref-fn))
+  (multiple-value-bind (dst-word-offset dst-byte-offset)
+      (floor dst-offset unit-bytes)
+    (declare (type word-offset dst-word-offset)
+            (type byte-offset dst-byte-offset))
+    (multiple-value-bind (src-word-offset src-byte-offset)
+       (floor src-offset unit-bytes)
+      (declare (type word-offset src-word-offset)
+              (type byte-offset src-byte-offset))
+      (cond
+       ((<= (+ dst-byte-offset length) unit-bytes)
+        #+nil(format t "case 1, one word~%")
+        ;; We are only writing one word, so it doesn't matter what order
+        ;; we do it in.  But we might be reading from multiple words, so take
+        ;; care.
+        (cond
+          ((zerop length)
+           #+nil(format t "case 1a: 0 length~%")
+           ;; Actually, we aren't even writing one word.  This is real easy.
+           )
+          ((= length unit-bytes)
+           #+nil(format t "case 1b~%")
+           ;; dst-byte-offset must be equal to zero, or we would be writing
+           ;; multiple words.  If src-byte-offset is also zero, then we
+           ;; just transfer the single word.  Otherwise we have to extract bits
+           ;; from two src words.
+           (funcall dst-set-fn dst dst-word-offset
+                    (if (zerop src-byte-offset)
+                        (funcall src-ref-fn src src-word-offset)
+                        (32bit-logical-or
+                         (shift-towards-start
+                          (funcall src-ref-fn src src-word-offset)
+                          (* vm:byte-bits src-byte-offset))
+                         (shift-towards-end
+                          (funcall src-ref-fn src (1+ src-word-offset))
+                          (* vm:byte-bits (- src-byte-offset)))))))
+          (t
+           #+nil(format t "case 1c~%")
+           ;; We are only writing some portion of the dst word, so we need to
+           ;; preserve the extra bits.  Also, we still don't know if we need
+           ;; one or two source words.
+           (let ((mask (shift-towards-end (start-mask (* vm:byte-bits length))
+                                          (* vm:byte-bits dst-byte-offset)))
+                 (orig (funcall dst-ref-fn dst dst-word-offset))
+                 (value
+                   (if (> src-byte-offset dst-byte-offset)
+                       ;; The source starts further into the word than does
+                       ;; the dst, so the source could extend into the next
+                       ;; word.  If it does, we have to merge the two words,
+                       ;; and if not, we can just shift the first word.
+                       (let ((src-bit-shift (* vm:byte-bits (- src-byte-offset dst-byte-offset))))
+                         (if (> (+ src-byte-offset length) unit-bytes)
+                             (32bit-logical-or
+                              (shift-towards-start
+                               (funcall src-ref-fn src src-word-offset)
+                               src-bit-shift)
+                              (shift-towards-end
+                               (funcall src-ref-fn src (1+ src-word-offset))
+                               (- src-bit-shift)))
+                             (shift-towards-start
+                              (funcall src-ref-fn src src-word-offset)
+                              src-bit-shift)))
+                       ;; The dst starts further into the word than does the
+                       ;; source, so we know the source can't extend into
+                       ;; a second word (or else the dst would too, and we
+                       ;; wouldn't be in this branch).
+                       (shift-towards-end
+                        (funcall src-ref-fn src src-word-offset)
+                        (* vm:byte-bits (- dst-byte-offset src-byte-offset))))))
+             (declare (type unit mask orig value))
+             ;; Replace the dst word.
+             (funcall dst-set-fn dst dst-word-offset
+                      (32bit-logical-or
+                       (32bit-logical-and value mask)
+                       (32bit-logical-andc2 orig mask)))))))
+       ((= src-byte-offset dst-byte-offset)
+        #+nil(format t "case 2, aligned~%")
+        ;; The source and dst are aligned, so we don't need to shift
+        ;; anything.  But we have to pick the direction of the loop
+        ;; in case the source and dst are really the same thing.
+        (multiple-value-bind (words final-bytes)
+            (floor (+ dst-byte-offset length) unit-bytes)
+          (declare (type word-offset words) (type byte-offset final-bytes))
+          (let ((interior (floor (- length final-bytes) unit-bytes)))
+            (declare (type word-offset interior))
+            (cond
+              ((<= dst-offset src-offset)
+               #+nil(format t " case 2a: L-R~%")
+               ;; We need to loop from left to right
+               (unless (zerop dst-byte-offset)
+                 #+nil(format t "  case 2a1: dst-byte-offset = ~D~%" dst-byte-offset)
+                 ;; We are only writing part of the first word, so mask off the
+                 ;; bits we want to preserve.
+                 (let ((mask (end-mask (- dst-byte-offset)))
+                       (orig (funcall dst-ref-fn dst dst-word-offset))
+                       (value (funcall src-ref-fn src src-word-offset)))
+                   (declare (type unit mask orig value))
+                   (funcall dst-set-fn dst dst-word-offset
+                            (32bit-logical-or (32bit-logical-and value mask)
+                                              (32bit-logical-andc2 orig mask))))
+                 (incf src-word-offset)
+                 (incf dst-word-offset))
+               ;; Just copy the interior words.
+               (dotimes (i interior)
+                 (funcall dst-set-fn dst dst-word-offset
+                          (funcall src-ref-fn src src-word-offset))
+                 (incf src-word-offset)
+                 (incf dst-word-offset))
+               (unless (zerop final-bytes)
+                 #+nil(format t "  case 2a2: final-bytes = ~D~%" final-bytes)
+                 ;; We are only writing part of the last word.
+                 (let ((mask (start-mask (* vm:byte-bits final-bytes)))
+                       (orig (funcall dst-ref-fn dst dst-word-offset))
+                       (value (funcall src-ref-fn src src-word-offset)))
+                   (declare (type unit mask orig value))
+                   (funcall dst-set-fn dst dst-word-offset
+                            (32bit-logical-or
+                             (32bit-logical-and value mask)
+                             (32bit-logical-andc2 orig mask))))))
+              (t
+               #+nil(format t " case 2b: R-L~%")
+               ;; We need to loop from right to left.
+               (incf dst-word-offset words)
+               (incf src-word-offset words)
+               (unless (zerop final-bytes)
+                 #+nil(format t "  case 2b1: R-L final-bytes = ~D~%" final-bytes)
+                 (let ((mask (start-mask (* vm:byte-bits final-bytes)))
+                       (orig (funcall dst-ref-fn dst dst-word-offset))
+                       (value (funcall src-ref-fn src src-word-offset)))
+                   (declare (type unit mask orig value))
+                   (funcall dst-set-fn dst dst-word-offset
+                            (32bit-logical-or
+                             (32bit-logical-and value mask)
+                             (32bit-logical-andc2 orig mask)))))
+               (dotimes (i interior)
+                 (decf src-word-offset)
+                 (decf dst-word-offset)
+                 (funcall dst-set-fn dst dst-word-offset
+                          (funcall src-ref-fn src src-word-offset)))
+               (unless (zerop dst-byte-offset)
+                 #+nil(format t "  case 2b2: R-L dst-byte-offset = ~D~%" dst-byte-offset)
+                 (decf src-word-offset)
+                 (decf dst-word-offset)
+                 (let ((mask (end-mask (* vm:byte-bits (- dst-byte-offset))))
+                       (orig (funcall dst-ref-fn dst dst-word-offset))
+                       (value (funcall src-ref-fn src src-word-offset)))
+                   (declare (type unit mask orig value))
+                   (funcall dst-set-fn dst dst-word-offset
+                            (32bit-logical-or
+                             (32bit-logical-and value mask)
+                             (32bit-logical-andc2 orig mask))))))))))
+       (t
+        #+nil(format t "case 3, unaligned~%")
+        ;; They aren't aligned.
+        (multiple-value-bind (words final-bytes)
+            (floor (+ dst-byte-offset length) unit-bytes)
+          (declare (type word-offset words) (type byte-offset final-bytes))
+          (let ((src-shift (mod (- src-byte-offset dst-byte-offset) unit-bytes))
+                (interior (floor (- length final-bytes) unit-bytes)))
+            (declare (type byte-offset src-shift)
+                     (type word-offset interior))
+            (cond
+              ((<= dst-offset src-offset)
+               #+nil(format t "case 3a: L-R~%")
+               ;; We need to loop from left to right
+               (let ((prev 0)
+                     (next (funcall src-ref-fn src src-word-offset)))
+                 (declare (type unit prev next))
+                 (flet ((get-next-src ()
+                          (setf prev next)
+                          (setf next (funcall src-ref-fn src
+                                              (incf src-word-offset)))))
+                   (declare (inline get-next-src))
+                   (unless (zerop dst-byte-offset)
+                     (when (> src-byte-offset dst-byte-offset)
+                       (get-next-src))
+                     (let ((mask (end-mask (* vm:byte-bits (- dst-byte-offset))))
+                           (orig (funcall dst-ref-fn dst dst-word-offset))
+                           (value (32bit-logical-or
+                                   (shift-towards-start prev (* vm:byte-bits src-shift))
+                                   (shift-towards-end next (* vm:byte-bits (- src-shift))))))
+                       (declare (type unit mask orig value))
+                       (funcall dst-set-fn dst dst-word-offset
+                                (32bit-logical-or
+                                 (32bit-logical-and value mask)
+                                 (32bit-logical-andc2 orig mask)))
+                       (incf dst-word-offset)))
+                   (dotimes (i interior)
+                     (get-next-src)
+                     (let ((value (32bit-logical-or
+                                   (shift-towards-end next (* vm:byte-bits (- src-shift)))
+                                   (shift-towards-start prev (* vm:byte-bits src-shift)))))
+                       (declare (type unit value))
+                       (funcall dst-set-fn dst dst-word-offset value)
+                       (incf dst-word-offset)))
+                   (unless (zerop final-bytes)
+                     (let ((value
+                             (if (> (+ final-bytes src-shift) unit-bytes)
+                                 (progn
+                                   (get-next-src)
+                                   (32bit-logical-or
+                                    (shift-towards-end next (* vm:byte-bits (- src-shift)))
+                                    (shift-towards-start prev (* vm:byte-bits src-shift))))
+                                 (shift-towards-start next (* vm:byte-bits src-shift))))
+                           (mask (start-mask (*  vm:byte-bits final-bytes)))
+                           (orig (funcall dst-ref-fn dst dst-word-offset)))
+                       (declare (type unit mask orig value))
+                       (funcall dst-set-fn dst dst-word-offset
+                                (32bit-logical-or
+                                 (32bit-logical-and value mask)
+                                 (32bit-logical-andc2 orig mask))))))))
+              (t
+               #+nil(format t "case 3b: L-R~%")
+               ;; We need to loop from right to left.
+               (incf dst-word-offset words)
+               (incf src-word-offset
+                     (1- (ceiling (+ src-byte-offset length) unit-bytes)))
+               (let ((next 0)
+                     (prev (funcall src-ref-fn src src-word-offset)))
+                 (declare (type unit prev next))
+                 (flet ((get-next-src ()
+                          (setf next prev)
+                          (setf prev (funcall src-ref-fn src
+                                              (decf src-word-offset)))))
+                   (declare (inline get-next-src))
+                   (unless (zerop final-bytes)
+                     (when (> final-bytes (- unit-bytes src-shift))
+                       (get-next-src))
+                     (let ((value (32bit-logical-or
+                                   (shift-towards-end next (* vm:byte-bits (- src-shift)))
+                                   (shift-towards-start prev (* vm:byte-bits src-shift))))
+                           (mask (start-mask (* vm:byte-bits final-bytes)))
+                           (orig (funcall dst-ref-fn dst dst-word-offset)))
+                       (declare (type unit mask orig value))
+                       (funcall dst-set-fn dst dst-word-offset
+                                (32bit-logical-or
+                                 (32bit-logical-and value mask)
+                                 (32bit-logical-andc2 orig mask)))))
+                   (decf dst-word-offset)
+                   (dotimes (i interior)
+                     (get-next-src)
+                     (let ((value (32bit-logical-or
+                                   (shift-towards-end next (* vm:byte-bits (- src-shift)))
+                                   (shift-towards-start prev (* vm:byte-bits src-shift)))))
+                       (declare (type unit value))
+                       (funcall dst-set-fn dst dst-word-offset value)
+                       (decf dst-word-offset)))
+                   (unless (zerop dst-byte-offset)
+                     (if (> src-byte-offset dst-byte-offset)
+                         (get-next-src)
+                         (setf next prev prev 0))
+                     (let ((mask (end-mask (* vm:byte-bits (- dst-byte-offset))))
+                           (orig (funcall dst-ref-fn dst dst-word-offset))
+                           (value (32bit-logical-or
+                                   (shift-towards-start prev (* vm:byte-bits src-shift))
+                                   (shift-towards-end next (* vm:byte-bits (- src-shift))))))
+                       (declare (type unit mask orig value))
+                       (funcall dst-set-fn dst dst-word-offset
+                                (32bit-logical-or
+                                 (32bit-logical-and value mask)
+                                 (32bit-logical-andc2 orig mask)))))))))))))))
+  (undefined-value))
+
+(defun byte-bash-copy (src src-offset dst dst-offset length)
+  (declare (type offset src-offset dst-offset length))
+  (locally
+      (declare (optimize (speed 3) (safety 0))
+              (inline do-unary-bit-bash))
+    (do-unary-byte-bash src src-offset dst dst-offset length
+      #'%raw-bits #'%set-raw-bits #'%raw-bits)))
+
 (defun system-area-copy (src src-offset dst dst-offset length)
   (declare (type offset src-offset dst-offset length))
   (locally
index a46d5bb..5a9bdaf 100644 (file)
           "BINDING-STACK-POINTER-SAP" "BIT-BASH-AND" "BIT-BASH-ANDC1"
           "BIT-BASH-ANDC2" "BIT-BASH-CLEAR" "BIT-BASH-COPY" "BIT-BASH-EQV"
           "BIT-BASH-IOR" "BIT-BASH-LOGNAND" "BIT-BASH-LOGNOR" "BIT-BASH-NOT"
-          "BIT-BASH-ORC1" "BIT-BASH-ORC2" "BIT-BASH-SET" "BIT-BASH-XOR"
-          "BIT-INDEX" "BOGUS-ARGUMENT-TO-VALUES-LIST-ERROR" "BOOLE-CODE"
+          "BIT-BASH-ORC1" "BIT-BASH-ORC2" "BIT-BASH-SET" "BIT-BASH-XOR" "BIT-INDEX"
+          "BYTE-BASH-COPY"
+          "BOGUS-ARGUMENT-TO-VALUES-LIST-ERROR" "BOOLE-CODE"
           "BOOLEAN" "BYTE-SPECIFIER" "CALLABLE" "CHAR-INT"
           "SEQUENCE-COUNT"
           "CHECK-FOR-CIRCULARITY" "CODE-COMPONENT" "CODE-COMPONENT-P"
index 80e5630..893b201 100644 (file)
   t
   ())
 
-(defknown bit-bash-copy
+(defknown (bit-bash-copy byte-bash-copy)
          ((simple-unboxed-array (*)) vm::offset
           (simple-unboxed-array (*)) vm::offset vm::offset)
   t
index a90c87f..aa8cd4f 100644 (file)
 ;;;; Simple string transforms:
 
 (defconstant vector-data-bit-offset (* vm:vector-data-offset vm:word-bits))
+(defconstant vector-data-byte-offset (* vm:vector-data-offset vm:word-bytes))
 
 (deftransform subseq ((string start &optional (end nil))
                      (simple-string t &optional t))
 
       (locally
          (declare (optimize (safety 0)))
-       (bit-bash-copy string2
+       (byte-bash-copy string2
                       (the vm::offset
-                           (+ (the vm::offset (* start2 vm:char-bits))
-                              vector-data-bit-offset))
+                           (+ (the vm::offset (* start2 vm:char-bytes))
+                              vector-data-byte-offset))
                       string1
                       (the vm::offset
-                           (+ (the vm::offset (* start1 vm:char-bits))
-                              vector-data-bit-offset))
+                           (+ (the vm::offset (* start1 vm:char-bytes))
+                              vector-data-byte-offset))
                       (the vm::offset
                            (* (min (the vm::offset (- (or end1 (length string1))
                                                       start1))
                                    (the vm::offset (- (or end2 (length string2))
                                                       start2)))
-                              vm:char-bits)))
+                              vm:char-bytes)))
        string1)))
 
 ;; The original version of this deftransform seemed to cause the