/[cl-rope]/cl-rope/cl-rope.lisp
ViewVC logotype

Contents of /cl-rope/cl-rope.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (show annotations) (vendor branch)
Tue Jun 17 20:42:38 2008 UTC (5 years, 9 months ago) by pgijsels
Branch: MAIN, yoyo
CVS Tags: start, HEAD
Changes since 1.1: +0 -0 lines
Imported sources
1 ;;; -*- Mode: LISP; Package: ROPE; outline-regexp:";;;;;*" -*-
2 ;;;
3 ;;; CL-ROPE -- a rope package for Common Lisp
4 ;;; by Peter Gijsels, 2008
5 ;;;
6 ;;; An implementation of ropes (an alternative to strings) which supports
7 ;;; efficient (O(log n)) :
8 ;;; - concatenation
9 ;;; - subrope selection
10 ;;; - element selection
11 ;;;
12 ;;;; References:
13 ;;; * Hans-J. Boehm, Russ Atkinson, Michiael Pass, 1995, "Ropes: an
14 ;;; Alternative to Strings"
15 ;;; * SGI's rope class implementation in C++ (an extension to the STL).
16 ;;;
17 ;;; Limitations:
18 ;;; * There is currently no support for producing functions or lazily
19 ;;; copying of substrings.
20 ;;;
21 ;;;; Licence:
22 ;;;
23 ;;; Permission is hereby granted, free of charge, to any person
24 ;;; obtaining a copy of this software and associated documentation files
25 ;;; (the "Software"), to deal in the Software without restriction,
26 ;;; including without limitation the rights to use, copy, modify, merge,
27 ;;; publish, distribute, sublicense, and/or sell copies of the Software,
28 ;;; and to permit persons to whom the Software is furnished to do so,
29 ;;; subject to the following conditions:
30 ;;;
31 ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 ;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 ;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
34 ;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
35 ;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
36 ;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
37 ;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38
39 (eval-when (:compile-toplevel :load-toplevel :execute)
40
41 (defpackage cl-rope
42 (:use :cl)
43 (:export
44 #:advance
45 #:at-end-p
46 #:copy-rope-iterator
47 #:current-elt
48 #:iterator-position
49 #:make-rope-iterator
50 #:read-rope-from-stream
51 #:rope
52 #:rope-concatenate
53 #:rope-length
54 #:rope-elt
55 #:sub-rope)))
56
57 (in-package cl-rope)
58
59 (declaim (optimize (speed 3) (debug 0)))
60
61 (defparameter *min-balanced-rope-lengths*
62 (let ((fibs (list 2 1)))
63 (loop for next-fib = (+ (first fibs) (second fibs))
64 while (<= next-fib most-positive-fixnum)
65 do (push next-fib fibs))
66 (make-array (length fibs)
67 :element-type 'fixnum
68 :initial-contents (nreverse fibs)))
69 "A balanced rope of depth i has a length of at least
70 (aref *min-balanced-rope-lengths* i), the (+ i 2)'th Fibonacci number.")
71
72 (defparameter *max-rope-depth* (1- (length *min-balanced-rope-lengths*)))
73
74 (defparameter *max-print-length* 40
75 "Ropes with a length larger than *max-print-length* will be abbreviated
76 when printed.")
77
78 (defparameter *short-cut-off* 23
79 "When concatenating leafs that are shorter than *short-cut-off*, we
80 will allocate a leaf with a new string.")
81
82 (defparameter *initial-rope-chunk-size* 256
83 "The initial length of the leaves when reading a rope from a stream.")
84
85 (declaim (type fixnum *short-cut-off* *initial-rope-chunk-size* *max-rope-depth*))
86
87 ;;;; Ropes
88 (defstruct (rope (:constructor nil))
89 (balanced-p nil) ; implies (>= length (aref *min-balanced-rope-lengths* depth))
90 (depth 0 :type fixnum)
91 (length 0 :type fixnum))
92
93 (defstruct (leaf (:include rope) (:constructor %make-leaf))
94 (string "" :type (simple-array character (*))))
95
96 (defstruct (concat-node (:include rope) (:constructor %make-concat-node))
97 left right)
98
99 ;;;;; Construction
100 (declaim (inline %make-leaf make-leaf %make-concat-node))
101
102 (defun rope (x)
103 "Make a rope for a string or character X."
104 (make-leaf x))
105
106 (defun make-leaf (x)
107 (etypecase x
108 (string (%make-leaf :string x :length (length x) :balanced-p t))
109 (character (%make-leaf :string (string x) :length 1 :balanced-p t))))
110
111 (defun make-concat-node (left right &optional (balanced-p nil))
112 (%make-concat-node :depth (1+ (max (rope-depth left) (rope-depth right)))
113 :length (+ (rope-length left) (rope-length right))
114 :left left
115 :right right
116 :balanced-p balanced-p))
117
118 (defun read-rope-from-stream (stream)
119 "Read a rope from STREAM."
120 (let ((string (make-string *initial-rope-chunk-size*))
121 (result (rope "")))
122 (loop for chars-read = (read-sequence string stream)
123 do (setf result (rope-concatenate result
124 (rope (subseq string 0 chars-read))))
125 while (= chars-read *initial-rope-chunk-size*))
126 result))
127
128 ;;;;; Sub ropes
129 (defun sub-rope (rope start &optional end)
130 "Return a sub-rope of ROPE starting with element number START
131 and continuing to the end of the ROPE or the optional END."
132 (let ((l (rope-length rope)))
133 (unless (<= 0 start (or end l) l)
134 (error "The bounding indices ~A and ~A are bad for a sequence of length ~A."
135 start
136 end
137 l))
138 (labels
139 ((rec (rope start end)
140 (declare (type fixnum start end))
141 (declare (optimize (speed 3) (safety 0)))
142 (etypecase rope
143 (leaf (make-leaf (subseq (leaf-string rope) start end)))
144 (concat-node
145 (let* ((l (concat-node-left rope))
146 (r (concat-node-right rope))
147 (mid (rope-length l)))
148 (declare (type fixnum mid))
149 (cond
150 ((<= end mid)
151 (rec l start end))
152 ((<= mid start)
153 (rec r (- start mid) (- end mid)))
154 ((and (= 0 start) (= (rope-length rope) end))
155 rope)
156 (t
157 (make-concat-node
158 (rec-left l start)
159 (rec-right r (- end mid)))))))))
160 (rec-left (rope start)
161 "An optimized version of rec for the left subtree of a concat-node."
162 (declare (type fixnum start))
163 (declare (optimize (speed 3) (safety 0)))
164 (if (zerop start)
165 rope
166 (etypecase rope
167 (leaf (make-leaf (subseq (leaf-string rope) start)))
168 (concat-node
169 (let* ((l (concat-node-left rope))
170 (r (concat-node-right rope))
171 (mid (rope-length l)))
172 (declare (type fixnum mid))
173 (if (<= mid start)
174 (rec-left r (- start mid))
175 (make-concat-node
176 (rec-left l start)
177 r)))))))
178 (rec-right (rope end)
179 "An optimized version of rec for the right subtree of a concat-node."
180 (declare (type fixnum end))
181 (declare (optimize (speed 3) (safety 0)))
182 (if (= end (rope-length rope))
183 rope
184 (etypecase rope
185 (leaf (make-leaf (subseq (leaf-string rope) 0 end)))
186 (concat-node
187 (let* ((l (concat-node-left rope))
188 (mid (rope-length l)))
189 (declare (type fixnum mid))
190 (if (<= end mid)
191 (rec-right l end)
192 (make-concat-node
193 l
194 (rec-right (concat-node-right rope) (- end mid))))))))))
195 (rec rope start (or end l)))))
196
197 ;;;;; Concatenation (and balancing)
198 (declaim (inline short-leaf-p))
199 (defun short-leaf-p (rope)
200 (and (leaf-p rope)
201 (< (rope-length rope) *short-cut-off*)))
202
203 (declaim (inline concatenate-leaves))
204 (defun concatenate-leaves (leaf1 leaf2)
205 (make-leaf (concatenate 'string (leaf-string leaf1) (leaf-string leaf2))))
206
207 (defun rope-concatenate (rope1 rope2)
208 "Return a rope that is the concatenation of ROPE1 and ROPE2."
209 (concat rope1 rope2))
210
211 (declaim (inline leaf-concat))
212 (defun leaf-concat (leaf1 leaf2)
213 (make-leaf (concatenate 'string (leaf-string leaf1) (leaf-string leaf2))))
214
215 (defun tree-concat (left right)
216 (let ((result (make-concat-node left right)))
217 (if (and (> (rope-depth result) 20)
218 (or (< (rope-length result) 1000)
219 (> (rope-depth result) *max-rope-depth*)))
220 (balance result)
221 result)))
222
223 (defun concat (left right)
224 (cond ((null left) right)
225 ((null right) left)
226 ((short-leaf-p right)
227 ;; Optimize the case of repeatedly appending one char.
228 (if (short-leaf-p left)
229 (leaf-concat left right)
230 (let ((l-r (concat-node-right left)))
231 (if (short-leaf-p l-r)
232 (make-concat-node (concat-node-left left)
233 (leaf-concat l-r right))
234 (tree-concat left right)))))
235 (t (tree-concat left right))))
236
237 (defun concat-and-set-balanced (left right)
238 (let ((result (concat left right)))
239 (if (calculate-balanced result)
240 (setf (rope-balanced-p result) t))
241 result))
242
243 (defun calculate-balanced (rope)
244 (>= (rope-length rope) (aref *min-balanced-rope-lengths* (rope-depth rope))))
245
246 (defun balance (r)
247 (let ((forest (make-array (1+ *max-rope-depth*) :initial-element nil))
248 (result nil))
249 ;; (aref forest i) is one of the following:
250 ;; - is nil
251 ;; - is balanced and has a length in the interval [F(i+2),F(i+3))
252 ;; The concatenation of the sequence of ropes in order of decreasing
253 ;; length is equivalent to the prefix of R traversed so far.
254 (add-to-forest r forest)
255 (loop for y across forest
256 if y
257 do (setf result (concat y result)))
258 result))
259
260 (defun add-to-forest (r forest)
261 (if (rope-balanced-p r)
262 (add-leaf-to-forest r forest)
263 (progn
264 (add-to-forest (concat-node-left r) forest)
265 (add-to-forest (concat-node-right r) forest))))
266
267 (defun add-leaf-to-forest (r forest)
268 (let* ((too-tiny nil)
269 (s (rope-length r))
270 (i (loop
271 for i from 0
272 while (>= s (aref *min-balanced-rope-lengths* (1+ i)))
273 for tree = (aref forest i)
274 if tree
275 do (progn
276 (setf too-tiny (concat-and-set-balanced tree too-tiny))
277 (setf (aref forest i) nil))
278 finally (return i)))
279 (insertee (concat-and-set-balanced too-tiny r)))
280 (loop
281 for i from i
282 for tree = (aref forest i)
283 if tree
284 do (progn
285 (setf insertee (concat-and-set-balanced tree insertee))
286 (setf (aref forest i) nil))
287 until (or (= i *max-rope-depth*)
288 (< (rope-length insertee) (aref *min-balanced-rope-lengths* (1+ i))))
289 finally (setf (aref forest i) insertee))))
290
291 ;;;;; Element selection
292 (defun rope-elt (rope index)
293 (labels
294 ((rec (rope index)
295 (declare (fixnum index))
296 (declare (optimize (speed 3) (safety 0)))
297 (etypecase rope
298 (leaf (elt (leaf-string rope) index))
299 (concat-node
300 (let* ((l (concat-node-left rope))
301 (mid (rope-length l)))
302 (if (< index mid)
303 (rec l index)
304 (rec (concat-node-right rope) (- index mid))))))))
305 (rec rope index)))
306
307 ;;;;; Utilities
308 (defun rope->string (rope)
309 (let ((result (make-string (rope-length rope)))
310 (pos 0))
311 (loop
312 for leaf in (leaves rope)
313 for string = (leaf-string leaf)
314 do (replace result string :start1 pos)
315 do (incf pos (length string)))
316 result))
317
318 (defmethod print-object ((x rope) stream)
319 (let ((l (rope-length x)))
320 (print-unreadable-object (x stream :type t)
321 (format stream "(~A) " l)
322 (if (< l *max-print-length*)
323 (format stream "~A" (rope->string x))
324 (format stream "~A..." (rope->string (sub-rope x 0 *max-print-length*)))))))
325
326 (defun leaves (rope)
327 (if (leaf-p rope)
328 (list rope)
329 (nconc (leaves (concat-node-left rope))
330 (leaves (concat-node-right rope)))))
331
332 ;;;;; Rope iterators
333 ;; Only the leaf is stored, not the complete path to the leaf. This
334 ;; saves some space and the code becomes simpler.
335 ;; This might be suboptimal speedwise, but in practice it doesn't make
336 ;; much difference.
337 (defstruct (rope-iterator (:constructor %make-rope-iterator (rope)))
338 rope
339 leaf
340 (leaf-start 0 :type fixnum)
341 (offset 0 :type fixnum))
342
343 (defun iterator-position (iterator)
344 "Return the current position of ITERATOR (or length if at end)."
345 (if (at-end-p iterator)
346 (rope-length (rope-iterator-rope iterator))
347 (+ (rope-iterator-leaf-start iterator)
348 (rope-iterator-offset iterator))))
349
350 (defun make-rope-iterator (rope &optional (i 0))
351 (initialize-rope-iterator (%make-rope-iterator rope) i))
352
353 (defun initialize-rope-iterator (iterator i)
354 (symbol-macrolet ((rope (rope-iterator-rope iterator))
355 (leaf (rope-iterator-leaf iterator))
356 (leaf-start (rope-iterator-leaf-start iterator))
357 (offset (rope-iterator-offset iterator)))
358 (labels ((rec (rope s o)
359 (declare (fixnum s o))
360 (declare (optimize (speed 3) (safety 0)))
361 (etypecase rope
362 (leaf
363 (if (>= o (rope-length rope))
364 (setf leaf nil) ; at end
365 (progn
366 (setf leaf rope)
367 (setf leaf-start s)
368 (setf offset o))))
369 (concat-node
370 (let* ((l (concat-node-left rope))
371 (mid (rope-length l)))
372 (if (< o mid)
373 (rec l s o)
374 (rec (concat-node-right rope) (+ s mid) (- o mid))))))))
375 (rec rope 0 i))
376 iterator))
377
378 (defun advance (iterator &optional (n 1))
379 "Advance ITERATOR by N positions (N should be >= 0)."
380 (declare (type fixnum n))
381 (assert (not (minusp n)))
382 (symbol-macrolet ((rope (rope-iterator-rope iterator))
383 (leaf (rope-iterator-leaf iterator))
384 (leaf-start (rope-iterator-leaf-start iterator))
385 (offset (rope-iterator-offset iterator)))
386 (unless (at-end-p iterator)
387 (let ((l (rope-length leaf)))
388 (if (< (+ offset n) l)
389 (incf offset n) ; we're staying in the current leaf
390 (initialize-rope-iterator iterator (+ leaf-start offset n)))))
391 iterator))
392
393 (defun current-elt (iterator)
394 "Return the current element or nil if ITERATER is at-end-p."
395 (if (at-end-p iterator)
396 nil
397 (rope-elt (rope-iterator-leaf iterator) (rope-iterator-offset iterator))))
398
399 (defun at-end-p (iterator)
400 (null (rope-iterator-leaf iterator)))
401
402 ;;;; Tests
403 #+5am(eval-when (:compile-toplevel :load-toplevel :execute)
404 (use-package :fiveam))
405
406 #+5am
407 (progn
408 (def-suite rope-test-suite)
409 (in-suite rope-test-suite)
410
411 ;; In the tests we're using sexps to represent the internal structure of
412 ;; a rope. A sexp can be
413 ;; - a string or character: corresponds to a leaf
414 ;; - a list of exactly two elements: corresponds to a concat-node
415 (defun sexp->rope (sexp)
416 (if (or (characterp sexp) (stringp sexp))
417 (rope sexp)
418 (progn
419 (assert (= 2 (length sexp)))
420 (make-concat-node (sexp->rope (first sexp))
421 (sexp->rope (second sexp))))))
422
423 (defun rope->sexp (rope)
424 (etypecase rope
425 (leaf (leaf-string rope))
426 (concat-node (list (rope->sexp (concat-node-left rope))
427 (rope->sexp (concat-node-right rope))))))
428
429 (defun mapply (fn list)
430 (mapcar #'(lambda (x) (apply fn x)) list))
431
432 (test min-rope-lengths-test
433 (is (equalp
434 (loop for i below 7 collect (aref *min-balanced-rope-lengths* i))
435 '(1 2 3 5 8 13 21))))
436
437 (test length-test
438 (mapply #'(lambda (sexp expected-length)
439 (is (= (rope-length (sexp->rope sexp)) expected-length)))
440 '(("" 0)
441 ("abc" 3)
442 (("foob" "ar") 6))))
443
444 (test leaves-test
445 (mapply #'(lambda (sexp expected)
446 (equalp (leaves (sexp->rope sexp)) expected))
447 '(("" ())
448 ("abc" ("abc"))
449 ((("foo" "bar") "baz") ("foo" "bar" "baz")))))
450
451 (test rope->string-test
452 (mapply #'(lambda (sexp string)
453 (is (string= (rope->string (sexp->rope sexp))
454 string)))
455 '(("" "")
456 (#\a "a")
457 ("abc" "abc")
458 (("foo" ("bar" "baz")) "foobarbaz"))))
459
460 (test rope-concatenate-test
461 (mapply #'(lambda (sexp1 sexp2 sexp3)
462 (is (equalp (rope->sexp (rope-concatenate (sexp->rope sexp1)
463 (sexp->rope sexp2)))
464 sexp3)))
465 '(("" "" "")
466 ("abc" "def" "abcdef")
467 (("abc" "def") "g" ("abc" "defg"))
468 ("abc" ("def" "g") ("abc" ("def" "g")))
469 (("abc" "def") ("gh" "i") (("abc" "def") ("gh" "i"))))))
470
471 (test sub-rope-test
472 (mapply #'(lambda (sexp start end string)
473 (is (string= (rope->string (sub-rope (sexp->rope sexp)
474 start end))
475 string)))
476 '(("abc" 0 0 "")
477 ("abc" 0 1 "a")
478 ("abc" 0 2 "ab")
479 ("abc" 0 3 "abc")
480 ("abc" 1 1 "")
481 ("abc" 1 2 "b")
482 ("abc" 1 3 "bc")
483 ("abc" 3 3 "")
484 (("ab" "cd") 0 0 "")
485 (("ab" "cd") 0 1 "a")
486 (("ab" "cd") 0 2 "ab")
487 (("ab" "cd") 0 3 "abc")
488 (("ab" "cd") 0 4 "abcd")
489 (("ab" "cd") 1 1 "")
490 (("ab" "cd") 1 2 "b")
491 (("ab" "cd") 1 3 "bc")
492 (("ab" "cd") 1 4 "bcd")
493 (("ab" "cd") 2 2 "")
494 (("ab" "cd") 2 3 "c")
495 (("ab" "cd") 2 4 "cd")
496 (("ab" "cd") 3 3 "")
497 (("ab" "cd") 3 4 "d")
498 (("ab" "cd") 4 4 "")))
499 (let ((rope (sexp->rope '((("abc" "def") "ghi")
500 ("jkl" (("mno" "pqr")
501 ("stu" ("vw" "xyz"))))))))
502 (loop for i below 26
503 do (setf rope (rope-concatenate (sub-rope rope 0 i)
504 (sub-rope rope i 26))))
505 (is (string= (rope->string rope)
506 "abcdefghijklmnopqrstuvwxyz"))
507 (loop for i below 26
508 do (setf rope (rope-concatenate (sub-rope rope i 26)
509 (sub-rope rope 0 i))))
510 (is (= (rope-length rope) 26))))
511
512 (test balance-test
513 (let ((*short-cut-off* 1))
514 (mapply #'(lambda (sexp expected)
515 (is (equalp (rope->sexp (balance (sexp->rope sexp)))
516 expected)))
517 '(("" "")
518 ("abc" "abc")
519 (("abc" "de") ("abc" "de"))
520 ((("abc" "d") "e") ("abc" ("d" "e")))
521 (((("a" ("b" "c")) "d") ("e" ("f" "g")))
522 ((("a" "b") ("c" "d")) (("e" "f") "g")))))))
523
524 (test rope-elt-test
525 (mapply #'(lambda (sexp index expected)
526 (is (eql (rope-elt (sexp->rope sexp) index) expected)))
527 '((("ab" "cd") 0 #\a)
528 (("ab" "cd") 1 #\b)
529 (("ab" "cd") 2 #\c)
530 (("ab" "cd") 3 #\d))))
531
532 (test rope-iterator-test
533 (let* ((rope (sexp->rope '((("abc" "def") "ghi")
534 ("jkl" (("mno" "pqr")
535 ("stu" ("vw" "xyz")))))))
536 (it (make-rope-iterator rope)))
537 (is (string= (coerce (loop
538 until (at-end-p it)
539 collect (current-elt it)
540 do (advance it))
541 'string)
542 (rope->string rope))))
543 (let ((it (make-rope-iterator (sexp->rope '("a" ("bcd" "efg"))))))
544 (is (zerop (iterator-position it)))
545 (is (char= (current-elt it) #\a))
546 (advance it 2)
547 (is (= (iterator-position it) 2))
548 (is (char= (current-elt it) #\c))
549 (advance it 1)
550 (is (= (iterator-position it) 3))
551 (is (char= (current-elt it) #\d))
552 (advance it 10)
553 (is (at-end-p it))
554 (is (= (iterator-position it) 7))
555 (is (null (current-elt it))))))

  ViewVC Help
Powered by ViewVC 1.1.5