Newer
Older
Francois-Rene Rideau
committed
;;; -*- Mode: Lisp ; Base: 10 ; Syntax: ANSI-Common-Lisp -*-
;;; "Fast Mergable Integer Maps"
;;; See article of same name by Chris Okasaki & Andrew Gill, 1998
Francois-Rene Rideau
committed
;;; http://www.eecs.usma.edu/webs/people/okasaki/ml98maps.ps
;;; Under the hood: Big Endian Patricia Trees (Tries).
;;; Note however that in our API, what they call "merge" is called "join".
Francois-Rene Rideau
committed
#+xcvb
(module
(:depends-on
("interface/interface" "pure/package" "pure/map" "pure/alist" "pure/tree")))
Francois-Rene Rideau
committed
(in-package :pure)
Francois-Rene Rideau
committed
(defclass <fmim>
(<map> <tree>
map-simple-empty map-simple-decons map-simple-update-key
map-simple-map/2 map-simple-join/list map-simple-size
map-simple-for-each map-simple-divide/list)
Francois-Rene Rideau
committed
()
(:documentation "Fast Merge Integer Maps"))
Francois-Rene Rideau
committed
(defparameter <fmim> (make-instance '<fmim>))
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
;;; (big-endian) patricia tree (aka trie)
(defclass trie-head (simple-value-box)
Francois-Rene Rideau
committed
((height
:type fixnum
:initform 0
:initarg :height
:reader node-height)))
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
(defclass trie-node () ())
Francois-Rene Rideau
committed
(defclass trie-skip (trie-node box)
Francois-Rene Rideau
committed
((prefix-bits
:type (integer 0 *)
:initarg :prefix-bits
:reader node-prefix-bits)
(prefix-length
:type fixnum
:initarg :prefix-length
:reader node-prefix-length)))
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
(defclass trie-branch (trie-node binary-branch) ())
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
(defclass full-trie-branch (trie-branch) ())
;;; Not needed: position tells us! (defclass trie-leaf (trie-node box) ())
Francois-Rene Rideau
committed
(defmethod check-invariant ((i <fmim>) (map trie-head) &key)
(trie-check-invariant (box-ref map) (node-height map) 0))
(defun trie-check-invariant (trie position key)
(declare (optimize (debug 3)))
(check-type position (unsigned-byte))
(check-type key (unsigned-byte))
(assert (zerop (ldb (byte position 0) key)))
(unless (zerop position)
(etypecase trie
(trie-skip
(let ((pbits (node-prefix-bits trie))
(plen (node-prefix-length trie)))
(check-type pbits (unsigned-byte))
(check-type plen (integer 1 *))
(assert (<= (integer-length pbits) plen))
(assert (<= plen position))
(let ((pos (- position plen)))
(trie-check-invariant (box-ref trie) pos (dpb pbits (byte plen pos) key)))))
(trie-branch
(let ((pos (1- position)))
(trie-check-invariant (left trie) pos key)
(trie-check-invariant (right trie) pos (dpb 1 (byte 1 pos) key))))))
(values))
(defmethod lookup ((i <fmim>) map key)
(check-type map (or null trie-head))
Francois-Rene Rideau
committed
(check-type key (integer 0 *))
(if map
(let ((len (integer-length key))
(height (node-height map)))
(if (< height len)
(values nil nil)
(trie-lookup (box-ref map) height key)))
Francois-Rene Rideau
committed
(values nil nil)))
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
(defun trie-lookup (trie position key)
(cond
((zerop position) (values trie t))
((null trie) (values nil nil))
(t
(check-type trie trie-node)
(assert (plusp position))
(etypecase trie
(trie-skip
(let* ((pbits (node-prefix-bits trie))
(plen (node-prefix-length trie))
(pos (- position plen)))
(if (= pbits (ldb (byte plen pos) key))
(trie-lookup (box-ref trie) pos key)
Francois-Rene Rideau
committed
(values nil nil))))
(trie-branch
(let ((pos (1- position)))
(trie-lookup
(if (zerop (ldb (byte 1 pos) key))
(left trie)
(right trie))
pos key)))))))
(defun make-trie-leaf (position key value)
(if (zerop position)
value
(make-trie-skip position position (ldb (byte position 0) key) value)))
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
(defun make-trie-skip (position length bits datum)
(cond
((zerop length)
datum)
((and (plusp position) (null datum))
nil)
((and (> position length) (typep datum 'trie-skip))
(make-instance
'trie-skip
:prefix-length (+ length (node-prefix-length datum))
:prefix-bits (dpb bits (byte length (node-prefix-length datum))
(node-prefix-bits datum))
Francois-Rene Rideau
committed
(t
(make-instance
'trie-skip
:prefix-length length
:prefix-bits bits
:datum datum))))
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
(defun make-trie-branch (pos left right)
(cond
((or (zerop pos)
(and (typep left 'full-trie-branch)
(typep right 'full-trie-branch)))
(make-instance 'full-trie-branch :left left :right right))
((and left right)
(make-instance 'trie-branch :left left :right right))
(left
(make-trie-skip pos 1 0 left))
(right
(make-trie-skip pos 1 1 right))
(t
nil)))
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
(defun make-trie-head (height trie)
(cond
((and (plusp height) (null trie))
nil)
((and (plusp height)
(typep trie 'trie-skip)
(zerop (ldb (byte 1 (1- (node-prefix-length trie))) (node-prefix-bits trie))))
(let* ((plen (integer-length (node-prefix-bits trie)))
(height (- height (- (node-prefix-length trie) plen)))
(trie (make-trie-skip height plen (node-prefix-bits trie) datum)))
(make-instance 'trie-head :height height :datum trie)))
(t
(make-instance 'trie-head :height height :datum trie))))
Francois-Rene Rideau
committed
(defmethod insert ((i <fmim>) map key value)
Francois-Rene Rideau
committed
(check-type map (or null trie-head))
(check-type key (integer 0 *))
(let ((len (integer-length key)))
Francois-Rene Rideau
committed
(multiple-value-bind (l d)
(if (null map)
(values len (make-trie-skip len len key value))
(let ((height (node-height map))
(if (< height len)
(values len
(make-trie-branch
len
(make-trie-skip len (- len height 1) 0 trie)
(make-trie-leaf (1- len) key value)))
(values height
(trie-insert trie height key value)))))
Francois-Rene Rideau
committed
(make-trie-head l d))))
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
(defun trie-insert (trie position key value)
(if (zerop position) value
(etypecase trie
(trie-skip
(let* ((pbits (node-prefix-bits trie))
(plen (node-prefix-length trie))
(pos (- position plen)))
(if (= pbits (ldb (byte plen pos) key))
(make-trie-skip position plen pbits
(trie-insert (box-ref trie) pos key value))
(let* ((datum (box-ref trie))
Francois-Rene Rideau
committed
(len (1- plen))
(pos (1- position))
(trie1 (make-trie-skip
position len (ldb (byte len 0) pbits) datum))
(hb (ldb (byte 1 len) pbits))
(if (= hb new-hb)
position 1 hb
(trie-insert trie1 pos key value))
(let ((leaf (make-trie-leaf pos key value)))
(if (zerop new-hb)
(make-trie-branch pos leaf trie1)
(make-trie-branch pos trie1 leaf))))))))
Francois-Rene Rideau
committed
(trie-branch
(let ((pos (1- position)))
(if (zerop (ldb (byte 1 pos) key))
(make-trie-branch
pos
(trie-insert (left trie) pos key value)
(right trie))
(make-trie-branch
pos
(left trie)
(trie-insert (right trie) pos key value))))))))
Francois-Rene Rideau
committed
(defmethod drop ((i <fmim>) map key)
Francois-Rene Rideau
committed
(check-type map (or null trie-head))
(multiple-value-bind (v f)
(lookup i map key)
Francois-Rene Rideau
committed
(if f
(values
(multiple-value-bind (datum non-empty-p)
(trie-drop (box-ref map) (node-height map) key)
(when non-empty-p
(make-trie-head (node-height map) datum)))
v f)
Francois-Rene Rideau
committed
(values map nil nil))))
Francois-Rene Rideau
committed
(defun trie-drop (trie position key)
;; from our contract with drop,
Francois-Rene Rideau
committed
;; we do assume the key IS in fact in the trie.
(if (zerop position)
(values nil nil)
Francois-Rene Rideau
committed
(etypecase trie
(trie-skip
(let* ((pbits (node-prefix-bits trie))
(plen (node-prefix-length trie))
(pos (- position plen)))
(assert (= pbits (ldb (byte plen pos) key)))
(multiple-value-bind (datum non-empty-p)
(trie-drop (box-ref trie) pos key)
(if non-empty-p
(values (make-trie-skip position plen pbits datum) t)
(values nil nil)))))
Francois-Rene Rideau
committed
(trie-branch
(let* ((pos (1- position))
(bit (ldb (byte 1 pos) key)))
(values
(cond
((zerop pos)
(make-trie-skip 1 1 (- 1 bit)
(if (zerop bit) (right trie) (left trie))))
((zerop bit)
Francois-Rene Rideau
committed
(make-trie-branch
position
(trie-drop (left trie) pos key)
(right trie)))
(t
Francois-Rene Rideau
committed
(make-trie-branch
position
Francois-Rene Rideau
committed
(left trie)
(trie-drop (right trie) pos key))))
t))))))
Francois-Rene Rideau
committed
(defmethod first-key-value ((i <fmim>) map)
(leftmost i map))
Francois-Rene Rideau
committed
(defmethod fold-left ((i <fmim>) map f seed)
Francois-Rene Rideau
committed
(if (null map)
seed
(trie-fold-left (box-ref map) (node-height map) 0 f seed)))
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
(defun trie-fold-left (trie position key f seed)
(if (zerop position)
(funcall f seed key trie)
(etypecase trie
(trie-skip
(let* ((pbits (node-prefix-bits trie))
(plen (node-prefix-length trie))
(pos (- position plen)))
(trie-fold-left
(box-ref trie) pos (dpb pbits (byte plen pos) key) f seed)))
Francois-Rene Rideau
committed
(trie-branch
(let ((pos (1- position)))
(trie-fold-left
(right trie) pos (dpb 1 (byte 1 pos) key) f
(trie-fold-left
(left trie) pos key f seed)))))))
Francois-Rene Rideau
committed
(defmethod fold-right ((i <fmim>) map f seed)
Francois-Rene Rideau
committed
(if (null map)
seed
(trie-fold-right (box-ref map) (node-height map) 0 f seed)))
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
(defun trie-fold-right (trie position key f seed)
(if (zerop position)
(funcall f key trie seed)
(etypecase trie
(trie-skip
(let* ((pbits (node-prefix-bits trie))
(plen (node-prefix-length trie))
(pos (- position plen)))
(trie-fold-right
(box-ref trie) pos (dpb pbits (byte plen pos) key) f seed)))
Francois-Rene Rideau
committed
(trie-branch
(let ((pos (1- position)))
(trie-fold-right
(left trie) pos key f
(trie-fold-right
(right trie) pos (dpb 1 (byte 1 pos) key) f seed)))))))
Francois-Rene Rideau
committed
(defmethod leftmost ((i <fmim>) map)
(if (null map)
(values nil nil nil)
(trie-leftmost (box-ref map) (node-height map) 0)))
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
(defun trie-leftmost (trie position key)
(if (zerop position)
(values key trie t)
(etypecase trie
(trie-skip
(let* ((pbits (node-prefix-bits trie))
(plen (node-prefix-length trie))
(pos (- position plen)))
(trie-leftmost
(box-ref trie) pos (dpb pbits (byte plen pos) key))))
Francois-Rene Rideau
committed
(trie-branch
(trie-leftmost (left trie) (1- position) key)))))
Francois-Rene Rideau
committed
(defmethod rightmost ((i <fmim>) map)
(if (null map)
(values nil nil nil)
(trie-rightmost (box-ref map) (node-height map) 0)))
Francois-Rene Rideau
committed
Francois-Rene Rideau
committed
(defun trie-rightmost (trie position key)
(if (zerop position)
(values key trie t)
(etypecase trie
(trie-skip
(let* ((pbits (node-prefix-bits trie))
(plen (node-prefix-length trie))
(pos (- position plen)))
(trie-rightmost
(box-ref trie) pos (dpb pbits (byte plen pos) key))))
Francois-Rene Rideau
committed
(trie-branch
(let ((pos (1- position)))
(trie-rightmost (right trie) pos (dpb 1 (byte 1 pos) key)))))))
(defmethod divide ((i <fmim>) map)
(etypecase map
(null
(values nil nil))
(trie-head
(let ((height (node-height map))
(if (zerop height)
(values map nil)
(etypecase datum
(trie-branch
(values
(make-trie-head (1- height) (left datum))
(make-trie-head height
(make-trie-skip height 1 1 (right datum)))))
(trie-skip
(let* ((pbits (node-prefix-bits datum))
(plen (node-prefix-length datum))
(position (- height plen)))
(if (zerop position)
(values map nil)
(trie-branch
(flet ((f (bit datum)
(make-trie-head height
(make-trie-skip
height
(1+ plen)
(dpb pbits (byte plen 1) bit)
datum))))
(f 0 (left datum))
(f 1 (right datum))))))))))))))
Francois-Rene Rideau
committed
;;; The whole point of fmim is that we could do a fast "append",
(defmethod join ((i <fmim>) a b)
Francois-Rene Rideau
committed
(cond
((null a) b)
((null b) a)
(t
(check-type a trie-head)
(check-type b trie-head)
(let* ((ha (node-height a))
(hb (node-height b))
(h (max ha hb)))
(make-trie-head
h (trie-join (make-trie-skip h (- h ha) 0 (box-ref a))
(make-trie-skip h (- h hb) 0 (box-ref b))
h))))))
Francois-Rene Rideau
committed
(defun trie-join (a b position)
Francois-Rene Rideau
committed
(if (zerop position) a
(etypecase a
(full-trie-branch a)
(trie-branch
(let ((pos (1- position)))
(etypecase b
(trie-branch
(make-trie-branch
position
(trie-join (left a) (left b) pos)
(trie-join (right a) (right b) pos)))
Francois-Rene Rideau
committed
(trie-skip
(let* ((pbits (node-prefix-bits b))
(plen (node-prefix-length b))
(bh (ldb (byte 1 (1- plen)) pbits))
(b1
(make-trie-skip
pos (1- plen)
(ldb (byte (1- plen) 0) pbits) (box-ref b))))
Francois-Rene Rideau
committed
(if (zerop bh)
(make-trie-branch
position (trie-join (left a) b1 pos) (right a))
Francois-Rene Rideau
committed
(make-trie-branch
position (left a) (trie-join (right a) b1 pos))))))))
Francois-Rene Rideau
committed
(trie-skip
(let* ((pbits (node-prefix-bits a))
(plen (node-prefix-length a))
(pos (1- position))
(ah (ldb (byte 1 (1- plen)) pbits))
(a1
(make-trie-skip
pos (1- plen)
(ldb (byte (1- plen) 0) pbits) (box-ref a))))
Francois-Rene Rideau
committed
(etypecase b
(trie-branch
(if (zerop ah)
(make-trie-branch
position (trie-join a1 (left b) pos) (right b))
Francois-Rene Rideau
committed
(make-trie-branch
position (left b) (trie-join a1 (right b) pos))))
Francois-Rene Rideau
committed
(trie-skip
(let* ((pbitsb (node-prefix-bits b))
(plenb (node-prefix-length b))
(bh (ldb (byte 1 (1- plenb)) pbitsb))
(b1
(make-trie-skip
pos (1- plenb)
(ldb (byte (1- plenb) 0) pbitsb) (box-ref b))))
Francois-Rene Rideau
committed
(if (= ah bh)
(make-trie-skip position 1 0 (trie-join a1 b1 pos))
Francois-Rene Rideau
committed
(if (zerop ah)
(make-trie-branch position a1 b1)
(make-trie-branch position b1 a1)))))))))))
(defmethod print-object ((object trie-head) stream)
(format stream "#<fmim ~S>" (convert <alist> <fmim> object)))
Francois-Rene Rideau
committed
(defmethod print-object ((trie trie-branch) stream)
(format stream "#<tb ~S ~S>" (left trie) (right trie)))
Francois-Rene Rideau
committed
(defmethod print-object ((trie trie-skip) stream)
(format stream "#<ts ~S ~S ~S>"
(node-prefix-bits trie)
(node-prefix-length trie)
#|
You could implement sets of integers as bitmaps,
as a list of integers, as a binary tree,
or as big-endian patricia tries mapping integers to the unit type.
In the latter case, using multiple dispatch for sub-operations
instead of the typecases I used above would allow to easily add an
optimization to my patricia trie implementation in the case the
target type is the unit type (eql t), by i.e. short-circuiting
data representation and membership test on full subtrees.
See also
http://paste.lisp.org/display/95321
|#