implementation module Data.Set

import StdClass, StdMisc, StdBool, StdList, StdFunc, StdInt, StdTuple
import Data.Maybe

// Outcome of a three-way comparison, as produced by 'compare'.
:: Ordering = LT | GT | EQ

// | /O(n*log n)/. Apply @f@ to every element and rebuild the set from the
// resulting list by repeated insertion.
mapSet :: (a -> b) (Set a) -> Set b | < a & == a & < b & == b
mapSet f s = fromList (map f (toList s))

// | /O(n)/. Apply @f@ to every element while keeping the tree shape and the
// stored sizes untouched. Nothing is re-ordered or re-balanced, so @f@ is
// expected to preserve the ordering of the elements (as the name suggests).
mapSetMonotonic :: (a -> b) (Set a) -> Set b
mapSetMonotonic _ Tip = Tip
mapSetMonotonic f (Bin n x l r) = Bin n (f x) (mapSetMonotonic f l) (mapSetMonotonic f r)

// Three-way comparison expressed with (<) and (>):
// LT when @x < y@, GT when @x > y@, EQ otherwise.
compare :: a a -> Ordering | < a & == a
compare x y = if (x < y) LT (if (x > y) GT EQ)

/*
 * Sets are size balanced trees.
 * A set of values @a@.
 */
:: Set a
    = Tip
    | Bin !Int a !(Set a) !(Set a)

// Two sets are equal when they have the same size and the same ascending
// element list.
instance == (Set a) | == a where
    (==) t1 t2 = (size t1 == size t2) && (toAscList t1 == toAscList t2)

// Sets are ordered lexicographically on their ascending element lists.
instance < (Set a) | < a where
    (<) s1 s2 = lexLess (toAscList s1) (toAscList s2)
    where
        // Strict lexicographic "less than" on lists; a proper prefix is smaller.
        lexLess []     []     = False
        lexLess []     _      = True
        lexLess [_:_]  []     = False
        lexLess [a:as] [b:bs]
            | a < b     = True
            | a > b     = False
            | otherwise = lexLess as bs

/*--------------------------------------------------------------------
 * Query
 *--------------------------------------------------------------------*/

// | /O(1)/. Is this the empty set?
null :: (Set a) -> Bool
null t = case t of
    Tip         -> True
    Bin _ _ _ _ -> False

// | /O(1)/. The number of elements in the set.
size :: (Set a) -> Int
size t = case t of
    Tip          -> 0
    Bin sz _ _ _ -> sz

// | /O(log n)/. Is the element in the set?
member :: a (Set a) -> Bool | < a & == a
member x t = case t of
    Tip -> False
    Bin _ y l r -> case compare x y of
        LT -> member x l
        GT -> member x r
        EQ -> True

// | /O(log n)/. Is the element not in the set?
notMember :: a (Set a) -> Bool | < a & == a
notMember x t = not (member x t)

/*--------------------------------------------------------------------
 * Construction
 *--------------------------------------------------------------------*/

// | /O(1)/. The empty set.
empty :: Set a
empty = Tip

// Synonym for 'empty'.
newSet :: Set a
newSet = empty

// | /O(1)/. Create a singleton set.
singleton :: u:a -> w:(Set u:a), [w <= u]
singleton v = Bin 1 v Tip Tip

/*--------------------------------------------------------------------
 * Insertion, Deletion
 *--------------------------------------------------------------------*/

// | /O(log n)/. Insert an element in a set.
// When an equal element is already stored, it is replaced by the new value.
insert :: a .(Set a) -> Set a | < a & == a
insert v Tip = singleton v
insert v (Bin n root lft rgt) = case compare v root of
    LT -> balance root (insert v lft) rgt
    GT -> balance root lft (insert v rgt)
    EQ -> Bin n v lft rgt

// | /O(log n)/. Delete an element from a set.
// Deleting an element that is not present leaves the elements unchanged.
delete :: a .(Set a) -> Set a | < a & == a
delete _ Tip = Tip
delete v (Bin _ root lft rgt) = case compare v root of
    LT -> balance root (delete v lft) rgt
    GT -> balance root lft (delete v rgt)
    EQ -> glue lft rgt

/*--------------------------------------------------------------------
 * Subset
 *--------------------------------------------------------------------*/

// | /O(n+m)/. Is this a proper subset? (i.e. a subset but not equal).
isProperSubsetOf :: (Set a) (Set a) -> Bool | < a & == a
isProperSubsetOf small big = (size small < size big) && (isSubsetOf small big)

// | /O(n+m)/. Is this a subset?
// @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
isSubsetOf :: (Set a) (Set a) -> Bool | < a & == a
isSubsetOf small big = (size small <= size big) && (isSubsetOfX small big)

// Worker for 'isSubsetOf': splits the second set around the root of the
// first one and recurses on both halves.
isSubsetOfX :: (Set a) (Set a) -> Bool | < a & == a
isSubsetOfX Tip _ = True
isSubsetOfX _ Tip = False
isSubsetOfX (Bin _ root lft rgt) other
    = present && isSubsetOfX lft below && isSubsetOfX rgt above
where
    (below, present, above) = splitMember root other

/*--------------------------------------------------------------------
 * Minimal, Maximal
 *--------------------------------------------------------------------*/

// | /O(log n)/. The minimal element of a set.
// Aborts on the empty set.
findMin :: (Set a) -> a
findMin Tip = abort "Set.findMin: empty set has no minimal element"
findMin (Bin _ v Tip _) = v
findMin (Bin _ _ lft _) = findMin lft

// | /O(log n)/.
// The maximal element of a set.
// Aborts on the empty set.
findMax :: (Set a) -> a
findMax Tip = abort "Set.findMax: empty set has no maximal element"
findMax (Bin _ v _ Tip) = v
findMax (Bin _ _ _ rgt) = findMax rgt

// | /O(log n)/. Delete the minimal element.
// The empty set is returned unchanged.
deleteMin :: .(Set a) -> Set a
deleteMin Tip = Tip
deleteMin (Bin _ _ Tip rgt) = rgt
deleteMin (Bin _ v lft rgt) = balance v (deleteMin lft) rgt

// | /O(log n)/. Delete the maximal element.
// The empty set is returned unchanged.
deleteMax :: .(Set a) -> Set a
deleteMax Tip = Tip
deleteMax (Bin _ _ lft Tip) = lft
deleteMax (Bin _ v lft rgt) = balance v lft (deleteMax rgt)

/*--------------------------------------------------------------------
 * Union.
 *--------------------------------------------------------------------*/

// | The union of a list of sets: (@'unions' == 'foldl' 'union' 'empty'@).
unions :: u:[v:(Set a)] -> Set a | < a & == a, [u <= v]
unions sets = foldl union empty sets

// | /O(n+m)/. The union of two sets, preferring the first set when
// equal elements are encountered.
// The implementation uses the efficient /hedge-union/ algorithm.
// Hedge-union is more efficient on (bigset `union` smallset).
union :: u:(Set a) u:(Set a) -> Set a | < a & == a
union Tip t2 = t2
union t1 Tip = t1
union t1 t2 = hedgeUnion (const LT) (const GT) t1 t2

// Worker for 'union'. [cmplo] and [cmphi] are the comparison functions of the
// lower and upper bound of the current hedge; the initial call passes
// (const LT) and (const GT), i.e. no bounds.
hedgeUnion :: (a -> Ordering) (a -> Ordering) u:(Set a) u:(Set a) -> Set a | < a & == a
hedgeUnion _ _ t1 Tip = t1
hedgeUnion cmplo cmphi Tip (Bin _ v lft rgt)
    = join v (filterGt cmplo lft) (filterLt cmphi rgt)
hedgeUnion cmplo cmphi (Bin _ pivot lft rgt) t2
    = join pivot lower upper
where
    // [cmpPivot y] reads as [compare pivot y].
    cmpPivot y = compare pivot y
    lower = hedgeUnion cmplo cmpPivot lft (trim cmplo cmpPivot t2)
    upper = hedgeUnion cmpPivot cmphi rgt (trim cmpPivot cmphi t2)

/*--------------------------------------------------------------------
 * Difference
 *--------------------------------------------------------------------*/

// | /O(n+m)/. Difference of two sets.
// The implementation uses an efficient /hedge/ algorithm comparable with /hedge-union/.
difference :: (Set a) (Set a) -> Set a | < a & == a
difference Tip _ = Tip
difference t1 Tip = t1
difference t1 t2 = hedgeDiff (const LT) (const GT) t1 t2

// Worker for 'difference'. [cmplo] and [cmphi] are the comparison functions
// of the lower and upper bound of the current hedge; the initial call passes
// (const LT) and (const GT), i.e. no bounds.
hedgeDiff :: (a -> Ordering) (a -> Ordering) (Set a) (Set a) -> Set a | < a & == a
hedgeDiff _ _ Tip _ = Tip
hedgeDiff cmplo cmphi (Bin _ x l r) Tip
    = join x (filterGt cmplo l) (filterLt cmphi r)
hedgeDiff cmplo cmphi t (Bin _ x l r)
    = merge (hedgeDiff cmplo cmpx (trim cmplo cmpx t) l)
            (hedgeDiff cmpx cmphi (trim cmpx cmphi t) r)
where
    // [cmpx y] reads as [compare x y].
    cmpx y = compare x y

/*--------------------------------------------------------------------
 * Intersection
 *--------------------------------------------------------------------*/

// | The intersection of a non-empty list of sets, folded from the left.
// The intersection of zero sets cannot be represented, so an empty list
// aborts with an explicit message instead of failing on a missing
// alternative.
intersections :: [Set a] -> Set a | < a & == a
intersections [] = abort "Set.intersections: empty list of sets has no intersection"
intersections [t] = t
intersections [t:ts] = foldl intersection t ts

// | /O(n+m)/. The intersection of two sets.
// Elements of the result come from the first set, so for example
//
// > import qualified Data.Set as S
// > data AB = A | B deriving Show
// > instance Ord AB where compare _ _ = EQ
// > instance Eq AB where _ == _ = True
// > main = print (S.singleton A `S.intersection` S.singleton B,
// >               S.singleton B `S.intersection` S.singleton A)
//
// prints @(fromList [A],fromList [B])@.
intersection :: (Set a) (Set a) -> Set a | < a & == a
intersection Tip _ = Tip
intersection _ Tip = Tip
intersection t1=:(Bin s1 x1 l1 r1) t2=:(Bin s2 x2 l2 r2)
    = if (s1 >= s2) then1 else1
where
    // First set is at least as large: split it around the root of the second
    // set and keep the element found in the first set.
    then1 = let (lt,found,gt) = splitLookup x2 t1
                tl            = intersection lt l2
                tr            = intersection gt r2
            in case found of
                (Just x) -> join x tl tr
                Nothing  -> merge tl tr
    // Second set is larger: split it around the root of the first set.
    else1 = let (lt,found,gt) = splitMember x1 t2
                tl            = intersection l1 lt
                tr            = intersection r1 gt
            in (if found (join x1 tl tr) (merge tl tr))

/*--------------------------------------------------------------------
 * Filter and partition
 *--------------------------------------------------------------------*/

// | /O(n)/. Filter all elements that satisfy the predicate.
filter :: (a -> Bool) (Set a) -> Set a | < a & == a
filter _ Tip = Tip
filter keep (Bin _ v lft rgt)
    | keep v    = join v (filter keep lft) (filter keep rgt)
    | otherwise = merge (filter keep lft) (filter keep rgt)

// | /O(n)/. Partition the set into two sets, one with all elements that satisfy
// the predicate and one with all elements that don't satisfy the predicate.
// See also 'split'.
partition :: (a -> Bool) (Set a) -> (Set a,Set a) | < a & == a
partition _ Tip = (Tip,Tip)
partition keep (Bin _ v lft rgt)
    | keep v    = (join v lftYes rgtYes, merge lftNo rgtNo)
    | otherwise = (merge lftYes rgtYes, join v lftNo rgtNo)
where
    (lftYes, lftNo) = partition keep lft
    (rgtYes, rgtNo) = partition keep rgt

/*--------------------------------------------------------------------
 * Fold
 *--------------------------------------------------------------------*/

// | /O(n)/. Post-order fold.
// The right subtree is folded first, then the root, then the left subtree.
fold :: (a -> .b -> .b) .b .(Set a) -> .b
fold _ acc Tip = acc
fold step acc (Bin _ v lft rgt) = fold step (step v (fold step acc rgt)) lft

/*--------------------------------------------------------------------
 * Lists
 *--------------------------------------------------------------------*/

// | /O(n)/. Convert the set to a list of elements.
toList :: (Set a) -> [a]
toList s = toAscList s

// | /O(n)/. Convert the set to an ascending list of elements.
toAscList :: (Set a) -> [a]
toAscList t = fold (\v rest -> [v:rest]) [] t

// | /O(n*log n)/. Create a set from a list of elements.
fromList :: [a] -> Set a | < a & == a
fromList xs = foldl (\acc v -> insert v acc) empty xs

/*--------------------------------------------------------------------
  Utility functions that return sub-ranges of the original
  tree. Some functions take a comparison function as argument to
  allow comparisons against infinite values. A function [cmplo x]
  should be read as [compare lo x].

  [trim cmplo cmphi t]  A tree that is either empty or where [cmplo x == LT]
                        and [cmphi x == GT] for the value [x] of the root.
  [filterGt cmp t]      A tree where for all values [k]. [cmp k == LT]
  [filterLt cmp t]      A tree where for all values [k].
[cmp k == GT] [split k t] Returns two trees [l] and [r] where all values in [l] are <[k] and all keys in [r] are >[k]. [splitMember k t] Just like [split] but also returns whether [k] was found in the tree. --------------------------------------------------------------------*/ /*-------------------------------------------------------------------- [trim lo hi t] trims away all subtrees that surely contain no values between the range [lo] to [hi]. The returned tree is either empty or the key of the root is between @lo@ and @hi@. --------------------------------------------------------------------*/ trim :: (a -> Ordering) (a -> Ordering) (Set a) -> Set a trim _ _ Tip = Tip trim cmplo cmphi t=:(Bin _ x l r) = case cmplo x of LT -> case cmphi x of GT -> t _ -> trim cmplo cmphi l _ -> trim cmplo cmphi r /*-------------------------------------------------------------------- * [filterGt x t] filter all values >[x] from tree [t] * [filterLt x t] filter all values <[x] from tree [t] *--------------------------------------------------------------------*/ filterGt :: (a -> Ordering) (Set a) -> Set a filterGt _ Tip = Tip filterGt cmp (Bin _ x l r) = case cmp x of LT -> join x (filterGt cmp l) r GT -> filterGt cmp r EQ -> r filterLt :: (a -> Ordering) (Set a) -> Set a filterLt _ Tip = Tip filterLt cmp (Bin _ x l r) = case cmp x of LT -> filterLt cmp l GT -> join x l (filterLt cmp r) EQ -> l /*-------------------------------------------------------------------- * Split *--------------------------------------------------------------------*/ // | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@ // where @set1@ comprises the elements of @set@ less than @x@ and @set2@ // comprises the elements of @set@ greater than @x@. split :: a (Set a) -> (Set a,Set a) | < a & == a split _ Tip = (Tip,Tip) split x (Bin _ y l r) = case compare x y of LT -> let (lt,gt) = split x l in (lt,join y gt r) GT -> let (lt,gt) = split x r in (join y l lt,gt) EQ -> (l,r) // | /O(log n)/. 
// Performs a 'split' but also returns whether the pivot
// element was found in the original set.
splitMember :: a (Set a) -> (Set a,Bool,Set a) | < a & == a
splitMember x t = (below, maybe False (const True) hit, above)
where
    (below, hit, above) = splitLookup x t

// | /O(log n)/. Performs a 'split' but also returns the pivot
// element that was found in the original set.
splitLookup :: a (Set a) -> (Set a,Maybe a,Set a) | < a & == a
splitLookup _ Tip = (Tip,Nothing,Tip)
splitLookup x (Bin _ root lft rgt) = case compare x root of
    LT -> let (below,hit,above) = splitLookup x lft in (below, hit, join root above rgt)
    GT -> let (below,hit,above) = splitLookup x rgt in (join root lft below, hit, above)
    EQ -> (lft, Just root, rgt)

/*--------------------------------------------------------------------
  Utility functions that maintain the balance properties of the tree.
  All constructors assume that all values in [l] < [x] and all values
  in [r] > [x], and that [l] and [r] are valid trees.

  In order of sophistication:
    [Bin sz x l r]    The type constructor.
    [bin x l r]       Maintains the correct size, assumes that both [l]
                      and [r] are balanced with respect to each other.
    [balance x l r]   Restores the balance and size.
                      Assumes that the original tree was balanced and
                      that [l] or [r] has changed by at most one element.
    [join x l r]      Restores balance and size.

  Furthermore, we can construct a new tree from two trees. Both operations
  assume that all values in [l] < all values in [r] and that [l] and [r]
  are valid:
    [glue l r]        Glues [l] and [r] together. Assumes that [l] and
                      [r] are already balanced with respect to each other.
    [merge l r]       Merges two trees and restores balance.

  Note: in contrast to Adams' paper, we use (<=) comparisons instead
  of (<) comparisons in [join], [merge] and [balance].
  Quickcheck (on [difference]) showed that this was necessary in order
  to maintain the invariants. It is quite unsatisfactory that I haven't
  been able to find out why this is actually the case! Fortunately, it
  doesn't hurt to be a bit more conservative.
--------------------------------------------------------------------*/

/*--------------------------------------------------------------------
 * Join
 *--------------------------------------------------------------------*/
// Combine a value with a left and a right tree and restore balance.
join :: a (Set a) (Set a) -> Set a
join v Tip rgt = insertMin v rgt
join v lft Tip = insertMax v lft
join v lft=:(Bin nL vL lftL rgtL) rgt=:(Bin nR vR lftR rgtR)
    | delta*nL <= nR = balance vR (join v lft lftR) rgtR
    | delta*nR <= nL = balance vL lftL (join v rgtL rgt)
    | otherwise      = bin v lft rgt

// insertMin and insertMax don't perform potentially expensive comparisons.
insertMax :: a (Set a) -> Set a
insertMax v Tip = singleton v
insertMax v (Bin _ root lft rgt) = balance root lft (insertMax v rgt)

insertMin :: a (Set a) -> Set a
insertMin v Tip = singleton v
insertMin v (Bin _ root lft rgt) = balance root (insertMin v lft) rgt

/*--------------------------------------------------------------------
 * [merge l r]: merges two trees.
 *--------------------------------------------------------------------*/
merge :: (Set a) (Set a) -> Set a
merge Tip rgt = rgt
merge lft Tip = lft
merge lft=:(Bin nL vL lftL rgtL) rgt=:(Bin nR vR lftR rgtR)
    | delta*nL <= nR = balance vR (merge lft lftR) rgtR
    | delta*nR <= nL = balance vL lftL (merge rgtL rgt)
    | otherwise      = glue lft rgt

/*--------------------------------------------------------------------
 * [glue l r]: glues two trees together.
 * Assumes that [l] and [r] are already balanced with respect to each other.
 *--------------------------------------------------------------------*/
glue :: .(Set a) .(Set a) -> Set a
glue Tip rgt = rgt
glue lft Tip = lft
glue lft rgt
    // Take the new root from the larger side.
    | size lft > size rgt = let (top,lft`) = deleteFindMax lft in balance top lft` rgt
    | otherwise           = let (top,rgt`) = deleteFindMin rgt in balance top lft rgt`

// | /O(log n)/. Delete and find the minimal element.
//
// > deleteFindMin set = (findMin set, deleteMin set)
deleteFindMin :: .(Set a) -> (a, Set a)
deleteFindMin (Bin _ v Tip rgt) = (v, rgt)
deleteFindMin (Bin _ v lft rgt)
    # (least, lft`) = deleteFindMin lft
    = (least, balance v lft` rgt)
deleteFindMin Tip
    = (abort "Set.deleteFindMin: can not return the minimal element of an empty set", Tip)

// | /O(log n)/. Delete and find the maximal element.
//
// > deleteFindMax set = (findMax set, deleteMax set)
deleteFindMax :: .(Set a) -> (a, Set a)
deleteFindMax (Bin _ v lft Tip) = (v, lft)
deleteFindMax (Bin _ v lft rgt)
    # (greatest, rgt`) = deleteFindMax rgt
    = (greatest, balance v lft rgt`)
deleteFindMax Tip
    = (abort "Set.deleteFindMax: can not return the maximal element of an empty set", Tip)

// | /O(log n)/. Retrieves the minimal key of the set, and the set
// stripped of that element, or 'Nothing' if passed an empty set.
minView :: .(Set a) -> .(Maybe (a,Set a))
minView Tip = Nothing
minView t = Just (deleteFindMin t)

// | /O(log n)/. Retrieves the maximal key of the set, and the set
// stripped of that element, or 'Nothing' if passed an empty set.
maxView :: .(Set a) -> .(Maybe (a,Set a))
maxView Tip = Nothing
maxView t = Just (deleteFindMax t)

/*--------------------------------------------------------------------
  [balance x l r] balances two trees with value x.
  The sizes of the trees should balance after decreasing the
  size of one of them. (a rotation).

  [delta] is the maximal relative difference between the sizes of
          two trees, it corresponds with the [w] in Adams' paper,
          or equivalently, [1/delta] corresponds with the $\alpha$
          in Nievergelt's paper. Adams shows that [delta] should
          be larger than 3.745 in order to guarantee that the
          rotations can always restore balance.

  [ratio] is the ratio between an outer and inner sibling of the
          heavier subtree in an unbalanced setting. It determines
          whether a double or single rotation should be performed
          to restore balance. It corresponds with the inverse
          of $\alpha$ in Adams' article.
  Note that:
  - [delta] should be larger than 4.646 with a [ratio] of 2.
  - [delta] should be larger than 3.745 with a [ratio] of 1.534.

  - A lower [delta] leads to a more 'perfectly' balanced tree.
  - A higher [delta] performs less rebalancing.

  - Balancing is automatic for random data and a balancing
    scheme is only necessary to avoid pathological worst cases.
    Almost any choice will do in practice.

  - Although it seems that a rather large [delta] may perform better
    than a smaller one, measurements have shown that the smallest [delta]
    of 4 is actually the fastest on a wide range of operations. It
    especially improves performance on worst-case scenarios like
    a sequence of ordered insertions.

  Note: in contrast to Adams' paper, we use a ratio of (at least) 2
  to decide whether a single or double rotation is needed. Although
  he actually proves that this ratio is needed to maintain the
  invariants, his implementation uses an (invalid) ratio of 1.
  He is aware of the problem though since he has put a comment in his
  original source code that he doesn't care about generating a
  slightly unbalanced tree since it doesn't seem to matter in practice.
  However (since we use quickcheck :-) we will stick to strictly balanced
  trees.
--------------------------------------------------------------------*/
// Maximal relative size difference allowed between two sibling subtrees.
delta :== 4
// Threshold between the inner and outer grandchild sizes that selects a
// single or a double rotation.
ratio :== 2

// Build a node from @x@, @l@ and @r@, rotating when one side has become at
// least [delta] times as large as the other.
balance :: a (Set a) (Set a) -> Set a
balance x l r
    | sizeL + sizeR <= 1   = Bin sizeX x l r    // too small to be out of balance
    | sizeR >= delta*sizeL = rotateL x l r      // right side too heavy
    | sizeL >= delta*sizeR = rotateR x l r      // left side too heavy
    | otherwise            = Bin sizeX x l r
where
    sizeL = size l
    sizeR = size r
    sizeX = sizeL + sizeR + 1

// rotate

// Rotate to the left; the right subtree must be non-empty.
rotateL :: a (Set a) (Set a) -> Set a
rotateL x l r=:(Bin _ _ ly ry)
    | size ly < ratio*size ry = singleL x l r
    | otherwise               = doubleL x l r
rotateL _ _ Tip = abort "rotateL Tip"

// Rotate to the right; the left subtree must be non-empty.
rotateR :: a (Set a) (Set a) -> Set a
rotateR x l=:(Bin _ _ ly ry) r
    | size ry < ratio*size ly = singleR x l r
    | otherwise               = doubleR x l r
// The message previously said "rotateL Tip", a copy-paste slip that pointed
// at the wrong function.
rotateR _ Tip _ = abort "rotateR Tip"

// basic rotations
singleL :: a (Set a) (Set a) -> Set a
singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
singleL _ _ Tip = abort "singleL"

singleR :: a (Set a) (Set a) -> Set a
singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
singleR _ Tip _ = abort "singleR"

doubleL :: a (Set a) (Set a) -> Set a
doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
doubleL _ _ _ = abort "doubleL"

doubleR :: a (Set a) (Set a) -> Set a
doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
doubleR _ _ _ = abort "doubleR"

/*--------------------------------------------------------------------
 * The bin constructor maintains the size of the tree
 *--------------------------------------------------------------------*/
bin :: a (Set a) (Set a) -> Set a
bin x l r = Bin (size l + size r + 1) x l r