SumBag.hs 7.09 KB
Newer Older
1
2
{-# LANGUAGE RoleAnnotations #-}

3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
-- | == Multisets with constant-time fold.
--
-- This implements a generic (multi-)set data structure for __commutative__
-- monoids with O(1) fold over the elements. Similar restrictions as for
-- "Data.Set" apply. In particular, the number of /different/ elements in the
-- Multiset must not exceed @maxBound::Int@. If this condition is violated, the
-- behaviour is undefined.
--
-- == Implementation
--
-- Internally, a running total is kept and updated each time an element is
-- inserted or deleted. The implementation is derivided from
--
--   * Stephen Adams, \"/Efficient sets: a balancing act/\",
--     Journal of Functional Programming 3(4):553-562, October 1993,
--     <http://www.swiss.ai.mit.edu/~adams/BB/>,
--
-- with the addition of the monoidal running total.
21
22
23
24
25
26
27
module Data.SumBag
  ( SumBag
  , empty
  , singleton
  , sum
  , insert
  , delete
28
  , elem
29
30
  , toAscList
  , fromList
31
32
  ) where

33
import Prelude hiding (sum, min, elem)
Hans-Peter Deifel's avatar
Hans-Peter Deifel committed
34
import Data.Foldable hiding (sum,elem)
35
36
import qualified Data.List.NonEmpty as NE

37
38
-- | A multiset of value @a@.
data SumBag a = Leaf | Node (MetaData a) (Element a) (SumBag a) (SumBag a)
39
40
  deriving (Show)

41
type role SumBag nominal
Hans-Peter Deifel's avatar
Hans-Peter Deifel committed
42

43
instance (Ord a, Eq a) => Eq (SumBag a) where
44
  x == y = toAscList x == toAscList y
45

46
-- TODO There are a few functions from Foldable that can be implemented way more
Hans-Peter Deifel's avatar
Hans-Peter Deifel committed
47
48
49
50
-- efficiently.
--
-- Notably 'minimum' and 'maximum', but also explicit recursion instead of
-- conversions to lists in a lot of cases.
51
instance Foldable SumBag where
Hans-Peter Deifel's avatar
Hans-Peter Deifel committed
52
53
54
55
56
57
58
59
60
  foldMap f = foldMap f . toAscList
  {-# INLINE foldMap #-}

  fold = sum
  {-# INLINE fold #-}

  toList = toAscList
  {-# INLINE toList #-}
  
61
62
63
64
65

data MetaData a = MetaData
  { nodeSize :: Int
  , nodeSum :: a
  }
66
  deriving (Show)
67
68
69
70
71

data Element a = Element
  { value :: a
  , multiplicity :: NE.NonEmpty a
  }
72
  deriving (Show)
73
74


75
76
77
-- | The empty set.
--
-- Running time: O(1)
78
79
80
empty :: SumBag a
empty = Leaf

81
-- | Constructs a set with a single element @a@.
82
83
84
85
singleton :: Monoid a => a -> SumBag a
singleton a =
  node (Element a (NE.fromList [a])) Leaf Leaf

86
87
88
89
-- | Returns the number of nodes in the internal tree. This doesn't count
-- duplicate elements and only returns the size of the internal tree.
--
-- Running time: O(1)
90
91
92
93
size :: SumBag a -> Int
size Leaf = 0
size (Node node _ _ _) = nodeSize node

94
95
96
97
98
99
100
-- | Compute the sum of all elements with their '<>' implementation. This is
-- also called 'mconcat' and 'fold' for other containers.
--
-- Note that for the implementation to work, the Monoid @a@ has to be
-- commutative.
--
-- Running time: O(1), since this value is cached internally.
101
102
103
104
sum :: Monoid a => SumBag a -> a
sum Leaf = mempty
sum (Node node _ _ _) = nodeSum node

105
106
107
-- | Inserts an element into the set.
--
-- Running time: O(log n)
108
109
110
111
112
113
114
insert :: (Ord a, Monoid a) => a -> SumBag a -> SumBag a
insert a Leaf = node (Element a (NE.fromList [a])) Leaf Leaf
insert a (Node _ e left right)
  | a < value e = balance1 e (insert a left) right
  | a > value e = balance1 e left (insert a right)
  | otherwise = node (addOnce e) left right

115
116
117
-- | Tests membership of an element.
--
-- Running time: O(log n)
118
119
120
121
122
123
124
elem :: (Ord a) => a -> SumBag a -> Bool
elem _ Leaf = False
elem a (Node _ e left right)
  | a < value e = elem a left
  | a > value e = elem a right
  | otherwise = True

125
126
127
128
-- | Delete an element from the set. If this element is not present, the
-- original set is returned unmodified.
--
-- Running time: O(log n)
129
130
131
132
133
134
135
136
137
138
139
140
141
142
delete :: (Ord a, Monoid a) => a -> SumBag a -> SumBag a
delete _ Leaf = Leaf
delete a (Node _ e left right)
  | a < value e = balance1 e (delete a left) right
  | a > value e = balance1 e left (delete a right)
  | Just e' <- delOnce e = node e' left right
  | otherwise = helper left right

  where helper Leaf right = right
        helper left Leaf = left
        helper left right =
          let (min, rest) = delmin right
          in balance1 min left rest

143
144
145
146
-- | Returns a list of all elements in the set in ascending order. Duplicate
-- elements are correctly returned multiple times.
--
-- Running time: O(n)
Hans-Peter Deifel's avatar
Hans-Peter Deifel committed
147
toAscList :: SumBag a -> [a]
148
149
150
151
152
153
154
toAscList bag = helper bag []
  where helper Leaf accu = accu
        helper (Node _ e left right) accu =
          helper left (mkList e ++ helper right accu)

        mkList (Element val mult) = map (const val) (NE.toList mult)

155
156
157
-- | Return a multiset of all elements of a list.
--
-- Running time: O(n · log n)
158
159
160
fromList :: (Ord a, Monoid a) => [a] -> SumBag a
fromList = foldr insert empty

161
162
163
-- Internal functions

-- | "Smart" constructor for Node. Will compute the meta data from its subtrees
164
node :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a
165
166
167
168
169
170
171
node a left right =
  let nodeData = MetaData
        { nodeSize = size left + 1 + size right
        , nodeSum = NE.head (multiplicity a) <> sum left <> sum right
        }
  in Node nodeData a left right

172
rotateSingleLeft :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a
173
174
175
rotateSingleLeft a x (Node _ b y z) = node b (node a x y) z
rotateSingleLeft _ _ _ = error "rotateSingleLeft called with empty right tree"

176
rotateSingleRight :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a
177
178
179
rotateSingleRight b (Node _ a x y) z = node a x (node b y z)
rotateSingleRight _ _ _ = error "rotateSingleRight called with empty left tree"

180
rotateDoubleLeft :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a
181
rotateDoubleLeft a x (Node _ c (Node _ b y1 y2) z) = node b (node a x y1) (node c y2 z)
182
rotateDoubleLeft _ _ _ = error "rotateDoubleLeft called with too small right tree"
183

184
rotateDoubleRight :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a
185
186
187
188
rotateDoubleRight c (Node _ a x (Node _ b y1 y2)) z = node b (node a x y1) (node c y2 z)
rotateDoubleRight _ _ _ = error "rotateDoubleRight called with too small left tree"


189
balance1 :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a
190
191
192
193
194
195
196
197
198
199
200
201
202
203
balance1 a left right
  -- Subtrees have only one element
  | size left + size right < 2 = node a left right
  -- Right subtree is too heavy
  | size right > balanceBound * size left =
    let Node _ _ rleft rright = right
        sizeRL = size rleft
        sizeRR = size rright
    in if sizeRL < sizeRR then rotateSingleLeft a left right else rotateDoubleLeft a left right
  -- Left subtree is too heavy
  | size left > balanceBound * size right =
    let Node _ _ lleft lright = left
        sizeLL = size lleft
        sizeLR = size lright
204
    in if sizeLR < sizeLL then rotateSingleRight a left right else rotateDoubleRight a left right
205
  -- No subtree is too heavy, we can just form a new tree straight away
206
207
208
209
210
211
212
213
214
215
216
  | otherwise = node a left right

addOnce :: Semigroup a => Element a -> Element a
addOnce e = let total = NE.head (multiplicity e)
            in e { multiplicity = NE.cons (total <> value e) (multiplicity e) }

delOnce :: Element a -> Maybe (Element a)
delOnce e = case snd (NE.uncons (multiplicity e)) of
  Nothing -> Nothing
  Just rest -> Just (e { multiplicity = rest })

217
delmin :: Monoid a => SumBag a -> (Element a, SumBag a)
218
delmin Leaf = error "delmin: Empty tree"
219
delmin (Node _ e Leaf right) = (e, right)
220
221
222
223
delmin (Node _ e left right) = (\left' -> balance1 e left' right) <$> delmin left

balanceBound :: Int
balanceBound = 4