diff --git a/bench/BenchMain.hs b/bench/BenchMain.hs index d8575a8bebe5a1f5ec145460c470a60a556b0b32..d0327fcc3ab98773c64aae47d9da7e1f40ecb20f 100644 --- a/bench/BenchMain.hs +++ b/bench/BenchMain.hs @@ -9,6 +9,7 @@ import qualified Data.List.BenchUtils import qualified Copar.Algorithm.BenchInitialize import qualified Data.BenchRefinablePartition import qualified Data.BenchBlockQueue +import qualified Data.BenchSumBag main :: IO () main = defaultMain @@ -19,4 +20,5 @@ main = defaultMain , Copar.Algorithm.BenchInitialize.benchmarks , Data.BenchRefinablePartition.benchmarks , Data.BenchBlockQueue.benchmarks + , Data.BenchSumBag.benchmarks ] diff --git a/bench/Data/BenchSumBag.hs b/bench/Data/BenchSumBag.hs new file mode 100644 index 0000000000000000000000000000000000000000..68cf624b6291581d7c0b1fa89f5da6ad6e358783 --- /dev/null +++ b/bench/Data/BenchSumBag.hs @@ -0,0 +1,48 @@ +{-# LANGUAGE BangPatterns #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Data.BenchSumBag (benchmarks) where + +import Criterion + +import Data.Monoid +import Data.Coerce +import Control.DeepSeq + +import qualified Data.SumBag as SumBag +import Data.SumBag (SumBag) + +benchmarks :: Benchmark +benchmarks = bgroup "Data.SumBag" [benchInsert, benchDelete, benchEq] + +benchInsert :: Benchmark +benchInsert = bgroup "insert" $ + let mkBench n = withBag (range 1 n) $ \(~bag) -> + bench (show n ++ " elements") $ whnf (SumBag.insert 0) bag + in map mkBench [1000,2000..10000] + +benchDelete :: Benchmark +benchDelete = bgroup "delete" $ + let mkBench n = withBag (range 1 n) $ \(~bag) -> + bench (show n ++ " elements") $ whnf (SumBag.delete 1) bag + in map mkBench [1000,2000..10000] + +benchEq :: Benchmark +benchEq = bgroup "eq" $ + let mkBench n = + withBag (range 1 n) $ \(~bag) -> + withBag (reverse $ range 1 n) $ \(~revbag) -> + bench (show n ++ " elements") $ whnf (uncurry (==)) (bag, revbag) + in map mkBench [1000,2000..10000] + +range :: Int -> Int -> [Sum Int] +range a b = coerce [a..b] + +withBag :: [Sum Int] -> (SumBag (Sum Int) -> Benchmark) -> Benchmark +withBag input = env setupEnv + where setupEnv = do + let !bag = SumBag.fromList input + return bag + +instance NFData (SumBag a) where + rnf a = seq a () diff --git a/copar.cabal b/copar.cabal index 7ebc10e49bdc10dc603ab0ae1ca4f3d0e94c3b1c..417883009887dbf94e73cff537e896c53f6f4bc8 100644 --- a/copar.cabal +++ b/copar.cabal @@ -33,6 +33,7 @@ library , Data.Float.Utils , Data.List.Utils , Data.Text.Prettyprint + , Data.SumBag , Copar.RefinementInterface , Copar.Functors , Copar.FunctorDescription @@ -135,6 +136,7 @@ test-suite spec , Data.OpenUnionSpec , Data.List.UtilsSpec , Data.Float.UtilsSpec + , Data.SumBagSpec , Copar.Functors.PowersetSpec , Copar.Functors.GroupValuedSpec , Copar.Functors.BagSpec @@ -210,6 +212,7 @@ benchmark bench , Copar.Algorithm.BenchInitialize , Data.BenchRefinablePartition , Data.BenchBlockQueue + , Data.BenchSumBag default-extensions: GADTs , StandaloneDeriving , DeriveFunctor diff --git a/src/Copar/Functors/MonoidValued.hs b/src/Copar/Functors/MonoidValued.hs index 885986aa41c7730c1d2eb06d03bab25c00a7f5fe..e51c2b557dd6e4f3733ee39e7971b3ad92a31be5 100644 --- a/src/Copar/Functors/MonoidValued.hs +++ b/src/Copar/Functors/MonoidValued.hs @@ -23,7 +23,6 @@ import Control.Monad import Data.Foldable import qualified Data.Vector as V -import qualified Data.Map.Strict as M import Text.Megaparsec import qualified Data.Text.Prettyprint as Doc import Data.Text.Prettyprint ((<+>)) @@ -36,6 +35,8 @@ import Copar.FunctorExpression.Parser import Copar.Coalgebra.Parser import Data.Float.Utils ( MaxDouble(..) ) import Copar.Parser.Types +import Data.SumBag (SumBag) +import qualified Data.SumBag as SumBag data SlowMonoidValued m a = SlowMonoidValued a @@ -121,10 +122,8 @@ realHelp = <> Doc.annotate Doc.bold "Coalgebra syntax:" <+> Doc.reflow "'{' X ':' real, ... '}'" -type LabelCountMap m = M.Map m Int - type instance Label (SlowMonoidValued m) = m -type instance Weight (SlowMonoidValued m) = (m, LabelCountMap m) +type instance Weight (SlowMonoidValued m) = (m, SumBag m) type instance F1 (SlowMonoidValued m) = m type instance F3 (SlowMonoidValued m) = (m, m, m) @@ -134,7 +133,7 @@ instance (Monoid m, Ord m) => RefinementInterface (SlowMonoidValued m) where -> [Label (SlowMonoidValued m)] -> Weight (SlowMonoidValued m) init _ labels = - (mempty, foldl' (\m l -> M.insertWith (+) l 1 m) M.empty labels) + (mempty, foldl' (flip SumBag.insert) SumBag.empty labels) update :: [Label (SlowMonoidValued m)] @@ -144,21 +143,16 @@ instance (Monoid m, Ord m) => RefinementInterface (SlowMonoidValued m) where , Weight (SlowMonoidValued m) ) update labels (sumRest, counts) = - let toS = foldl' (\m l -> M.insertWith (+) l 1 m) M.empty labels - toCWithoutS = foldl' (flip (M.adjust pred)) counts labels - sumS = sumCounts toS - sumCWithoutS = sumCounts toS + let toS = foldl' (flip SumBag.insert) SumBag.empty labels + toCWithoutS = foldl' (flip SumBag.delete) counts labels + sumS = fold toS + sumCWithoutS = fold toS f3 = (sumRest, sumCWithoutS, sumS) w1 = (sumRest <> sumCWithoutS, toS) w2 = (sumRest <> sumS, toCWithoutS) in (w1, f3, w2) -sumCounts :: Monoid m => LabelCountMap m -> m -sumCounts = M.foldlWithKey' (\a x -> (<> a) . multiply x) mempty - where multiply x n = mconcat (replicate n x) - - instance ParseMorphism (SlowMonoidValued (Max Int)) where parseMorphismPoint (SlowMonoidValued inner) = parseMorphismPointHelper inner (Max <$> (L.signed L.decimal)) diff --git a/src/Data/SumBag.hs b/src/Data/SumBag.hs new file mode 100644 index 0000000000000000000000000000000000000000..2d49f5ff137b349e07bfe5106b7743aa00fe6dcf --- /dev/null +++ b/src/Data/SumBag.hs @@ -0,0 +1,249 @@ +{-# LANGUAGE RoleAnnotations #-} +{-# LANGUAGE StrictData #-} + +-- | == Multisets with constant-time fold. +-- +-- This implements a generic (multi-)set data structure for __commutative__ +-- monoids with O(1) fold over the elements. Similar restrictions as for +-- "Data.Set" apply. In particular, the number of /different/ elements in the +-- Multiset must not exceed @maxBound::Int@. If this condition is violated, the +-- behaviour is undefined. +-- +-- == Implementation +-- +-- Internally, a running total is kept and updated each time an element is +-- inserted or deleted. The implementation is derivided from +-- +-- * Stephen Adams, \"/Efficient sets: a balancing act/\", +-- Journal of Functional Programming 3(4):553-562, October 1993, +-- <http://www.swiss.ai.mit.edu/~adams/BB/>, +-- +-- with the addition of the monoidal running total. +module Data.SumBag + ( SumBag + , empty + , singleton + , sum + , insert + , delete + , elem + , toAscList + , fromList + ) where + +import Prelude hiding (sum, min, elem) +import Data.Foldable hiding (sum,elem) +import qualified Data.List.NonEmpty as NE + +-- | A multiset of value @a@. +data SumBag a = Leaf | Node (MetaData a) (Element a) (SumBag a) (SumBag a) + deriving (Show) + +type role SumBag nominal + +instance (Ord a, Eq a) => Eq (SumBag a) where + x == y = toAscList x == toAscList y + +-- TODO There are a few functions from Foldable that can be implemented way more +-- efficiently. +-- +-- Notably 'minimum' and 'maximum', but also explicit recursion instead of +-- conversions to lists in a lot of cases. +instance Foldable SumBag where + foldMap f = foldMap f . toAscList + {-# INLINE foldMap #-} + + fold = sum + {-# INLINE fold #-} + + toList = toAscList + {-# INLINE toList #-} + + +data MetaData a = MetaData + { nodeSize :: Int + , nodeSum :: a + } + deriving (Show) + +data Element a = Element + { value :: a + , multiplicity :: NE.NonEmpty a + } + deriving (Show) + + +-- | The empty set. +-- +-- Running time: O(1) +empty :: SumBag a +empty = Leaf + +-- | Constructs a set with a single element @a@. +singleton :: Monoid a => a -> SumBag a +singleton a = + node (Element a (NE.fromList [a])) Leaf Leaf + +-- | Returns the number of nodes in the internal tree. This doesn't count +-- duplicate elements and only returns the size of the internal tree. +-- +-- Running time: O(1) +size :: SumBag a -> Int +size Leaf = 0 +size (Node node _ _ _) = nodeSize node + +-- | Compute the sum of all elements with their '<>' implementation. This is +-- also called 'mconcat' and 'fold' for other containers. +-- +-- Note that for the implementation to work, the Monoid @a@ has to be +-- commutative. +-- +-- Running time: O(1), since this value is cached internally. +sum :: Monoid a => SumBag a -> a +sum Leaf = mempty +sum (Node node _ _ _) = nodeSum node + +-- | Inserts an element into the set. +-- +-- Running time: O(log n) +insert :: (Ord a, Monoid a) => a -> SumBag a -> SumBag a +insert a Leaf = node (Element a (NE.fromList [a])) Leaf Leaf +insert a (Node _ e left right) + | a < value e = balance1 e (insert a left) right + | a > value e = balance1 e left (insert a right) + | otherwise = node (addOnce e) left right + +-- | Tests membership of an element. +-- +-- Running time: O(log n) +elem :: (Ord a) => a -> SumBag a -> Bool +elem _ Leaf = False +elem a (Node _ e left right) + | a < value e = elem a left + | a > value e = elem a right + | otherwise = True + +-- | Delete an element from the set. If this element is not present, the +-- original set is returned unmodified. +-- +-- Running time: O(log n) +delete :: (Ord a, Monoid a) => a -> SumBag a -> SumBag a +delete _ Leaf = Leaf +delete a (Node _ e left right) + | a < value e = balance1 e (delete a left) right + | a > value e = balance1 e left (delete a right) + | Just e' <- delOnce e = node e' left right + | otherwise = helper left right + + where helper Leaf right = right + helper left Leaf = left + helper left right = + let (min, rest) = delmin right + in balance1 min left rest + +-- | Returns a list of all elements in the set in ascending order. Duplicate +-- elements are correctly returned multiple times. +-- +-- Running time: O(n) +toAscList :: SumBag a -> [a] +toAscList bag = helper bag [] + where helper Leaf accu = accu + helper (Node _ e left right) accu = + helper left (mkList e ++ helper right accu) + + mkList (Element val mult) = map (const val) (NE.toList mult) + +-- | Return a multiset of all elements of a list. +-- +-- Running time: O(n ยท log n) +fromList :: (Ord a, Monoid a) => [a] -> SumBag a +fromList = foldr insert empty + +-- Internal functions + +-- | "Smart" constructor for Node, will compute the meta data from its subtrees. +node :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a +node a left right = + let nodeData = MetaData + { nodeSize = size left + 1 + size right + , nodeSum = NE.head (multiplicity a) <> sum left <> sum right + } + in Node nodeData a left right + +-- a b +-- / \ / \ +-- x b => a z +-- / \ / \ +-- y z x y +rotateSingleLeft :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a +rotateSingleLeft a x (Node _ b y z) = node b (node a x y) z +rotateSingleLeft _ _ _ = error "rotateSingleLeft called with empty right tree" + +-- b a +-- / \ / \ +-- a z => x b +-- / \ / \ +-- x y y z +rotateSingleRight :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a +rotateSingleRight b (Node _ a x y) z = node a x (node b y z) +rotateSingleRight _ _ _ = error "rotateSingleRight called with empty left tree" + +-- a b +-- / \ / \ +-- x c a c +-- / \ => / \ / \ +-- b z x y1 y2 z +-- / \ +-- y1 y2 +rotateDoubleLeft :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a +rotateDoubleLeft a x (Node _ c (Node _ b y1 y2) z) = node b (node a x y1) (node c y2 z) +rotateDoubleLeft _ _ _ = error "rotateDoubleLeft called with too small right tree" + +-- c b +-- / \ / \ +-- a z a c +-- / \ => / \ / \ +-- x b x y1 y2 z +-- / \ +-- y1 y2 +rotateDoubleRight :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a +rotateDoubleRight c (Node _ a x (Node _ b y1 y2)) z = node b (node a x y1) (node c y2 z) +rotateDoubleRight _ _ _ = error "rotateDoubleRight called with too small left tree" + + +-- | Performs a single balancing act on a node. +balance1 :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a +balance1 a left right + -- Subtrees have only one element + | size left + size right < 2 = node a left right + -- Right subtree is too heavy + | size right > balanceBound * size left = + let Node _ _ rleft rright = right + sizeRL = size rleft + sizeRR = size rright + in if sizeRL < sizeRR then rotateSingleLeft a left right else rotateDoubleLeft a left right + -- Left subtree is too heavy + | size left > balanceBound * size right = + let Node _ _ lleft lright = left + sizeLL = size lleft + sizeLR = size lright + in if sizeLR < sizeLL then rotateSingleRight a left right else rotateDoubleRight a left right + -- No subtree is too heavy, we can just form a new tree straight away + | otherwise = node a left right + +addOnce :: Semigroup a => Element a -> Element a +addOnce e = let total = NE.head (multiplicity e) + in e { multiplicity = NE.cons (total <> value e) (multiplicity e) } + +delOnce :: Element a -> Maybe (Element a) +delOnce e = case snd (NE.uncons (multiplicity e)) of + Nothing -> Nothing + Just rest -> Just (e { multiplicity = rest }) + +delmin :: Monoid a => SumBag a -> (Element a, SumBag a) +delmin Leaf = error "delmin: Empty tree" +delmin (Node _ e Leaf right) = (e, right) +delmin (Node _ e left right) = (\left' -> balance1 e left' right) <$> delmin left + +balanceBound :: Int +balanceBound = 4 diff --git a/tests/Data/SumBagSpec.hs b/tests/Data/SumBagSpec.hs new file mode 100644 index 0000000000000000000000000000000000000000..6fb142b59d93cd9fc98c47936071e1e109a164c8 --- /dev/null +++ b/tests/Data/SumBagSpec.hs @@ -0,0 +1,75 @@ +module Data.SumBagSpec (spec) where + +import Test.Hspec + +import Data.Monoid ( Sum(..) ) +import Data.Coerce + +import qualified Data.SumBag as SumBag + +spec :: Spec +spec = do + insertSpec + deleteSpec + sumSpec + +insertSpec :: Spec +insertSpec = describe "insert" $ do + it "works with one element" $ do + SumBag.elem (si 1) (SumBag.insert (si 1) SumBag.empty) `shouldBe` True + + it "can handle 100 elements in order" $ + let bag = foldr SumBag.insert SumBag.empty (map si [1 .. 100]) + in and (map (flip SumBag.elem bag) (coerce @[Int] [1..100])) `shouldBe` True + + it "can handle 100 elements in reverse order" $ + let bag = foldr SumBag.insert SumBag.empty (reverse $ map si [1 .. 100]) + in and (map (flip SumBag.elem bag) (coerce @[Int] [1..100])) `shouldBe` True + + it "can handle 100 elements in strange order" $ + let bag = foldr SumBag.insert SumBag.empty (map si ([1 .. 50] ++ [100,99..51])) + in and (map (flip SumBag.elem bag) (coerce @[Int] [1..100])) `shouldBe` True + + it "works with duplicate elements" $ + let bag = iterate (SumBag.insert (si 1)) SumBag.empty !! 10 + in SumBag.toAscList bag `shouldBe` (replicate 10 (si 1)) + + it "is strict" $ + let bag = SumBag.insert @(Sum Int) (error "") SumBag.empty + in seq bag (return ()) `shouldThrow` anyErrorCall + + +deleteSpec :: Spec +deleteSpec = describe "delete" $ do + it "does nothing on the empty List" $ + SumBag.delete (si 1) SumBag.empty `shouldBe` SumBag.empty + + it "does nothing when the element is not there" $ + let bag = SumBag.fromList (coerce @[Int] [2..10]) + in SumBag.delete (si 1) bag `shouldBe` bag + + it "deletes a single element" $ + SumBag.delete (si 1) (SumBag.singleton (si 1)) `shouldBe` SumBag.empty + + it "deletes multiple different elements" $ + let bag = SumBag.fromList (coerce @[Int] [1..100]) + in foldr SumBag.delete bag (map si [1..50]) `shouldBe` SumBag.fromList (map si [51..100]) + + it "deletes multiple equal elements" $ + let bag = SumBag.fromList (replicate 100 (si 1)) + in foldr SumBag.delete bag (replicate 50 (si 1)) + `shouldBe` SumBag.fromList (replicate 50 (si 1)) + +sumSpec :: Spec +sumSpec = describe "sum" $ do + it "sums the empty bag to mempty" $ + SumBag.sum @(Sum Int) (SumBag.empty) `shouldBe` mempty + + it "computes the correct sum of [1..100]" $ + SumBag.sum (SumBag.fromList (map si [1..100])) `shouldBe` Sum 5050 + + it "computes the correct sum of one hunderd twos" $ + SumBag.sum (SumBag.fromList (replicate 100 (si 2))) `shouldBe` Sum 200 + +si :: Int -> Sum Int +si = Sum @Int