Skip to content
Snippets Groups Projects
Commit c79e6a75 authored by Hans-Peter Deifel's avatar Hans-Peter Deifel
Browse files

Merge branch 'sumbag'

parents 2ccb7ef8 733171e8
Branches
No related tags found
No related merge requests found
......@@ -9,6 +9,7 @@ import qualified Data.List.BenchUtils
import qualified Copar.Algorithm.BenchInitialize
import qualified Data.BenchRefinablePartition
import qualified Data.BenchBlockQueue
import qualified Data.BenchSumBag
main :: IO ()
main = defaultMain
......@@ -19,4 +20,5 @@ main = defaultMain
, Copar.Algorithm.BenchInitialize.benchmarks
, Data.BenchRefinablePartition.benchmarks
, Data.BenchBlockQueue.benchmarks
, Data.BenchSumBag.benchmarks
]
{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Data.BenchSumBag (benchmarks) where
import Criterion
import Data.Monoid
import Data.Coerce
import Control.DeepSeq
import qualified Data.SumBag as SumBag
import Data.SumBag (SumBag)
benchmarks :: Benchmark
benchmarks = bgroup "Data.SumBag" [benchInsert, benchDelete, benchEq]
benchInsert :: Benchmark
benchInsert = bgroup "insert" $
let mkBench n = withBag (range 1 n) $ \(~bag) ->
bench (show n ++ " elements") $ whnf (SumBag.insert 0) bag
in map mkBench [1000,2000..10000]
benchDelete :: Benchmark
benchDelete = bgroup "delete" $
let mkBench n = withBag (range 1 n) $ \(~bag) ->
bench (show n ++ " elements") $ whnf (SumBag.delete 1) bag
in map mkBench [1000,2000..10000]
benchEq :: Benchmark
benchEq = bgroup "eq" $
let mkBench n =
withBag (range 1 n) $ \(~bag) ->
withBag (reverse $ range 1 n) $ \(~revbag) ->
bench (show n ++ " elements") $ whnf (uncurry (==)) (bag, revbag)
in map mkBench [1000,2000..10000]
range :: Int -> Int -> [Sum Int]
range a b = coerce [a..b]
withBag :: [Sum Int] -> (SumBag (Sum Int) -> Benchmark) -> Benchmark
withBag input = env setupEnv
where setupEnv = do
let !bag = SumBag.fromList input
return bag
instance NFData (SumBag a) where
rnf a = seq a ()
......@@ -33,6 +33,7 @@ library
, Data.Float.Utils
, Data.List.Utils
, Data.Text.Prettyprint
, Data.SumBag
, Copar.RefinementInterface
, Copar.Functors
, Copar.FunctorDescription
......@@ -135,6 +136,7 @@ test-suite spec
, Data.OpenUnionSpec
, Data.List.UtilsSpec
, Data.Float.UtilsSpec
, Data.SumBagSpec
, Copar.Functors.PowersetSpec
, Copar.Functors.GroupValuedSpec
, Copar.Functors.BagSpec
......@@ -210,6 +212,7 @@ benchmark bench
, Copar.Algorithm.BenchInitialize
, Data.BenchRefinablePartition
, Data.BenchBlockQueue
, Data.BenchSumBag
default-extensions: GADTs
, StandaloneDeriving
, DeriveFunctor
......
......@@ -23,7 +23,6 @@ import Control.Monad
import Data.Foldable
import qualified Data.Vector as V
import qualified Data.Map.Strict as M
import Text.Megaparsec
import qualified Data.Text.Prettyprint as Doc
import Data.Text.Prettyprint ((<+>))
......@@ -36,6 +35,8 @@ import Copar.FunctorExpression.Parser
import Copar.Coalgebra.Parser
import Data.Float.Utils ( MaxDouble(..) )
import Copar.Parser.Types
import Data.SumBag (SumBag)
import qualified Data.SumBag as SumBag
data SlowMonoidValued m a = SlowMonoidValued a
......@@ -121,10 +122,8 @@ realHelp =
<> Doc.annotate Doc.bold "Coalgebra syntax:"
<+> Doc.reflow "'{' X ':' real, ... '}'"
type LabelCountMap m = M.Map m Int
type instance Label (SlowMonoidValued m) = m
type instance Weight (SlowMonoidValued m) = (m, LabelCountMap m)
type instance Weight (SlowMonoidValued m) = (m, SumBag m)
type instance F1 (SlowMonoidValued m) = m
type instance F3 (SlowMonoidValued m) = (m, m, m)
......@@ -134,7 +133,7 @@ instance (Monoid m, Ord m) => RefinementInterface (SlowMonoidValued m) where
-> [Label (SlowMonoidValued m)]
-> Weight (SlowMonoidValued m)
init _ labels =
(mempty, foldl' (\m l -> M.insertWith (+) l 1 m) M.empty labels)
(mempty, foldl' (flip SumBag.insert) SumBag.empty labels)
update
:: [Label (SlowMonoidValued m)]
......@@ -144,21 +143,16 @@ instance (Monoid m, Ord m) => RefinementInterface (SlowMonoidValued m) where
, Weight (SlowMonoidValued m)
)
update labels (sumRest, counts) =
let toS = foldl' (\m l -> M.insertWith (+) l 1 m) M.empty labels
toCWithoutS = foldl' (flip (M.adjust pred)) counts labels
sumS = sumCounts toS
sumCWithoutS = sumCounts toS
let toS = foldl' (flip SumBag.insert) SumBag.empty labels
toCWithoutS = foldl' (flip SumBag.delete) counts labels
sumS = fold toS
sumCWithoutS = fold toS
f3 = (sumRest, sumCWithoutS, sumS)
w1 = (sumRest <> sumCWithoutS, toS)
w2 = (sumRest <> sumS, toCWithoutS)
in (w1, f3, w2)
sumCounts :: Monoid m => LabelCountMap m -> m
sumCounts = M.foldlWithKey' (\a x -> (<> a) . multiply x) mempty
where multiply x n = mconcat (replicate n x)
instance ParseMorphism (SlowMonoidValued (Max Int)) where
parseMorphismPoint (SlowMonoidValued inner) =
parseMorphismPointHelper inner (Max <$> (L.signed L.decimal))
......
{-# LANGUAGE RoleAnnotations #-}
{-# LANGUAGE StrictData #-}
-- | == Multisets with constant-time fold.
--
-- This implements a generic (multi-)set data structure for __commutative__
-- monoids with O(1) fold over the elements. Similar restrictions as for
-- "Data.Set" apply. In particular, the number of /different/ elements in the
-- Multiset must not exceed @maxBound::Int@. If this condition is violated, the
-- behaviour is undefined.
--
-- == Implementation
--
-- Internally, a running total is kept and updated each time an element is
-- inserted or deleted. The implementation is derivided from
--
-- * Stephen Adams, \"/Efficient sets: a balancing act/\",
-- Journal of Functional Programming 3(4):553-562, October 1993,
-- <http://www.swiss.ai.mit.edu/~adams/BB/>,
--
-- with the addition of the monoidal running total.
module Data.SumBag
( SumBag
, empty
, singleton
, sum
, insert
, delete
, elem
, toAscList
, fromList
) where
import Prelude hiding (sum, min, elem)
import Data.Foldable hiding (sum,elem)
import qualified Data.List.NonEmpty as NE
-- | A multiset of value @a@.
data SumBag a = Leaf | Node (MetaData a) (Element a) (SumBag a) (SumBag a)
deriving (Show)
type role SumBag nominal
instance (Ord a, Eq a) => Eq (SumBag a) where
x == y = toAscList x == toAscList y
-- TODO There are a few functions from Foldable that can be implemented way more
-- efficiently.
--
-- Notably 'minimum' and 'maximum', but also explicit recursion instead of
-- conversions to lists in a lot of cases.
instance Foldable SumBag where
foldMap f = foldMap f . toAscList
{-# INLINE foldMap #-}
fold = sum
{-# INLINE fold #-}
toList = toAscList
{-# INLINE toList #-}
data MetaData a = MetaData
{ nodeSize :: Int
, nodeSum :: a
}
deriving (Show)
data Element a = Element
{ value :: a
, multiplicity :: NE.NonEmpty a
}
deriving (Show)
-- | The empty set.
--
-- Running time: O(1)
empty :: SumBag a
empty = Leaf
-- | Constructs a set with a single element @a@.
singleton :: Monoid a => a -> SumBag a
singleton a =
node (Element a (NE.fromList [a])) Leaf Leaf
-- | Returns the number of nodes in the internal tree. This doesn't count
-- duplicate elements and only returns the size of the internal tree.
--
-- Running time: O(1)
size :: SumBag a -> Int
size Leaf = 0
size (Node node _ _ _) = nodeSize node
-- | Compute the sum of all elements with their '<>' implementation. This is
-- also called 'mconcat' and 'fold' for other containers.
--
-- Note that for the implementation to work, the Monoid @a@ has to be
-- commutative.
--
-- Running time: O(1), since this value is cached internally.
sum :: Monoid a => SumBag a -> a
sum Leaf = mempty
sum (Node node _ _ _) = nodeSum node
-- | Inserts an element into the set.
--
-- Running time: O(log n)
insert :: (Ord a, Monoid a) => a -> SumBag a -> SumBag a
insert a Leaf = node (Element a (NE.fromList [a])) Leaf Leaf
insert a (Node _ e left right)
| a < value e = balance1 e (insert a left) right
| a > value e = balance1 e left (insert a right)
| otherwise = node (addOnce e) left right
-- | Tests membership of an element.
--
-- Running time: O(log n)
elem :: (Ord a) => a -> SumBag a -> Bool
elem _ Leaf = False
elem a (Node _ e left right)
| a < value e = elem a left
| a > value e = elem a right
| otherwise = True
-- | Delete an element from the set. If this element is not present, the
-- original set is returned unmodified.
--
-- Running time: O(log n)
delete :: (Ord a, Monoid a) => a -> SumBag a -> SumBag a
delete _ Leaf = Leaf
delete a (Node _ e left right)
| a < value e = balance1 e (delete a left) right
| a > value e = balance1 e left (delete a right)
| Just e' <- delOnce e = node e' left right
| otherwise = helper left right
where helper Leaf right = right
helper left Leaf = left
helper left right =
let (min, rest) = delmin right
in balance1 min left rest
-- | Returns a list of all elements in the set in ascending order. Duplicate
-- elements are correctly returned multiple times.
--
-- Running time: O(n)
toAscList :: SumBag a -> [a]
toAscList bag = helper bag []
where helper Leaf accu = accu
helper (Node _ e left right) accu =
helper left (mkList e ++ helper right accu)
mkList (Element val mult) = map (const val) (NE.toList mult)
-- | Return a multiset of all elements of a list.
--
-- Running time: O(n · log n)
fromList :: (Ord a, Monoid a) => [a] -> SumBag a
fromList = foldr insert empty
-- Internal functions
-- | "Smart" constructor for Node, will compute the meta data from its subtrees.
node :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a
node a left right =
let nodeData = MetaData
{ nodeSize = size left + 1 + size right
, nodeSum = NE.head (multiplicity a) <> sum left <> sum right
}
in Node nodeData a left right
-- a b
-- / \ / \
-- x b => a z
-- / \ / \
-- y z x y
rotateSingleLeft :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a
rotateSingleLeft a x (Node _ b y z) = node b (node a x y) z
rotateSingleLeft _ _ _ = error "rotateSingleLeft called with empty right tree"
-- b a
-- / \ / \
-- a z => x b
-- / \ / \
-- x y y z
rotateSingleRight :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a
rotateSingleRight b (Node _ a x y) z = node a x (node b y z)
rotateSingleRight _ _ _ = error "rotateSingleRight called with empty left tree"
-- a b
-- / \ / \
-- x c a c
-- / \ => / \ / \
-- b z x y1 y2 z
-- / \
-- y1 y2
rotateDoubleLeft :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a
rotateDoubleLeft a x (Node _ c (Node _ b y1 y2) z) = node b (node a x y1) (node c y2 z)
rotateDoubleLeft _ _ _ = error "rotateDoubleLeft called with too small right tree"
-- c b
-- / \ / \
-- a z a c
-- / \ => / \ / \
-- x b x y1 y2 z
-- / \
-- y1 y2
rotateDoubleRight :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a
rotateDoubleRight c (Node _ a x (Node _ b y1 y2)) z = node b (node a x y1) (node c y2 z)
rotateDoubleRight _ _ _ = error "rotateDoubleRight called with too small left tree"
-- | Performs a single balancing act on a node.
balance1 :: Monoid a => Element a -> SumBag a -> SumBag a -> SumBag a
balance1 a left right
-- Subtrees have only one element
| size left + size right < 2 = node a left right
-- Right subtree is too heavy
| size right > balanceBound * size left =
let Node _ _ rleft rright = right
sizeRL = size rleft
sizeRR = size rright
in if sizeRL < sizeRR then rotateSingleLeft a left right else rotateDoubleLeft a left right
-- Left subtree is too heavy
| size left > balanceBound * size right =
let Node _ _ lleft lright = left
sizeLL = size lleft
sizeLR = size lright
in if sizeLR < sizeLL then rotateSingleRight a left right else rotateDoubleRight a left right
-- No subtree is too heavy, we can just form a new tree straight away
| otherwise = node a left right
addOnce :: Semigroup a => Element a -> Element a
addOnce e = let total = NE.head (multiplicity e)
in e { multiplicity = NE.cons (total <> value e) (multiplicity e) }
delOnce :: Element a -> Maybe (Element a)
delOnce e = case snd (NE.uncons (multiplicity e)) of
Nothing -> Nothing
Just rest -> Just (e { multiplicity = rest })
delmin :: Monoid a => SumBag a -> (Element a, SumBag a)
delmin Leaf = error "delmin: Empty tree"
delmin (Node _ e Leaf right) = (e, right)
delmin (Node _ e left right) = (\left' -> balance1 e left' right) <$> delmin left
balanceBound :: Int
balanceBound = 4
module Data.SumBagSpec (spec) where
import Test.Hspec
import Data.Monoid ( Sum(..) )
import Data.Coerce
import qualified Data.SumBag as SumBag
spec :: Spec
spec = do
insertSpec
deleteSpec
sumSpec
insertSpec :: Spec
insertSpec = describe "insert" $ do
it "works with one element" $ do
SumBag.elem (si 1) (SumBag.insert (si 1) SumBag.empty) `shouldBe` True
it "can handle 100 elements in order" $
let bag = foldr SumBag.insert SumBag.empty (map si [1 .. 100])
in and (map (flip SumBag.elem bag) (coerce @[Int] [1..100])) `shouldBe` True
it "can handle 100 elements in reverse order" $
let bag = foldr SumBag.insert SumBag.empty (reverse $ map si [1 .. 100])
in and (map (flip SumBag.elem bag) (coerce @[Int] [1..100])) `shouldBe` True
it "can handle 100 elements in strange order" $
let bag = foldr SumBag.insert SumBag.empty (map si ([1 .. 50] ++ [100,99..51]))
in and (map (flip SumBag.elem bag) (coerce @[Int] [1..100])) `shouldBe` True
it "works with duplicate elements" $
let bag = iterate (SumBag.insert (si 1)) SumBag.empty !! 10
in SumBag.toAscList bag `shouldBe` (replicate 10 (si 1))
it "is strict" $
let bag = SumBag.insert @(Sum Int) (error "") SumBag.empty
in seq bag (return ()) `shouldThrow` anyErrorCall
deleteSpec :: Spec
deleteSpec = describe "delete" $ do
it "does nothing on the empty List" $
SumBag.delete (si 1) SumBag.empty `shouldBe` SumBag.empty
it "does nothing when the element is not there" $
let bag = SumBag.fromList (coerce @[Int] [2..10])
in SumBag.delete (si 1) bag `shouldBe` bag
it "deletes a single element" $
SumBag.delete (si 1) (SumBag.singleton (si 1)) `shouldBe` SumBag.empty
it "deletes multiple different elements" $
let bag = SumBag.fromList (coerce @[Int] [1..100])
in foldr SumBag.delete bag (map si [1..50]) `shouldBe` SumBag.fromList (map si [51..100])
it "deletes multiple equal elements" $
let bag = SumBag.fromList (replicate 100 (si 1))
in foldr SumBag.delete bag (replicate 50 (si 1))
`shouldBe` SumBag.fromList (replicate 50 (si 1))
sumSpec :: Spec
sumSpec = describe "sum" $ do
it "sums the empty bag to mempty" $
SumBag.sum @(Sum Int) (SumBag.empty) `shouldBe` mempty
it "computes the correct sum of [1..100]" $
SumBag.sum (SumBag.fromList (map si [1..100])) `shouldBe` Sum 5050
it "computes the correct sum of one hunderd twos" $
SumBag.sum (SumBag.fromList (replicate 100 (si 2))) `shouldBe` Sum 200
si :: Int -> Sum Int
si = Sum @Int
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment