diff --git a/bench/BenchMain.hs b/bench/BenchMain.hs
index 37c808a2c1e594c6966c377bf9a7823d2bb5ddf6..77d515189bcab5104e2425203c5cb28cb1ca9a8b 100644
--- a/bench/BenchMain.hs
+++ b/bench/BenchMain.hs
@@ -7,6 +7,7 @@ import qualified MA.Functors.BenchMonoidValued
 import qualified MA.Parser.BenchLexer
 import qualified Data.List.BenchUtils
 import qualified MA.Algorithm.BenchInitialize
+import qualified Data.BenchRefinablePartition
 
 main :: IO ()
 main = defaultMain
@@ -15,4 +16,5 @@ main = defaultMain
   , MA.Parser.BenchLexer.benchmarks
   , Data.List.BenchUtils.benchmarks
   , MA.Algorithm.BenchInitialize.benchmarks
+  , Data.BenchRefinablePartition.benchmarks
   ]
diff --git a/bench/Data/BenchRefinablePartition.hs b/bench/Data/BenchRefinablePartition.hs
new file mode 100644
index 0000000000000000000000000000000000000000..5e3602a97f10e37f9799d3c4e3099467c57e9ed0
--- /dev/null
+++ b/bench/Data/BenchRefinablePartition.hs
@@ -0,0 +1,88 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# OPTIONS_GHC -Wno-orphans #-}
+module Data.BenchRefinablePartition (benchmarks) where
+
+import           Criterion
+
+import           Control.Monad.ST
+
+import           Control.DeepSeq
+
+import           Data.RefinablePartition
+
+benchmarks :: Benchmark
+benchmarks = bgroup "Data.RefinablePartition"
+                    [benchGroupBy, benchMake, benchMake1, benchMark]
+
+benchGroupBy :: Benchmark
+benchGroupBy = bgroup
+  "groupBy"
+  [ bench "10 states, already sorted"
+    $ withInit (make 10 1 (const 0)) (\p -> groupBy p 0 id)
+  , bench "20 states, already sorted"
+    $ withInit (make 20 1 (const 0)) (\p -> groupBy p 0 id)
+  , bench "100 states, already sorted"
+    $ withInit (make 100 1 (const 0)) (\p -> groupBy p 0 id)
+  , bench "1000 states, already sorted"
+    $ withInit (make 1000 1 (const 0)) (\p -> groupBy p 0 id)
+  , bench "10 states, reversed"
+    $ withInit (make 10 1 (const 0)) (\p -> groupBy p 0 negate)
+  , bench "20 states, reversed"
+    $ withInit (make 20 1 (const 0)) (\p -> groupBy p 0 negate)
+  , bench "100 states, reversed"
+    $ withInit (make 100 1 (const 0)) (\p -> groupBy p 0 negate)
+  , bench "1000 states, reversed"
+    $ withInit (make 1000 1 (const 0)) (\p -> groupBy p 0 negate)
+  ]
+
+
+benchMake :: Benchmark
+benchMake = bgroup
+  "make"
+  [ bench "1 block, 10 states" $ whnfIO (stToIO (make 10 1 (const 0)))
+  , bench "1 block, 20 states" $ whnfIO (stToIO (make 20 1 (const 0)))
+  , bench "1 block, 100 states" $ whnfIO (stToIO (make 100 1 (const 0)))
+  , bench "2 blocks, 10 states" $ whnfIO (stToIO (make 10 2 (bmod 2)))
+  , bench "2 blocks, 20 states" $ whnfIO (stToIO (make 20 2 (bmod 2)))
+  , bench "2 blocks, 100 states" $ whnfIO (stToIO (make 100 2 (bmod 2)))
+  , bench "10 blocks, 10 states" $ whnfIO (stToIO (make 10 10 Block))
+  , bench "20 blocks, 20 states" $ whnfIO (stToIO (make 20 20 Block))
+  , bench "100 blocks, 100 states" $ whnfIO (stToIO (make 100 100 Block))
+  ]
+  where bmod n = Block . (`mod` n)
+
+
+benchMake1 :: Benchmark
+benchMake1 = bgroup
+  "make1"
+  [ bench "10 states" $ whnfIO (stToIO (make1 10))
+  , bench "20 states" $ whnfIO (stToIO (make1 20))
+  , bench "100 states" $ whnfIO (stToIO (make1 100))
+  ]
+
+
+-- hehe
+benchMark :: Benchmark
+benchMark = bgroup
+  "mark"
+  [ bench "0 states" $ withInit (make1 100) (\_ -> return ())
+  , bench "10 states" $ withInit (make1 100) (\p -> mapM_ (mark p) [0 .. 9])
+  , bench "20 states" $ withInit (make1 100) (\p -> mapM_ (mark p) [0 .. 19])
+  , bench "100 states" $ withInit (make1 100) (\p -> mapM_ (mark p) [0 .. 99])
+  , bench "10 states, reverse"
+    $ withInit (make1 100) (\p -> mapM_ (mark p) [9, 8 .. 0])
+  , bench "20 states, reverse"
+    $ withInit (make1 100) (\p -> mapM_ (mark p) [19, 18 .. 0])
+  , bench "100 states, reverse"
+    $ withInit (make1 100) (\p -> mapM_ (mark p) [99, 98 .. 0])
+  ]
+
+instance NFData (RefinablePartition RealWorld) where
+  rnf p = seq p ()
+
+withInit
+  :: NFData a
+  => ST RealWorld (RefinablePartition RealWorld)
+  -> (RefinablePartition RealWorld -> ST RealWorld a)
+  -> Benchmarkable
+withInit initialize action = perRunEnv (stToIO initialize) (stToIO . action)
diff --git a/examples/pp-non-zippable.out b/examples/pp-non-zippable.out
index 2f4d531bb60b7789d63d41a7e2cb9e859f2ca779..6be7d0ac04f581273e9676ea8226e315d1d3122e 100644
--- a/examples/pp-non-zippable.out
+++ b/examples/pp-non-zippable.out
@@ -1,6 +1,6 @@
-Block 0: a1
+Block 0: b1
 Block 1: a2, a7, b2, b6
-Block 2: b1
+Block 2: a1
 Block 3: a4, a6, b4, b7
-Block 4: a5, b3
-Block 5: a3, b5
+Block 4: a3, b5
+Block 5: a5, b3
diff --git a/ma.cabal b/ma.cabal
index 9c45d96e6694fd438b7c1d02be878193f88b520e..7f72f5992ef891548a6411bda51ecee3434d89dd 100644
--- a/ma.cabal
+++ b/ma.cabal
@@ -94,6 +94,7 @@ library
                      , megaparsec >= 7 && <8
                      , deriving-compat
                      , ieee754
+                     , vector-th-unbox >= 0.2 && <0.3
   ghc-options:         -Wall -Wno-name-shadowing
   if flag(release)
     cpp-options:       -DRELEASE
@@ -198,6 +199,7 @@ benchmark bench
                      , MA.Parser.BenchLexer
                      , Data.List.BenchUtils
                      , MA.Algorithm.BenchInitialize
+                     , Data.BenchRefinablePartition
   default-extensions:  GADTs
                      , StandaloneDeriving
                      , DeriveFunctor
diff --git a/src/Data/Partition/Common.hs b/src/Data/Partition/Common.hs
index 5bc158d1ff8398d2c306f9ebf35a4a3069d4eafe..776a4fdfa546240424bdaaf02a63e7333773aa00 100644
--- a/src/Data/Partition/Common.hs
+++ b/src/Data/Partition/Common.hs
@@ -1,4 +1,6 @@
+{-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE TemplateHaskell #-}
 
 -- | Type definitions used by both refinable and immutable partitions.
 module Data.Partition.Common
@@ -6,7 +8,10 @@ module Data.Partition.Common
   , Block(..)
   ) where
 
+import           Unsafe.Coerce
+
 import           Control.DeepSeq (NFData)
+import           Data.Vector.Unboxed.Deriving
 
 import           Data.MorphismEncoding (State)
 
@@ -18,3 +23,8 @@ newtype Block = Block { fromBlock :: Int }
 
 instance Show Block where
   show (Block b) = show b
+
+derivingUnbox "Block"
+  [t| Block -> Int |]
+  [| unsafeCoerce |]
+  [| unsafeCoerce |]
diff --git a/src/Data/RefinablePartition.hs b/src/Data/RefinablePartition.hs
index 555be03e82605ebb10ce8aff1ebad6860f0ad10d..917f42016d0c762a619eb40a3f03af9b72029c8f 100644
--- a/src/Data/RefinablePartition.hs
+++ b/src/Data/RefinablePartition.hs
@@ -1,6 +1,6 @@
+{-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE StrictData #-}
 {-# LANGUAGE TemplateHaskell #-}
 {-# LANGUAGE MagicHash #-}
 
@@ -11,6 +11,7 @@ module Data.RefinablePartition
   , Block(..)
   -- * Construction
   , make
+  , make1
   -- * Accessors
   , numBlocks
   , blockSize
@@ -39,12 +40,13 @@ import           Data.Ord (comparing)
 import           Data.Primitive.MutVar
 import qualified Data.Vector as V
 import qualified Data.Vector.Algorithms.Heap as VM
-import           Data.Vector.Mutable (MVector)
 import qualified Data.Vector.Mutable as VM
 import qualified Data.Vector.Unboxed.Mutable as VU
+import qualified Data.Vector.Unboxed as VUU
 import qualified Data.Vector.Unboxed as VU (freeze, unsafeFreeze, Vector)
 import           Lens.Micro
 import           Lens.Micro.TH
+import           Data.Vector.Unboxed.Deriving
 
 import           Data.Vector.Utils (iforM_)
 import qualified Data.Vector.Unboxed.Mutable.Utils as VU
@@ -53,19 +55,28 @@ import           Data.Partition (Partition)
 import qualified Data.Partition as Partition
 
 data StateRepr = StateRepr
-  { _block :: {-# UNPACK #-} Block
-  , _location :: {-# UNPACK #-} Int
+  { _block :: Block
+  , _location :: Int
   } deriving (Show)
 
 makeLenses ''StateRepr
 
+derivingUnbox "StateRepr"
+  [t| StateRepr -> (Block,Int) |]
+  [| \(StateRepr b l) -> (b, l) |]
+  [| \(b, l) -> StateRepr b l |]
+
 data BlockRepr = BlockRepr
-  { _startOffset :: {-# UNPACK #-} Int
-  , _endOffset :: {-# UNPACK #-} Int -- exclusive
-  , _unmarkedOffset :: {-# UNPACK #-} Int
+  { _startOffset :: Int
+  , _endOffset :: Int -- exclusive
+  , _unmarkedOffset :: Int
   } deriving (Show)
 
 makeLenses ''BlockRepr
+derivingUnbox "BlockRepr"
+  [t| BlockRepr -> (Int, Int, Int) |]
+  [| \(BlockRepr s e u) -> (s, e, u) |]
+  [| \(s, e, u) -> (BlockRepr s e u) |]
 
 -- | Refinable partition type.
 --
@@ -75,14 +86,15 @@ makeLenses ''BlockRepr
 -- This type is by nature mutable and can be mutated with the operations in this
 -- module. The `s` type variable is there to support the ST monad.
 data RefinablePartition s = Partition
-  { _blockCount :: MutVar s Int
-  , _statesByBlock :: VU.MVector s State
-  , _states :: MVector s StateRepr
-  , _blocks :: MVector s BlockRepr
+  { _blockCount :: !(MutVar s Int)
+  , _statesByBlock :: !(VU.MVector s State)
+  , _states :: !(VU.MVector s StateRepr)
+  , _blocks :: !(VU.MVector s BlockRepr)
   }
 
 makeLenses ''RefinablePartition
 
+
 -- | Create a mutable refinable partition.
 make :: Int -- ^ Number of states n
      -> Int -- ^ Number of initial blocks m
@@ -96,10 +108,10 @@ make numStates numBlocks initPart
   | otherwise = do
 
   statesByBlock <- VU.new numStates
-  states <- VM.new numStates
+  states <- VU.new numStates
   -- we need to reserve space for more blocks, to allow splitting to create them.
   -- There can be at most as many blocks as there are states
-  blocks <- VM.new numStates
+  blocks <- VU.new numStates
   blockCount <- newMutVar numBlocks
 
   -- contains a list of states for each block
@@ -119,13 +131,13 @@ make numStates numBlocks initPart
       stateLocation <- readMutVar currentLocation
       modifyMutVar currentLocation (+1)
       VU.write statesByBlock stateLocation s
-      VM.write states s StateRepr { _block = Block i
+      VU.write states s StateRepr { _block = Block i
                                   , _location = stateLocation
                                   }
 
     endOfBlock <- readMutVar currentLocation
 
-    VM.write blocks i BlockRepr { _startOffset = beginningOfBlock
+    VU.write blocks i BlockRepr { _startOffset = beginningOfBlock
                                 , _endOffset = endOfBlock
                                 , _unmarkedOffset = beginningOfBlock
                                 }
@@ -137,6 +149,38 @@ make numStates numBlocks initPart
                    , _blocks = blocks
                    }
 
+
+-- | Create a new mutable refinable partition with one initial block.
+--
+-- This is a special case of 'make', that fixes the initial block count to 1 is
+-- slightly faster because of that.
+make1
+  :: Int -- ^ Number of states n
+  -> ST s (RefinablePartition s)
+make1 numStates
+  | numStates < 1 = error "RefinablePartition.make1: More blocks than states"
+  | otherwise = do
+
+    statesByBlock <- VUU.thaw (VUU.generate numStates id)
+    blockCount    <- newMutVar 1
+
+    states        <- VUU.thaw (VUU.generate numStates (StateRepr 0))
+
+    blocks        <- VU.new numStates
+    VU.write blocks 0
+      $ BlockRepr
+          { _startOffset    = 0
+          , _endOffset      = numStates
+          , _unmarkedOffset = 0
+          }
+
+    return Partition { _blockCount    = blockCount
+                     , _statesByBlock = statesByBlock
+                     , _states        = states
+                     , _blocks        = blocks
+                     }
+
+
 -- | Return number of blocks in this partition.
 --
 -- Runtime: O(1)
@@ -372,8 +416,8 @@ splitByM !partition !b !predicate = do
 
 -- | Split a block into new blocks according to some atttribute of its states.
 --
--- The result is maximally coarse list of blocks, such all states in a new block
--- have the same value for the given attribute.
+-- The result is maximally coarse list of blocks, such that all states in a new
+-- block have the same value for the given attribute.
 --
 -- One of the blocks inherits the identity of the old block.
 --
@@ -395,14 +439,11 @@ groupBy partition b predicate = do
   let splitAt (currentBlock,newBlocks) index = do
         setBlock partition currentBlock $ unmarkedOffset .~ index
         (Just previousBlock, Just nextBlock) <- splitMarked partition currentBlock
-        return (nextBlock, newBlocks++[previousBlock])
+        return (nextBlock, previousBlock:newBlocks)
 
   (last,blocks) <- foldM splitAt (b, []) indices
 
-  -- unless (null indices) $
-  --   setBlock partition b $ endOffset .~ head indices
-
-  return (blocks ++ [last])
+  return (last:blocks)
 
 -- | Freeze the current refinable partition into an immutable one.
 --
@@ -417,16 +458,16 @@ freeze partition = do
 
 -- helpers
 getBlock :: RefinablePartition s -> Block -> ST s BlockRepr
-getBlock !partition (Block b) = VM.unsafeRead (partition ^. blocks) b
+getBlock !partition (Block b) = VU.unsafeRead (partition ^. blocks) b
 
 setBlock :: RefinablePartition s -> Block -> (BlockRepr -> BlockRepr) -> ST s ()
-setBlock partition (Block b) setter = VM.unsafeModify (_blocks partition) setter b
+setBlock partition (Block b) setter = VU.unsafeModify (_blocks partition) setter b
 
 getState :: RefinablePartition s -> State -> ST s StateRepr
-getState partition s = VM.unsafeRead (partition^.states) s
+getState partition s = VU.unsafeRead (partition^.states) s
 
 setState :: RefinablePartition s -> State -> (StateRepr -> StateRepr) -> ST s ()
-setState partition s setter = VM.modify (partition^.states) setter s
+setState partition s setter = VU.modify (partition^.states) setter s
 
 setStateAt :: RefinablePartition s -> Int -> (StateRepr -> StateRepr) -> ST s ()
 setStateAt partition loc setter = VU.read (partition^.statesByBlock) loc >>= \state ->
diff --git a/src/MA/Algorithm/Initialize.hs b/src/MA/Algorithm/Initialize.hs
index 55a30fcae47219ca14496d91db22793e233b09f7..36e77e82c3e649062bdcd44c3e0ec0775659218c 100644
--- a/src/MA/Algorithm/Initialize.hs
+++ b/src/MA/Algorithm/Initialize.hs
@@ -45,7 +45,7 @@ initialize encoding = do
   pred <- V.unsafeFreeze predMutable
 
   -- Initialize partition with one block and assigning each state to that block
-  partition <- Partition.make (size encoding) 1 (const 0)
+  partition <- Partition.make1 (size encoding)
   -- immediately group according by type
   blocks <- Partition.groupBy partition 0 (typeOf encoding)
 
diff --git a/src/MA/Algorithm/Split.hs b/src/MA/Algorithm/Split.hs
index 7662af34637a1977959b14e99318af509be3564a..3fbdd9fa24b9728759eec9675f2a600d7193fd27 100644
--- a/src/MA/Algorithm/Split.hs
+++ b/src/MA/Algorithm/Split.hs
@@ -161,14 +161,15 @@ collectTouchedBlocks blockS = do
     let Edge x _ _ = graph (encoding as) e
     b <- Partition.blockOfState (partition as) x
 
-    unlessM (Partition.hasMarked (partition as) b) $ do
-      wCx <- readSTRef =<< VM.read (lastW as) (fromEdgeRef e)
-      let v0 = snd3 $ RI.update @h [] wCx
-      modifySTRef markedBlocks ((b, v0):)
+    unlessM ((==1) <$> Partition.blockSize (partition as) b) $ do
+      unlessM (Partition.hasMarked (partition as) b) $ do
+        wCx <- readSTRef =<< VM.read (lastW as) (fromEdgeRef e)
+        let v0 = snd3 $ RI.update @h [] wCx
+        modifySTRef markedBlocks ((b, v0):)
 
-    whenM (null <$> VM.read (toSub as) x) $
-      Partition.mark (partition as) x
+      whenM (null <$> VM.read (toSub as) x) $
+        Partition.mark (partition as) x
 
-    VM.modify (toSub as) (e:) x
+      VM.modify (toSub as) (e:) x
 
   lift $ readSTRef markedBlocks
diff --git a/tests/MA/Algorithm/SplitSpec.hs b/tests/MA/Algorithm/SplitSpec.hs
index 4bd366b5d897e7cfbe93bf866afcb47ed1a4903c..74ae02ed653c378d5222d64485c5d2d50cab2adc 100644
--- a/tests/MA/Algorithm/SplitSpec.hs
+++ b/tests/MA/Algorithm/SplitSpec.hs
@@ -46,7 +46,7 @@ collectTouchedBlocksSpec = describe "collectTouchedBlocks" $ do
       `shouldBe` []
 
   it "returns the correct block when predecessors exist" $ do
-    withState @Powerset (enc [True, False] [(0, (), 1)])
+    withState @Powerset (enc [True, True, False] [(0, (), 2), (1, (), 2)])
                         (map fst <$> collectTouchedBlocks (Block 1))
       `shouldBe` [Block 0]
 
@@ -159,13 +159,6 @@ updateBlockSpec = describe "updateBlock" $ do
 
 splitBlockSpec :: Spec
 splitBlockSpec = describe "splitBlock" $ do
-  it "handles the simple case of a one-element block"
-    $ let res = withState @Powerset (enc [True, False] [(0, (), 1)]) $ do
-            [(b, v0)] <- collectTouchedBlocks (Block 1)
-            updateBlock b v0
-            splitBlock b
-      in  res `shouldBe` [0]
-
   it "splits blocks into marked and unmaked"
     $ let
         res =
@@ -274,7 +267,7 @@ splitSpec = describe "split" $ do
                             ([0] `elem` l)
                               && ([1] `elem` l)
                               && ([2] `elem` l)
-                              && ([5, 6] `elem` l)
+                              && ([5, 6] `elem` (map sort l))
                               && (  ([3] `elem` l && not ([4] `elem` l))
                                  || ([4] `elem` l && not ([3] `elem` l))
                                  )