From f3fe815cfe26ee25e1c8f460fca247ca75fe22bb Mon Sep 17 00:00:00 2001 From: effectfully Date: Tue, 7 Jan 2025 06:47:56 +0100 Subject: [PATCH] [Evaluation] Statically unroll 'itraverseCounter_' (#5265) Results in a statically unrolled loop: ```haskell let! { (# ipv10_X1x, ipv11_X1y #) ~ <- $wf_svLt 0# (ipv8_itGb `cast` ) } in let! { (# ipv12_X1B, ipv13_X1C #) ~ <- $wf_svLt 1# ipv10_X1x } in let! { (# ipv14_X1F, ipv15_X1G #) ~ <- $wf_svLt 2# ipv12_X1B } in let! { (# ipv16_X1U, ipv17_X1V #) ~ <- $wf_svLt 3# ipv14_X1F } in let! { (# ipv18_X1X, ipv19_X1Y #) ~ <- $wf_svLt 4# ipv16_X1U } in let! { (# ipv20_X20, ipv21_X21 #) ~ <- $wf_svLt 5# ipv18_X1X } in let! { (# ipv22_X23, ipv23_X24 #) ~ <- $wf_svLt 6# ipv20_X20 } in let! { (# ipv24_X29, ipv25_X2a #) ~ <- $wf_svLt 7# ipv22_X23 } in let! { (# ipv26_X2f, ipv27_X2g #) ~ <- $wf_svLt 8# ipv24_X29 } in let! { (# ipv28_X2l, ipv29_X2m #) ~ <- $wf_svLt 9# ipv26_X2f } in let! { UnsafeRefl v4_itGA ~ <- unsafeEqualityProof } in let! { UnsafeRefl v5_itGz ~ <- unsafeEqualityProof } in let! { __DEFAULT ~ wild19_itGD <- setByteArray# (ipv5_itD3 `cast` ) 0# 10# 0# (ipv28_X2l `cast` ) } in let! { UnsafeRefl v6_itGF ~ <- unsafeEqualityProof } in (# wild19_itGD, () #) `cast` } in ``` --- ...lly_statically_unroll_itraverseCounter_.md | 3 + .../Evaluation/Machine/Cek/StepCounter.hs | 64 ++++++++++++------- 2 files changed, 44 insertions(+), 23 deletions(-) create mode 100644 plutus-core/changelog.d/20241126_025636_effectfully_statically_unroll_itraverseCounter_.md diff --git a/plutus-core/changelog.d/20241126_025636_effectfully_statically_unroll_itraverseCounter_.md b/plutus-core/changelog.d/20241126_025636_effectfully_statically_unroll_itraverseCounter_.md new file mode 100644 index 00000000000..4e84188ab82 --- /dev/null +++ b/plutus-core/changelog.d/20241126_025636_effectfully_statically_unroll_itraverseCounter_.md @@ -0,0 +1,3 @@ +### Changed + +- In #5265 made `itraverseCounter_` faster increasing overall performance of the evaluator by 2.5%. diff --git a/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Evaluation/Machine/Cek/StepCounter.hs b/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Evaluation/Machine/Cek/StepCounter.hs index 8d4419a26ce..10fa8460fe4 100644 --- a/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Evaluation/Machine/Cek/StepCounter.hs +++ b/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Evaluation/Machine/Cek/StepCounter.hs @@ -1,16 +1,23 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE KindSignatures #-} -{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE StandaloneKindSignatures #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE UndecidableInstances #-} module UntypedPlutusCore.Evaluation.Machine.Cek.StepCounter where import Control.Monad.Primitive import Data.Coerce (coerce) +import Data.Kind import Data.Primitive qualified as P import Data.Proxy import Data.Word -import GHC.TypeLits (Nat) -import GHC.TypeNats (KnownNat, natVal) +import GHC.TypeNats (KnownNat, Nat, natVal, type (-)) -- See Note [Step counter data structure] -- You might think that since we can store whatever we like in here we might as well @@ -73,18 +80,39 @@ modifyCounter i f c = do pure modified {-# INLINE modifyCounter #-} +-- | The type of natural numbers in Peano form. +data Peano + = Z + | S Peano + +type NatToPeano :: Nat -> Peano +type family NatToPeano n where + NatToPeano 0 = 'Z + NatToPeano n = 'S (NatToPeano (n - 1)) + +type UpwardsM :: (Type -> Type) -> Peano -> Constraint +class Applicative f => UpwardsM f n where + -- | @upwardsM i k@ means @k i *> k (i + 1) *> ... *> k (i + n - 1)@. + -- We use this function in order to statically unroll a loop in 'itraverseCounter_' through + -- instance resolution. This makes @validation@ benchmarks a couple of percent faster. + upwardsM :: Int -> (Int -> f ()) -> f () + +instance Applicative f => UpwardsM f 'Z where + upwardsM _ _ = pure () + {-# INLINE upwardsM #-} + +instance UpwardsM f n => UpwardsM f ('S n) where + upwardsM !i k = k i *> upwardsM @f @n (i + 1) k + {-# INLINE upwardsM #-} + -- | Traverse the counters with an effectful function. itraverseCounter_ :: forall n m - . (KnownNat n, PrimMonad m) + . (UpwardsM m (NatToPeano n), PrimMonad m) => (Int -> Word8 -> m ()) -> StepCounter n (PrimState m) -> m () itraverseCounter_ f (StepCounter arr) = do - -- The implementation of this function is very performance-sensitive. I believe - -- it may be possible to do better, but it's time-consuming to experiment more - -- and unclear what improves things. - -- The safety of this operation is a little subtle. The frozen array is only -- safe to use if the underlying mutable array is not mutated 'afterwards'. -- In our case it likely _will_ be mutated afterwards... but not until we @@ -92,22 +120,12 @@ itraverseCounter_ f (StepCounter arr) = do -- the whole thing runs in 'm': future accesses to the mutable array can't -- happen until this whole function is finished. arr' <- P.unsafeFreezePrimArray arr - let - sz = fromIntegral $ natVal (Proxy @n) - go !i - | i < sz = do - f i (P.indexPrimArray arr' i) - go (i+1) - | otherwise = pure () - go 0 --- I also tried INLINABLE + SPECIALIZE, which just resulted in it getting inlined, so might --- as well just go straight for that + upwardsM @_ @(NatToPeano n) 0 $ \i -> f i $ P.indexPrimArray arr' i {-# INLINE itraverseCounter_ #-} - -- | Traverse the counters with an effectful function. iforCounter_ - :: (KnownNat n, PrimMonad m) + :: (UpwardsM m (NatToPeano n), PrimMonad m) => StepCounter n (PrimState m) -> (Int -> Word8 -> m ()) -> m ()