diff --git a/Project.toml b/Project.toml
index 19888a8..bff6669 100644
--- a/Project.toml
+++ b/Project.toml
@@ -23,7 +23,7 @@ AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
 LuxNeuralOperatorsAMDGPUExt = "AMDGPU"
 
 [compat]
-AMDGPU = "0.9.5"
+AMDGPU = "0.8.4, 0.9"
 Aqua = "0.8.7"
 ArgCheck = "2.3.0"
 ChainRulesCore = "1.24.0"
diff --git a/ext/LuxNeuralOperatorsAMDGPUExt.jl b/ext/LuxNeuralOperatorsAMDGPUExt.jl
index c06a647..14a5217 100644
--- a/ext/LuxNeuralOperatorsAMDGPUExt.jl
+++ b/ext/LuxNeuralOperatorsAMDGPUExt.jl
@@ -11,4 +11,4 @@ using LuxNeuralOperators: LuxNeuralOperators
     return stack(*, eachslice(x; dims=3), eachslice(y; dims=3))
 end
 
-end
\ No newline at end of file
+end
diff --git a/src/LuxNeuralOperators.jl b/src/LuxNeuralOperators.jl
index d0e80fc..198c8b8 100644
--- a/src/LuxNeuralOperators.jl
+++ b/src/LuxNeuralOperators.jl
@@ -18,9 +18,6 @@ const CRC = ChainRulesCore
 
 @reexport using Lux
 
-const True = Val(true)
-const False = Val(false)
-
 include("utils.jl")
 include("transform.jl")
diff --git a/src/fno.jl b/src/fno.jl
index 8300b31..d6e8f31 100644
--- a/src/fno.jl
+++ b/src/fno.jl
@@ -49,7 +49,7 @@ FourierNeuralOperator(
 """
 function FourierNeuralOperator(
         σ=gelu; chs::Dims{C}=(2, 64, 64, 64, 64, 64, 128, 1), modes::Dims{M}=(16,),
-        permuted::Val{perm}=False, kwargs...) where {C, M, perm}
+        permuted::Val{perm}=Val(false), kwargs...) where {C, M, perm}
     @argcheck length(chs) ≥ 5
 
     map₁ = chs[1] => chs[2]
diff --git a/src/functional.jl b/src/functional.jl
index f895ed7..f1d729f 100644
--- a/src/functional.jl
+++ b/src/functional.jl
@@ -11,8 +11,8 @@ end
     x_size = size(x_tr)
     x_flat = reshape(x_tr, :, x_size[N - 1], x_size[N])
 
-    x_flat_t = permutedims(x_flat, (2, 3, 1)) # i x b x m
-    x_weighted = permutedims(weights ⊠ x_flat_t, (3, 1, 2)) # m x o x b
+    x_flat_t = permutedims(x_flat, (2, 3, 1)) # i x b x m
+    x_weighted = permutedims(__batched_mul(weights, x_flat_t), (3, 1, 2)) # m x o x b
 
     return reshape(x_weighted, x_size[1:(N - 2)]..., size(x_weighted)[2:3]...)
 end
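Note: the `functional.jl` hunk swaps the bare `⊠` for `__batched_mul`, giving the AMDGPU package extension a hook to override with the slice-wise product from `ext/LuxNeuralOperatorsAMDGPUExt.jl`. A minimal sketch of that dispatch pattern; the generic fallback below is an assumption, since this diff only shows the extension method:

```julia
using NNlib: ⊠   # batched matrix multiplication, batching over dim 3

# Assumed generic fallback: defer to NNlib's batched matmul.
__batched_mul(x::AbstractArray{<:Number, 3}, y::AbstractArray{<:Number, 3}) = x ⊠ y

# The AMDGPU extension instead multiplies slice-by-slice (one matmul per
# batch slice), as in the hunk above:
#     stack(*, eachslice(x; dims=3), eachslice(y; dims=3))

x = rand(Float32, 4, 3, 2)   # two 4×3 matrices
y = rand(Float32, 3, 5, 2)   # two 3×5 matrices
@assert size(__batched_mul(x, y)) == (4, 5, 2)
```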
diff --git a/src/layers.jl b/src/layers.jl
index 8650b58..1bac045 100644
--- a/src/layers.jl
+++ b/src/layers.jl
@@ -1,14 +1,14 @@
 """
-    OperatorConv(ch::Pair{<:Integer, <:Integer}, modes::NTuple{N, <:Integer}, ::Type{TR};
-        init_weight = glorot_uniform, T::Type{TP} = ComplexF32,
-        permuted::Val{P} = Val(false)) where {N, TR <: AbstractTransform, TP, P}
+    OperatorConv(ch::Pair{<:Integer, <:Integer}, modes::NTuple{N, <:Integer},
+        ::Type{TR}; init_weight=glorot_uniform,
+        permuted::Val{perm}=Val(false)) where {N, TR <: AbstractTransform, perm}
 
 ## Arguments
 
   - `ch`: A `Pair` of input and output channel size `ch_in => ch_out`, e.g. `64 => 64`.
   - `modes`: The modes to be preserved. A tuple of length `d`, where `d` is the dimension
     of data.
-  - `::Type{TR}`: The traform to operate the transformation.
+  - `::Type{TR}`: The transform to apply to the data.
 
 ## Keyword Arguments
@@ -16,19 +16,18 @@
   - `permuted`: Whether the dim is permuted. If `permuted = Val(false)`, the layer accepts
     data in the order of `(ch, x_1, ... , x_d, batch)`. Otherwise the order is
     `(x_1, ... , x_d, ch, batch)`.
-  - `T`: Datatype of parameters.
 
 ## Example
 
 ```jldoctest
-julia> OperatorConv(2 => 5, (16,), FourierTransform)
+julia> OperatorConv(2 => 5, (16,), FourierTransform{ComplexF32})
 OperatorConv{FourierTransform}(2 => 5, (16,); permuted = false)() # 160 parameters
 
-julia> OperatorConv(2 => 5, (16,), FourierTransform; permuted=Val(true))
+julia> OperatorConv(2 => 5, (16,), FourierTransform{ComplexF32}; permuted=Val(true))
 OperatorConv{FourierTransform}(2 => 5, (16,); permuted = true)() # 160 parameters
 ```
 """
-@concrete struct OperatorConv{elType, perm, T <: AbstractTransform} <: AbstractExplicitLayer
+@concrete struct OperatorConv{perm, T <: AbstractTransform} <: AbstractExplicitLayer
     in_chs::Int
     out_chs::Int
     prod_modes::Int
@@ -39,12 +38,12 @@ OperatorConv{FourierTransform}(2 => 5, (16,); permuted = true)() # 160 paramete
     name::String
 end
 
-function LuxCore.initialparameters(
-        rng::AbstractRNG, layer::OperatorConv{elType}) where {elType}
+function LuxCore.initialparameters(rng::AbstractRNG, layer::OperatorConv)
     in_chs, out_chs = layer.in_chs, layer.out_chs
-    scale = real(one(elType)) / (in_chs * out_chs)
-    weights = scale * layer.init_weight(rng, elType, out_chs, in_chs, layer.prod_modes)
-    return (; weights,)
+    scale = real(one(eltype(layer.tform))) / (in_chs * out_chs)
+    return (;
+        weights=scale * layer.init_weight(
+            rng, eltype(layer.tform), out_chs, in_chs, layer.prod_modes))
 end
 
 @inline function LuxCore.parameterlength(layer::OperatorConv)
@@ -52,17 +51,17 @@ end
 
 function OperatorConv(ch::Pair{<:Integer, <:Integer}, modes::NTuple{N, <:Integer},
-        ::Type{TR}; init_weight=glorot_uniform, T::Type{TP}=ComplexF32,
-        permuted::Val{perm}=Val(false)) where {N, TR <: AbstractTransform, TP, perm}
+        ::Type{TR}; init_weight=glorot_uniform,
+        permuted::Val{perm}=Val(false)) where {N, TR <: AbstractTransform{<:Number}, perm}
     name = "OperatorConv{$(string(nameof(TR)))}($(ch[1]) => $(ch[2]), $modes; permuted = $perm)"
-    return OperatorConv{TP, perm}(ch..., prod(modes), TR(modes), init_weight, name)
+    return OperatorConv{perm}(ch..., prod(modes), TR(modes), init_weight, name)
 end
 
-function (conv::OperatorConv{T, true})(x::AbstractArray{<:Real, M}, ps, st) where {T, M}
+function (conv::OperatorConv{true})(x::AbstractArray, ps, st)
     return operator_conv(x, conv.tform, ps.weights), st
 end
 
-function (conv::OperatorConv{T, false})(x::AbstractArray{<:Real, M}, ps, st) where {T, M}
+function (conv::OperatorConv{false})(x::AbstractArray, ps, st)
     N = ndims(conv.tform)
     xᵀ = permutedims(x, (ntuple(i -> i + 1, N)..., 1, N + 2))
     yᵀ = operator_conv(xᵀ, conv.tform, ps.weights)
@@ -73,7 +72,7 @@ end
 """
     SpectralConv(args...; kwargs...)
 
-Construct a `OperatorConv` with `FourierTransform` as the transform. See
+Construct an `OperatorConv` with `FourierTransform{ComplexF32}` as the transform. See
 [`OperatorConv`](@ref) for the individual arguments.
 
 ## Example
@@ -86,7 +85,8 @@ julia> SpectralConv(2 => 5, (16,); permuted=Val(true))
 OperatorConv{FourierTransform}(2 => 5, (16,); permuted = true)() # 160 parameters
 ```
 """
-SpectralConv(args...; kwargs...) = OperatorConv(args..., FourierTransform; kwargs...)
+SpectralConv(args...; kwargs...) = OperatorConv(
+    args..., FourierTransform{ComplexF32}; kwargs...)
 
 """
     OperatorKernel(ch::Pair{<:Integer, <:Integer}, modes::Dims{N}, transform::Type{TR},
@@ -106,14 +106,13 @@ SpectralConv(args...; kwargs...) = OperatorConv(args..., FourierTransform; kwarg
   - `permuted`: Whether the dim is permuted. If `permuted = Val(true)`, the layer accepts
     data in the order of `(x_1, ... , x_d, ch, batch)`. Otherwise the order is
     `(ch, x_1, ... , x_d, batch)`.
-  - `T`: Datatype of parameters.
 
 All the keyword arguments are passed to the [`OperatorConv`](@ref) constructor.
 
 ## Example
 
 ```jldoctest
-julia> OperatorKernel(2 => 5, (16,), FourierTransform)
+julia> OperatorKernel(2 => 5, (16,), FourierTransform{ComplexF64})
 @compact(
     l₁ = Dense(2 => 5),  # 15 parameters
     l₂ = OperatorConv{FourierTransform}(2 => 5, (16,); permuted = false)(),  # 160 parameters
@@ -125,7 +124,7 @@ julia> OperatorKernel(2 => 5, (16,), FourierTransform)
 end # Total: 175 parameters,
     #  plus 1 states.
 
-julia> OperatorKernel(2 => 5, (16,), FourierTransform; permuted=Val(true))
+julia> OperatorKernel(2 => 5, (16,), FourierTransform{ComplexF64}; permuted=Val(true))
 @compact(
     l₁ = Conv((1,), 2 => 5),  # 15 parameters
     l₂ = OperatorConv{FourierTransform}(2 => 5, (16,); permuted = true)(),  # 160 parameters
@@ -140,7 +139,7 @@ end # Total: 175 parameters,
 """
 function OperatorKernel(ch::Pair{<:Integer, <:Integer}, modes::Dims{N}, transform::Type{TR},
         act::A=identity; allow_fast_activation::Bool=false, permuted::Val{perm}=Val(false),
-        kwargs...) where {N, TR <: AbstractTransform, perm, A}
+        kwargs...) where {N, TR <: AbstractTransform{<:Number}, perm, A}
     act = allow_fast_activation ? NNlib.fast_act(act) : act
     l₁ = perm ? Conv(map(_ -> 1, modes), ch) : Dense(ch)
    l₂ = OperatorConv(ch, modes, transform; permuted, kwargs...)
@@ -155,7 +154,7 @@ end
 """
     SpectralKernel(args...; kwargs...)
 
-Construct a `OperatorKernel` with `FourierTransform` as the transform. See
+Construct an `OperatorKernel` with `FourierTransform{ComplexF32}` as the transform. See
 [`OperatorKernel`](@ref) for the individual arguments.
 
 ## Example
@@ -188,5 +187,5 @@ end # Total: 175 parameters,
 """
 function SpectralKernel(ch::Pair{<:Integer, <:Integer}, modes::Dims{N}, act::A=identity;
         kwargs...) where {N, A}
-    return OperatorKernel(ch, modes, FourierTransform, act; kwargs...)
+    return OperatorKernel(ch, modes, FourierTransform{ComplexF32}, act; kwargs...)
 end
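Note: with the `elType` type parameter dropped from `OperatorConv`, the parameter eltype now rides on the transform's own type parameter. A usage sketch of the revised constructor, based on the docstrings above (shapes and the rng setup are illustrative):

```julia
using LuxNeuralOperators, Random

rng = Random.default_rng()

# The eltype comes from the transform type instead of a `T` keyword:
conv = OperatorConv(2 => 5, (16,), FourierTransform{ComplexF32})
ps, st = Lux.setup(rng, conv)
@assert eltype(ps.weights) == ComplexF32

# permuted = Val(true) expects data ordered as (x_1, ..., x_d, ch, batch):
convᵀ = OperatorConv(2 => 5, (16,), FourierTransform{ComplexF32}; permuted=Val(true))
psᵀ, stᵀ = Lux.setup(rng, convᵀ)
x = rand(Float32, 64, 2, 7)   # 64 grid points, 2 channels, batch of 7
y, _ = convᵀ(x, psᵀ, stᵀ)
@assert size(y) == (64, 5, 7) # spatial size preserved, 5 output channels
```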
diff --git a/src/transform.jl b/src/transform.jl
index 23d9cbb..31f3dc6 100644
--- a/src/transform.jl
+++ b/src/transform.jl
@@ -10,15 +10,16 @@
   - `inverse(<:AbstractTransform, x_transformed::AbstractArray)`: Apply the inverse
     transform to `x_transformed`
 """
-abstract type AbstractTransform end
+abstract type AbstractTransform{T} end
+
+@inline Base.eltype(::Type{<:AbstractTransform{T}}) where {T} = T
 
 # Fourier Transform
-@concrete struct FourierTransform <: AbstractTransform
+@concrete struct FourierTransform{T} <: AbstractTransform{T}
     modes
 end
 
-Base.ndims(T::FourierTransform) = length(T.modes)
-Base.eltype(::Type{FourierTransform}) = ComplexF32
+@inline Base.ndims(T::FourierTransform) = length(T.modes)
 
 @inline transform(ft::FourierTransform, x::AbstractArray) = rfft(x, 1:ndims(ft))
@@ -28,7 +29,7 @@ end
 
 @inline truncate_modes(ft::FourierTransform, x_fft::AbstractArray) = low_pass(ft, x_fft)
 
-function inverse(
+@inline function inverse(
         ft::FourierTransform, x_fft::AbstractArray{T, N}, M::NTuple{N, Int64}) where {T, N}
     return real(irfft(x_fft, first(M), 1:ndims(ft)))
 end
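Note: `Base.eltype` is now defined once on `AbstractTransform{T}` rather than per concrete transform, which removes the hard-coded `ComplexF32`. A quick illustration (the two-dimensional modes tuple is illustrative):

```julia
using LuxNeuralOperators

# The element type is recovered from the type parameter:
@assert eltype(FourierTransform{ComplexF32}) == ComplexF32
@assert eltype(FourierTransform{ComplexF64}) == ComplexF64

ft = FourierTransform{ComplexF32}((16, 16))  # modes for two spatial dims
@assert ndims(ft) == 2                       # ndims counts the modes tuple
```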
diff --git a/test/fno_tests.jl b/test/fno_tests.jl
index a122fa6..cc3fcef 100644
--- a/test/fno_tests.jl
+++ b/test/fno_tests.jl
@@ -22,8 +22,11 @@
             @test size(first(fno(x, ps, st))) == setup.y_size
 
             data = [(x, y)]
-            l2, l1 = train!(fno, ps, st, data; epochs=10)
-            @test l2 < l1
+            broken = mode == "AMDGPU"
+            @test begin
+                l2, l1 = train!(fno, ps, st, data; epochs=10)
+                l2 < l1
+            end broken=broken
         end
     end
 end
diff --git a/test/layers_tests.jl b/test/layers_tests.jl
index 7c65ff3..2624d38 100644
--- a/test/layers_tests.jl
+++ b/test/layers_tests.jl
@@ -30,8 +30,11 @@
             @jet m(x, ps, st)
 
             data = [(x, aType(rand(rng, Float32, setup.y_size...)))]
-            l2, l1 = train!(m, ps, st, data; epochs=10)
-            @test l2 < l1
+            broken = mode == "AMDGPU"
+            @test begin
+                l2, l1 = train!(m, ps, st, data; epochs=10)
+                l2 < l1
+            end broken=broken
        end
    end
end
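Note: the test hunks wrap the training check in `@test`'s `broken` keyword (Julia 1.7+). With `broken=true` a failure is recorded as `Broken` rather than `Fail`, and the test starts erroring once it unexpectedly passes, which keeps the AMDGPU training regression visible without failing CI. A standalone illustration with hypothetical loss values:

```julia
using Test

l1, l2 = 1.0, 0.5   # hypothetical losses before and after training

@test l2 < l1 broken=false   # passes, recorded as Pass

@test l2 > l1 broken=true    # fails, but recorded as Broken rather than Fail
```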