Skip to content

Commit 28d3ce5

Browse files
Rework tabular approximator
1 parent 4f96c51 commit 28d3ce5

4 files changed

Lines changed: 44 additions & 43 deletions

File tree

src/ReinforcementLearningCore/src/policies/learners/abstract_learner.jl

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -14,27 +14,6 @@ end
1414

1515
function RLBase.optimise!(::AbstractLearner, ::AbstractStage, ::Trajectory) end
1616

17-
18-
"""
19-
Approximator(model, optimiser)
20-
21-
Wraps a Flux trainable model and implements the `RLBase.optimise!(::Approximator, ::Gradient)`
22-
interface. See the RLCore documentation for more information on proper usage.
23-
"""
24-
struct Approximator{M,O} <: AbstractLearner
25-
model::M
26-
optimiser_state::O
27-
end
28-
29-
function Approximator(; model, optimiser)
30-
optimiser_state = Flux.setup(optimiser, model)
31-
Approximator(gpu(model), gpu(optimiser_state)) # Pass model to GPU (if available) upon creation
32-
end
33-
34-
Base.show(io::IO, m::MIME"text/plain", A::Approximator) = show(io, m, convert(AnnotatedStructTree, A))
35-
36-
@functor Approximator (model,)
37-
3817
function RLBase.plan!(explorer::AbstractExplorer, learner::AbstractLearner, env::AbstractEnv)
3918
legal_action_space_ = RLBase.legal_action_space_mask(env)
4019
RLBase.plan!(explorer, forward(learner, env), legal_action_space_)
@@ -44,8 +23,3 @@ function RLBase.plan!(explorer::AbstractExplorer, learner::AbstractLearner, env:
4423
legal_action_space_ = RLBase.legal_action_space_mask(env, player)
4524
return RLBase.plan!(explorer, forward(learner, env), legal_action_space_)
4625
end
47-
48-
forward(A::Approximator, args...; kwargs...) = A.model(args...; kwargs...)
49-
50-
RLBase.optimise!(A::Approximator, grad) =
51-
Flux.Optimise.update!(A.optimiser_state, A.model, grad)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""
2+
Approximator(model, optimiser)
3+
4+
Wraps a Flux trainable model and implements the `RLBase.optimise!(::Approximator, ::Gradient)`
5+
interface. See the RLCore documentation for more information on proper usage.
6+
"""
7+
struct Approximator{M,O} <: AbstractLearner
8+
model::M
9+
optimiser_state::O
10+
end
11+
12+
function Approximator(; model, optimiser, gpu=false)
13+
optimiser_state = Flux.setup(optimiser, model)
14+
if gpu # Pass model to GPU (if available) upon creation
15+
return Approximator(gpu(model), gpu(optimiser_state))
16+
else
17+
return Approximator(model, optimiser_state)
18+
end
19+
end
20+
21+
Base.show(io::IO, m::MIME"text/plain", A::Approximator) = show(io, m, convert(AnnotatedStructTree, A))
22+
23+
@functor Approximator (model,)
24+
25+
forward(A::Approximator, args...; kwargs...) = A.model(args...; kwargs...)
26+
27+
RLBase.optimise!(A::Approximator, grad) =
28+
Flux.Optimise.update!(A.optimiser_state, A.model, grad)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
include("abstract_learner.jl")
# `approximator.jl` defines `Approximator`, which the tabular aliases below build on,
# so it must be included before `tabular_approximator.jl`.
include("approximator.jl")
include("tabular_approximator.jl")
include("target_network.jl")
Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
export TabularApproximator, TabularVApproximator, TabularQApproximator
22

3-
using Flux: gpu
3+
# Type aliases: a tabular approximator is simply an `Approximator` whose "model"
# is a plain array used as a lookup table.
const TabularApproximator = Approximator{A,O} where {A<:AbstractArray,O}
# Q-tables are 2-d (action × state); use `AbstractMatrix` so the alias does not
# also match 1-d state-value tables (the previous `AbstractArray` bound made
# `TabularQApproximator` indistinguishable from the generic alias).
const TabularQApproximator = Approximator{A,O} where {A<:AbstractMatrix,O}
const TabularVApproximator = Approximator{A,O} where {A<:AbstractVector,O}
46

57
"""
68
TabularApproximator(table<:AbstractArray, opt)
@@ -11,15 +13,10 @@ For `table` of 2-d, it will serve as a state-action value approximator.
1113
!!! warning
1214
For `table` of 2-d, the first dimension is action and the second dimension is state.
1315
"""
14-
# TODO: add back missing AbstractApproximator
15-
struct TabularApproximator{N,A,O} <: AbstractLearner
16-
table::A
17-
optimizer::O
18-
function TabularApproximator(table::A, opt::O) where {A<:AbstractArray,O}
19-
n = ndims(table)
20-
n <= 2 || throw(ArgumentError("the dimension of table must be <= 2"))
21-
new{n,A,O}(table, opt)
22-
end
16+
function TabularApproximator(table::A, opt::O) where {A<:AbstractArray,O}
17+
n = ndims(table)
18+
n <= 2 || throw(ArgumentError("the dimension of table must be <= 2"))
19+
TabularApproximator{A,O}(table, opt)
2320
end
2421

2522
TabularVApproximator(; n_state, init = 0.0, opt = InvDecay(1.0)) =
@@ -29,21 +26,22 @@ TabularQApproximator(; n_state, n_action, init = 0.0, opt = InvDecay(1.0)) =
2926
TabularApproximator(fill(init, n_action, n_state), opt)
3027

3128
# Take Learner and Environment, get state, send to RLCore.forward(Learner, State)
# NOTE: must use `Approximator{<:AbstractArray}` (a UnionAll), not
# `Approximator{A, Any}` — Julia type parameters are invariant, so the latter
# only matches when the optimiser-state parameter is literally `Any` and the
# method would never fire for concrete optimiser types.
function forward(L::Approximator{<:AbstractArray}, env::E) where {E<:AbstractEnv}
    env |> state |> (x -> forward(L, x))
end

# State-value lookup: 1-d table indexed by state.
# `I<:Integer` must appear in the `where` clause — without it `I` is an
# undefined type variable and the method definition errors.
RLCore.forward(
    app::Approximator{R,O},
    s::I,
) where {R<:AbstractVector,O,I<:Integer} = @views app.model[s]

# Action-value lookup over all actions: column `s` of the 2-d (action × state) table.
RLCore.forward(
    app::Approximator{R,O},
    s::I,
) where {R<:AbstractMatrix,O,I<:Integer} = @views app.model[:, s]

# Single state-action value: first dimension is action, second is state.
RLCore.forward(
    app::Approximator{R,O},
    s::I1,
    a::I2,
) where {R<:AbstractMatrix,O,I1<:Integer,I2<:Integer} = @views app.model[a, s]

0 commit comments

Comments
 (0)