Skip to content

Commit a8d1db1

Browse files
authored
simplify agent proxy setup (#165)
This was getting a tiny bit complex and I really want to stop hardcoding input/output names here. I've tried cloud training with MLP agents -- I'd assume the features+images path is equally affected, as we're just reusing more code.
1 parent 9f2cc4c commit a8d1db1

3 files changed

Lines changed: 170 additions & 93 deletions

File tree

emote/proxies.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,11 @@ def policy(self) -> nn.Module:
2525
pass
2626

2727
@property
28-
def input_names(self) -> tuple[str]:
28+
def input_names(self) -> tuple[str, ...]:
29+
...
30+
31+
@property
32+
def output_names(self) -> tuple[str, ...]:
2933
...
3034

3135

emote/sac.py

Lines changed: 111 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from emote.mixins.logging import LoggingMixin
1515
from emote.proxies import AgentProxy
1616
from emote.typing import AgentId, DictObservation, DictResponse, EpisodeState
17+
from emote.utils.deprecated import deprecated
1718
from emote.utils.gamma_matrix import discount, make_gamma_matrix, split_rollouts
1819
from emote.utils.spaces import MDPSpace
1920

@@ -348,110 +349,45 @@ def __call__(self, *args, **kwargs):
348349
def input_names(self):
349350
return self._inner.input_names
350351

351-
352-
class FeatureAgentProxy:
353-
"""An agent proxy for basic MLPs.
354-
355-
This AgentProxy assumes that the observations will contain a single flat array of features.
356-
"""
357-
358-
def __init__(self, policy: nn.Module, device: torch.device, input_key: str = "obs"):
359-
"""Create a new proxy.
360-
361-
:param policy: The policy to execute for actions.
362-
:param device: The device to run on.
363-
:param input_key: The name of the features. (default: "obs")
364-
"""
365-
self.policy = policy
366-
self._end_states = [EpisodeState.TERMINAL, EpisodeState.INTERRUPTED]
367-
self.device = device
368-
369-
self._input_key = input_key
370-
371-
def __call__(
372-
self,
373-
observations: Dict[AgentId, DictObservation],
374-
) -> Dict[AgentId, DictResponse]:
375-
"""Runs the policy and returns the actions."""
376-
# The network takes observations of size batch x obs for each observation space.
377-
assert len(observations) > 0, "Observations must not be empty."
378-
active_agents = [
379-
agent_id
380-
for agent_id, obs in observations.items()
381-
if obs.episode_state not in self._end_states
382-
]
383-
tensor_obs = torch.tensor(
384-
np.array(
385-
[
386-
observations[agent_id].array_data[self._input_key]
387-
for agent_id in active_agents
388-
]
389-
)
390-
).to(self.device)
391-
392-
actions = self.policy(tensor_obs)[0].detach().cpu().numpy()
393-
394-
return {
395-
agent_id: DictResponse(list_data={"actions": actions[i]}, scalar_data={})
396-
for i, agent_id in enumerate(active_agents)
397-
}
398-
399352
@property
400-
def input_names(self):
401-
return (self._input_key,)
402-
403-
404-
class VisionAgentProxy:
405-
"""This AgentProxy assumes that the observations will contain image observations 'obs'"""
353+
def output_names(self):
354+
return self._inner.output_names
406355

407-
def __init__(self, policy: nn.Module, device: torch.device):
408-
self.policy = policy
409-
self._end_states = [EpisodeState.TERMINAL, EpisodeState.INTERRUPTED]
410-
self.device = device
356+
@property
357+
def policy(self):
358+
return self._inner.policy
411359

412-
def __call__(
413-
self, observations: Dict[AgentId, DictObservation]
414-
) -> Dict[AgentId, DictResponse]:
415-
"""Runs the policy and returns the actions."""
416-
# The network takes observations of size batch x obs for each observation space.
417-
assert len(observations) > 0, "Observations must not be empty."
418-
active_agents = [
419-
agent_id
420-
for agent_id, obs in observations.items()
421-
if obs.episode_state not in self._end_states
422-
]
423-
np_obs = np.array(
424-
[observations[agent_id].array_data["obs"] for agent_id in active_agents]
425-
)
426-
tensor_obs = torch.tensor(np_obs).to(self.device)
427-
actions = self.policy(tensor_obs)[0].detach().cpu().numpy()
428-
return {
429-
agent_id: DictResponse(list_data={"actions": actions[i]}, scalar_data={})
430-
for i, agent_id in enumerate(active_agents)
431-
}
432360

361+
class GenericAgentProxy(AgentProxy):
362+
"""Observations are dicts that contain multiple input and output keys.
433363
434-
class MultiKeyAgentProxy:
435-
"""Observations are dicts that contain multiple input keys (e.g. both "features" and "images")"""
364+
For example, we might have a policy that takes in both "obs" and
365+
"goal" and outputs "actions". In order to be able to properly
366+
invoke the network it is the responsibility of this proxy to
367+
collate the inputs and decollate the outputs per agent.
368+
"""
436369

437370
def __init__(
438371
self,
439372
policy: nn.Module,
440373
device: torch.device,
441374
input_keys: tuple,
442-
spaces: MDPSpace = None,
375+
output_keys: tuple,
376+
spaces: MDPSpace | None = None,
443377
):
444378
"""Create a new proxy.
445379
446-
Args:
447-
policy (nn.Module): The policy to execute for actions.
448-
device (torch.device): The device to run on.
449-
input_keys (tuple): The names of the input.
380+
:param policy (nn.Module): The policy to invoke
381+
:param device (torch.device): The device to run on
382+
:param input_keys (tuple): The names of the inputs to the policy
383+
:param output_keys (tuple): The names of the outputs of the policy
384+
:param spaces (MDPSpace): The spaces of the inputs and outputs
450385
"""
451-
self.policy = policy
386+
self._policy = policy
452387
self._end_states = [EpisodeState.TERMINAL, EpisodeState.INTERRUPTED]
453388
self.device = device
454389
self.input_keys = input_keys
390+
self.output_keys = output_keys
455391
self._spaces = spaces
456392

457393
def __call__(
@@ -467,7 +403,7 @@ def __call__(
467403
if obs.episode_state not in self._end_states
468404
]
469405

470-
dict_tensor_obs = {}
406+
tensor_obs_list = [None] * len(self.input_keys)
471407
for input_key in self.input_keys:
472408
np_obs = np.array(
473409
[
@@ -482,15 +418,98 @@ def __call__(
482418
np_obs = np.reshape(np_obs, shape)
483419

484420
tensor_obs = torch.tensor(np_obs).to(self.device)
485-
dict_tensor_obs[input_key] = tensor_obs
421+
index = self.input_keys.index(input_key)
422+
tensor_obs_list[index] = tensor_obs
486423

487-
actions = self.policy(**dict_tensor_obs)[0].detach().cpu().numpy()
424+
outputs: tuple[any, ...] = self._policy(*tensor_obs_list)
425+
# we remove element 1 as we don't need the logprobs here
426+
outputs = outputs[0:1] + outputs[2:]
488427

489-
return {
490-
agent_id: DictResponse(list_data={"actions": actions[i]}, scalar_data={})
491-
for i, agent_id in enumerate(active_agents)
428+
outputs = {
429+
key: outputs[i].detach().cpu().numpy()
430+
for i, key in enumerate(self.output_keys)
492431
}
493432

433+
agent_data = [
434+
(agent_id, DictResponse(list_data={}, scalar_data={}))
435+
for agent_id in active_agents
436+
]
437+
438+
for i, (_, response) in enumerate(agent_data):
439+
for k, data in outputs.items():
440+
response.list_data[k] = data[i]
441+
442+
return dict(agent_data)
443+
494444
@property
495445
def input_names(self):
496446
return self.input_keys
447+
448+
@property
449+
def output_names(self):
450+
return self.output_keys
451+
452+
@property
453+
def policy(self):
454+
return self._policy
455+
456+
457+
class FeatureAgentProxy(GenericAgentProxy):
    """Agent proxy for plain MLP policies.

    Assumes each observation carries a single flat feature array under
    ``input_key``.
    """

    @deprecated(reason="Use GenericAgentProxy instead", version="23.1.0")
    def __init__(self, policy: nn.Module, device: torch.device, input_key: str = "obs"):
        """Create a new proxy.

        :param policy: The policy to execute for actions.
        :param device: The device to run on.
        :param input_key: The name of the features. (default: "obs")
        """
        # Single feature input, single "actions" output — the generic
        # proxy handles collation/decollation.
        feature_inputs = (input_key,)
        super().__init__(
            policy=policy,
            device=device,
            input_keys=feature_inputs,
            output_keys=("actions",),
        )
478+
479+
480+
class VisionAgentProxy(FeatureAgentProxy):
    """Agent proxy for image policies.

    Assumes the observations carry image data under the key 'obs'.
    """

    @deprecated(reason="Use GenericAgentProxy instead", version="23.1.0")
    def __init__(self, policy: nn.Module, device: torch.device):
        # Identical to a FeatureAgentProxy reading the "obs" key.
        super().__init__(policy=policy, device=device, input_key="obs")
486+
487+
488+
class MultiKeyAgentProxy(GenericAgentProxy):
    """Handles multiple input keys.

    Observations are dicts that contain multiple input keys (e.g. both "features" and "images").
    """

    @deprecated(reason="Use GenericAgentProxy instead", version="23.1.0")
    def __init__(
        self,
        policy: nn.Module,
        device: torch.device,
        input_keys: tuple,
        # `| None` matches GenericAgentProxy's signature; plain `MDPSpace = None`
        # was a lying annotation since None is the default.
        spaces: MDPSpace | None = None,
    ):
        """Create a new proxy.

        :param policy: The policy to execute for actions.
        :param device: The device to run on.
        :param input_keys: The names of the inputs to the policy.
        :param spaces: The spaces of the inputs and outputs. (default: None)
        """
        super().__init__(
            policy=policy,
            device=device,
            input_keys=input_keys,
            output_keys=("actions",),
            spaces=spaces,
        )

emote/utils/deprecated.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""
2+
3+
"""
4+
5+
import functools
6+
import warnings
7+
8+
from typing import Callable
9+
10+
11+
def deprecated(
12+
original_function: Callable = None,
13+
*,
14+
reason: str = None,
15+
max_warn_count: int = 10,
16+
version: str = None,
17+
) -> Callable:
18+
"""Function decorator to deprecate an annotated function. Can be used both as a
19+
bare decorator, or with parameters to customize the display of the
20+
message. Writes to logging.warn.
21+
22+
:param original_function: Function to decorate. Automatically passed.
23+
:param message: Message to show. Function name is automatically added.
24+
:param max_warn_count: How many times we will warn for the same function
25+
:returns: the wrapped function
26+
"""
27+
reason = f": {reason}" if reason else ""
28+
version = f" -- deprecated since version {version}" if version else ""
29+
30+
def _decorate(function):
31+
warn_count = 0
32+
33+
name = getattr(function, "__qualname__", function.__name__)
34+
message = f"Call to deprecated function '{name}'{reason}{version}."
35+
36+
@functools.wraps(function)
37+
def _wrapper(*args, **kwargs):
38+
nonlocal warn_count
39+
if warn_count < max_warn_count:
40+
warnings.warn(
41+
message,
42+
DeprecationWarning,
43+
stacklevel=2,
44+
)
45+
warn_count += 0
46+
47+
return function(*args, **kwargs)
48+
49+
return _wrapper
50+
51+
if original_function:
52+
return _decorate(original_function)
53+
54+
return _decorate

0 commit comments

Comments
 (0)