cherry-pick changes to reward-test-check (#166)

tgolsson · web-flow · commit 7b513f13fc06 · 2023-09-01T10:22:20.000+02:00
Cherry-picking some changes from #150 as a step towards landing it soon.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Now targetting torch version 1.12, up from 1.11.
 - `OnnxExporter` accepts a `device` argument to enable tracing on other devices.
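+- `FinalRewardTestCheck` now accepts a `key` argument to select which logged value is checked, and a `use_windowed` flag to check the mean of that key's windowed data instead of the scalar log.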
 
 ### Deprecations
 
diff --git a/emote/callbacks/testing.py b/emote/callbacks/testing.py
@@ -33,13 +33,23 @@ def __init__(
         callback: LoggingMixin,
         cutoff: float,
         test_length: int,
+        key: str = "training/scaled_reward",
+        use_windowed: bool = False,
     ):
         super().__init__(cycle=test_length)
         self._cb = callback
         self._cutoff = cutoff
+        self._key = key
+        self._use_windowed = use_windowed
 
     def end_cycle(self):
-        reward = self._cb.scalar_logs["training/scaled_reward"]
+        if self._use_windowed:
+            data = self._cb.windowed_scalar[self._key]
+            reward = sum(data) / len(data)
+        else:
+            reward = self._cb.scalar_logs[self._key]
+
         if reward < self._cutoff:
             raise Exception(f"Reward too low: {reward}")
+
         raise TrainingShutdownException()