Skip to content

Commit d4abc64

Browse files
authored
add google football evaluation support
add google football evaluation support
2 parents 7df4dc1 + 22020a1 commit d4abc64

11 files changed

Lines changed: 55 additions & 8 deletions

File tree

examples/gfootball/README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
This is the guidance for [Google Research Football](https://github.com/google-research/football).
2+
3+
### Installation
4+
5+
- `pip install gfootball`
6+
- `pip install tizero`
7+
- test the installation by `python3 -m gfootball.play_game --action_set=full`.
8+
9+
### Evaluate JiDi submissions locally
10+
11+
If you want to evaluate your JiDi submissions locally, please try to use tizero as illustrated [here](https://github.com/OpenRL-Lab/TiZero#evaluate-jidi-submissions-locally).
12+
13+
14+
### Convert dump file to video
15+
16+
After the installation, you can use tizero to convert a dump file to a video file.
17+
The usage is `tizero dump2video <dump_file> <output_dir> --episode_length <the length> --render_type <2d/3d>`.
18+
19+
You can download an example dump file from [here](http://jidiai.cn/daily_6484285/daily_6484285.dump).
20+
And then execute `tizero dump2video daily_6484285.dump ./` in your terminal. By default, the episode length is 3000 and the render type is 2d.
21+
After about a minute, you will find a video file named `daily_6484285.avi` in your current directory.

examples/snake/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11

22
This is the example for the snake game.
33

4+
## Installation
5+
6+
```bash
7+
pip install "openrl[selfplay]"
8+
```
9+
410
## Usage
511

612
```bash

openrl/algorithms/dqn.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,9 @@ def prepare_loss(
167167
)
168168

169169
q_targets = rewards_batch + self.gamma * max_next_q_values * next_masks_batch
170-
q_loss = torch.mean(F.mse_loss(q_values, q_targets.detach())) # 均方误差损失函数
170+
q_loss = torch.mean(
171+
F.mse_loss(q_values, q_targets.detach())
172+
) # 均方误差损失函数
171173

172174
loss_list.append(q_loss)
173175

openrl/algorithms/vdn.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,9 @@ def prepare_loss(
211211
rewards_batch = rewards_batch.reshape(-1, self.n_agent, 1)
212212
rewards_batch = torch.sum(rewards_batch, dim=1, keepdim=True).view(-1, 1)
213213
q_targets = rewards_batch + self.gamma * max_next_q_values * next_masks_batch
214-
q_loss = torch.mean(F.mse_loss(q_values, q_targets.detach())) # 均方误差损失函数
214+
q_loss = torch.mean(
215+
F.mse_loss(q_values, q_targets.detach())
216+
) # 均方误差损失函数
215217

216218
loss_list.append(q_loss)
217219
return loss_list

openrl/arena/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
""""""
1818
from typing import Callable, Optional
1919

20+
import openrl
2021
from openrl.arena.two_player_arena import TwoPlayerArena
2122
from openrl.envs import pettingzoo_all_envs
2223

@@ -27,8 +28,12 @@ def make_arena(
2728
render: Optional[bool] = False,
2829
**kwargs,
2930
):
31+
print(openrl.envs.PettingZoo.registration.pettingzoo_env_dict.keys())
3032
if custom_build_env is None:
31-
if env_id in pettingzoo_all_envs:
33+
if (
34+
env_id in pettingzoo_all_envs
35+
or env_id in openrl.envs.PettingZoo.registration.pettingzoo_env_dict.keys()
36+
):
3237
from openrl.envs.PettingZoo import make_PettingZoo_env
3338

3439
render_mode = None

openrl/arena/base_arena.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def reset(
4747
total_games: int,
4848
max_game_onetime: int = 5,
4949
seed: int = 0,
50+
dispatch_func: Optional[Callable] = None,
5051
):
5152
self.seed = seed
5253
if self.pbar:
@@ -57,6 +58,10 @@ def reset(
5758
self.max_game_onetime = max_game_onetime
5859
self.agents = agents
5960
assert isinstance(self.game, BaseGame)
61+
62+
if dispatch_func is not None:
63+
self.dispatch_func = dispatch_func
64+
6065
self.game.reset(seed=seed, dispatch_func=self.dispatch_func)
6166

6267
def close(self):

openrl/envs/PettingZoo/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@
2424

2525

2626
def PettingZoo_make(id, render_mode, disable_env_checker, **kwargs):
27+
kwargs.__setitem__("id", id)
2728
if id.startswith("snakes_"):
2829
from openrl.envs.snake.snake_pettingzoo import SnakeEatBeansAECEnv
2930

30-
kwargs.__setitem__("id", id)
3131
register(id, SnakeEatBeansAECEnv)
3232
if id in pettingzoo_env_dict.keys():
3333
env = pettingzoo_env_dict[id](render_mode=render_mode, **kwargs)

openrl/envs/snake/snake.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,9 @@ class Snake:
677677
def __init__(self, player_id, board_width, board_height, init_len):
678678
self.actions = [-2, 2, -1, 1]
679679
self.actions_name = {-2: "up", 2: "down", -1: "left", 1: "right"}
680-
self.direction = random.choice(self.actions) # 方向[-2,2,-1,1]分别表示[上,下,左,右]
680+
self.direction = random.choice(
681+
self.actions
682+
) # 方向[-2,2,-1,1]分别表示[上,下,左,右]
681683
self.board_width = board_width
682684
self.board_height = board_height
683685
x = random.randrange(0, board_height)

openrl/envs/snake/snake_3v3.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -794,7 +794,9 @@ class Snake:
794794
def __init__(self, player_id, board_width, board_height, init_len):
795795
self.actions = [-2, 2, -1, 1]
796796
self.actions_name = {-2: "up", 2: "down", -1: "left", 1: "right"}
797-
self.direction = random.choice(self.actions) # 方向[-2,2,-1,1]分别表示[上,下,左,右]
797+
self.direction = random.choice(
798+
self.actions
799+
) # 方向[-2,2,-1,1]分别表示[上,下,左,右]
798800
self.board_width = board_width
799801
self.board_height = board_height
800802
x = random.randrange(0, board_height)

openrl/selfplay/opponents/utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,10 @@ def load_opponent_from_jidi_path(
101101
assert opponent_path.exists()
102102
try:
103103
sys.path.append(str(opponent_path.parent))
104+
105+
module_name = ".".join(opponent_path.parts)
104106
submission_module = __import__(
105-
"{}.submission".format(opponent_path.name), fromlist=["submission"]
107+
f"{module_name}.submission", fromlist=["submission"]
106108
)
107109
opponent_id = get_opponent_id(opponent_info)
108110
opponent = JiDiOpponent(

0 commit comments

Comments
 (0)