# Spaces: Running
"""Evaluate and watch a pretrained PPO agent on LunarLander-v2.

The script downloads a model checkpoint from the Hugging Face Hub, loads it
with Stable-Baselines3, prints the mean and standard deviation of the reward
over 10 evaluation episodes, and then renders the agent for 1000 environment
steps.
"""
import gym

from huggingface_sb3 import load_from_hub
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

# Retrieve the model from the hub.
#   repo_id  = id of the model repository on the Hugging Face Hub
#              (repo_id = {organization}/{repo_name})
#   filename = name of the model zip file inside that repository
checkpoint = load_from_hub(
    repo_id="ThomasSimonini/ppo-LunarLander-v2",
    filename="ppo-LunarLander-v2.zip",
)
model = PPO.load(checkpoint)

eval_env = gym.make("LunarLander-v2")
try:
    # Evaluate the agent.
    mean_reward, std_reward = evaluate_policy(
        model,
        eval_env,
        n_eval_episodes=10,
        deterministic=True,
    )
    print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

    # Watch the agent play.
    # NOTE(review): this loop is written against the gym API in which
    # reset() returns only the observation and step() returns a 4-tuple --
    # confirm the installed gym version matches.
    obs = eval_env.reset()
    for _ in range(1000):
        action, _state = model.predict(obs)
        obs, reward, done, info = eval_env.step(action)
        eval_env.render()
        if done:
            # Episode finished: start a new one so rendering continues.
            obs = eval_env.reset()
finally:
    # Always release the environment (and any render window), even when
    # evaluation or rendering raises or the user interrupts the script.
    eval_env.close()