grg committed
Commit be5548b · 0 Parent(s)

Cleaned old git history

This view is limited to 50 files because it contains too many changes. See raw diff.

Files changed (50)
  1. .dockerignore +23 -0
  2. .gitignore +28 -0
  3. LICENSE.txt +8 -0
  4. README-rsrc/doorkey.png +0 -0
  5. README-rsrc/evaluate-terminal-logs.png +0 -0
  6. README-rsrc/model.png +0 -0
  7. README-rsrc/model.xml +1 -0
  8. README-rsrc/train-tensorboard.png +0 -0
  9. README-rsrc/train-terminal-logs.png +0 -0
  10. README.md +164 -0
  11. README_old.md +215 -0
  12. autocrop.sh +14 -0
  13. campain_continuer.py +282 -0
  14. campain_launcher.py +488 -0
  15. data_analysis.ipynb +0 -0
  16. data_analysis.py +1650 -0
  17. data_analysis_neurips.py +570 -0
  18. data_visualize.py +1436 -0
  19. display_LLM_evaluations.py +45 -0
  20. draw_tree.py +104 -0
  21. draw_trees.sh +19 -0
  22. dummy_run.sh +109 -0
  23. eval_LLMs.sh +42 -0
  24. gpuh.py +99 -0
  25. gym-minigrid/.gitignore +9 -0
  26. gym-minigrid/.travis.yml +10 -0
  27. gym-minigrid/LICENSE +201 -0
  28. gym-minigrid/README.md +511 -0
  29. gym-minigrid/benchmark.py +53 -0
  30. gym-minigrid/gym_minigrid/__init__.py +6 -0
  31. gym-minigrid/gym_minigrid/backup_envs/bobo.py +301 -0
  32. gym-minigrid/gym_minigrid/backup_envs/cointhief.py +431 -0
  33. gym-minigrid/gym_minigrid/backup_envs/dancewithonenpc.py +344 -0
  34. gym-minigrid/gym_minigrid/backup_envs/diverseexit.py +584 -0
  35. gym-minigrid/gym_minigrid/backup_envs/exiter.py +347 -0
  36. gym-minigrid/gym_minigrid/backup_envs/gotodoorpolite.py +292 -0
  37. gym-minigrid/gym_minigrid/backup_envs/gotodoorsesame.py +165 -0
  38. gym-minigrid/gym_minigrid/backup_envs/gotodoortalk.py +189 -0
  39. gym-minigrid/gym_minigrid/backup_envs/gotodoortalkhard.py +199 -0
  40. gym-minigrid/gym_minigrid/backup_envs/gotodoortalkhardnpc.py +283 -0
  41. gym-minigrid/gym_minigrid/backup_envs/gotodoortalkhardsesame.py +204 -0
  42. gym-minigrid/gym_minigrid/backup_envs/gotodoortalkhardsesamnpc.py +294 -0
  43. gym-minigrid/gym_minigrid/backup_envs/gotodoortalkhardsesamnpcguides.py +384 -0
  44. gym-minigrid/gym_minigrid/backup_envs/gotodoorwizard.py +209 -0
  45. gym-minigrid/gym_minigrid/backup_envs/guidethief.py +416 -0
  46. gym-minigrid/gym_minigrid/backup_envs/helper.py +295 -0
  47. gym-minigrid/gym_minigrid/backup_envs/showme.py +525 -0
  48. gym-minigrid/gym_minigrid/backup_envs/socialenv.py +194 -0
  49. gym-minigrid/gym_minigrid/backup_envs/spying.py +429 -0
  50. gym-minigrid/gym_minigrid/backup_envs/talkitout.py +385 -0
.dockerignore ADDED
@@ -0,0 +1,23 @@
+ storage/*
+ __pycache__/*
+ campain_logs/*
+ llm_log/*
+ *egg-info
+ .vscode
+ *.idea*
+ retrieve_plafrim_data.sh
+ sync_plafrim.sh
+ retrieve_remy.sh
+ sync_remy.sh
+ *.gif
+ viz/*
+ graphics/*
+ retrieve_graphics.sh
+ retrieve_grg.sh
+ run_seeds.sh
+ sync_grg.sh
+ get_node.sh
+ llm_log/
+ .git*
+ .cache*
+ storage_old_2021.tar.gz
.gitignore ADDED
@@ -0,0 +1,28 @@
+ *__pycache__
+ storage/*
+ graphics/*
+ storage_old_2021.tar.gz
+ *egg-info
+ .vscode
+ *.idea*
+ retrieve_plafrim_data.sh
+ sync_plafrim.sh
+ retrieve_remy.sh
+ sync_remy.sh
+ *.gif
+ viz/*
+ retrieve_graphics.sh
+ retrieve_grg.sh
+ run_seeds.sh
+ sync_grg.sh
+ get_node.sh
+ llm_log/
+ .cache/
+ .ipynb_checkpoints/*
+ campain_logs
+ llm_data/backup
+ saved_logs_LLMs/*
+ plots/*
+ retrieve_viz_and_graphics.sh
+ retrieve_llm_log.sh
+ gym-minigrid/figures/*
LICENSE.txt ADDED
@@ -0,0 +1,8 @@
+ The MIT License (MIT)
+ Copyright © 2021 Flowers Team
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
README-rsrc/doorkey.png ADDED
README-rsrc/evaluate-terminal-logs.png ADDED
README-rsrc/model.png ADDED
README-rsrc/model.xml ADDED
@@ -0,0 +1 @@
+ <mxfile userAgent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/9.3.1 Chrome/66.0.3359.181 Electron/3.0.6 Safari/537.36" version="9.3.4" editor="www.draw.io" type="device"><diagram id="81a450cc-4610-c693-ab1c-18701f2b71dc" name="Page-1">7Vtbj5s4FP41SLsPW8UYSHhMssnsQytVnV3t9tEBh9ACjohz66+vCXYwNpkyEweS0Y40Gvv4fs53bsZjwWl6eMrRevWJhDix7EF4sOCflm0De2izPwXlWFJcb1ASojwOeaeK8Bz/wJwoum3jEG9qHSkhCY3XdWJAsgwHtEZDeU729W5LktRXXaOIrzioCM8BSrDW7d84pKuSOnKl3n/hOFqJlcGAtyxQ8D3KyTbj61k2XJ5+yuYUibl4/80KhWQvkeDMgtOcEFqW0sMUJwVvBdvKcfMLred95zijbQaIERt6FGfHIWMFr5KcrkhEMpTMKurkdD5czDBgtRVNE1YErIgPMf2vIH9wee0r78S2kx+lpqL6lY/6hik9cgygLSWMVK37kZA176efTWyebPOA754Dj6I8wmfplLTiYNI4zpAnTFLMdsM65DhBNN7VMYA4lKJzv4qdrMA5eoG75RQ7lGz5pFOS7RhF53qdp/tVTPHzGp1OtWc6VufzRU7scE7x4cVD8lZbIJZrKBSI3Fd4B4K2krAOB9fzxdH4oqMwC8eFIrNakKDNJg4UqL0AqIvskVFhtwSFxA+3gR2C1ho7fIXPJGabq8ThKeIYKWwuMc5HyQqtTjT8xUQlD7SJTiI7H7uVFF1NinFa2FXbSxhzJpvtghWjokgFjU0pkQV1kVcUBQYMzbQu+A3NyXc8JQnJGSUjWWGRlnGSKCSUxFFWoIeBADP6pNCNmJn4MW9I4zA8mbMmZaurowl9cxS5+Lq+OQ34sg2om/Cqj2DkZUky5xkiPFoGmthZy9ifOrNJW7fg6QbA78sreJrezNIFDsM4i65zCwZ51x7YfwAF2KClI3EMIHuom6CMnfYdmqAlyagkQW84nvhzMxKE/Vmm0W0DAYMKIVsO765CB9dQ5KA4KMe9WeDgdJ902PfmkHwdVmBo2iOdhjLtQUepw7qQ4KY1noA3eBE2aiKh9GeFcgNvBYuvmYinL/+8Az955nMXfrLJrt5rBNhKfUCDWQZtUzrjEZ3YjZ7o63zvO9E/J4idJPqPm3ksRwEOGq3CHPhT32+N1KYbKac3pF68koL3h9Qur6SAaxSpZzzKaCzB2RaPAu1AxvpphlshFXaB1OYoAqhRh+PXp7gQxb4hoBDHlFRg9ZqE8RHSw9nYm3ieGbX0FckMXU0rb5Ue2jdxH7pCtVXJb9t0LRaPTlJ4SdFvpaaOrqZ2R2oKlWtq4Cpqai5JhH7nocPw7mKHBlGbzxJbu0j9s83H578/XRc3GOTc2yMN0DYmtv3r2WhfD2zdDt1ZdjZsMFFtb82M4xZe70UekuFub4ZCv5ZnSUaAKM7Y73X2woD2A+UTKWibZxjRfj3P6gCMv7oo7/kytDHzgH3Bt+F5woUEYQL+TxIuBIYNWcL5kZTpNAFen7vfu4EXxlzWENjbfWfD04/5NkmOvZt29eOF16Flh977R2HDMwrY3+s6/SHFfaDQ7xGE+pcIazq3im/C3IGtUaY+fpA913RmTUashtKCL9lis5Y6Nr+dKKjSrL+x5T9YQ7bVAXnNvVox4BUXcb/fqZM1EaCq6anfoS+1NQDtOsTOjRHzII99THhCNSBrwBAwgyFWrR6slzd91X8FwNlP</diagram></mxfile>
README-rsrc/train-tensorboard.png ADDED
README-rsrc/train-terminal-logs.png ADDED
README.md ADDED
@@ -0,0 +1,164 @@
+ ---
+ title: SocialAI School Demo
+ emoji: 🧙🏻‍♂️
+ colorFrom: gray
+ colorTo: indigo
+ sdk: docker
+ app_port: 7860
+ ---
+
+ # SocialAI
+
+ [comment]: <> (This repository is the official implementation of [My Paper Title]&#40;https://arxiv.org/abs/2030.12345&#41;. )
+
+ [comment]: <> (TODO: add arxiv link later)
+ This repository is the official implementation of SocialAI: Benchmarking Socio-Cognitive Abilities in Deep Reinforcement Learning Agents.
+
+ The project's website is [here](https://sites.google.com/view/socialai).
+
+ The code is based on:
+ [minigrid](https://github.com/maximecb/gym-minigrid)
+
+ Additional repositories used:
+ [BabyAI](https://github.com/mila-iqia/babyai)
+ [RIDE](https://github.com/facebookresearch/impact-driven-exploration)
+ [astar](https://github.com/jrialland/python-astar)
+
+
+ ## Installation
+
+ [comment]: <> (Clone the repo)
+
+ [comment]: <> (```)
+
+ [comment]: <> (git clone https://gitlab.inria.fr/gkovac/act-and-speak.git)
+
+ [comment]: <> (```)
+
+ Create and activate your conda env:
+ ```
+ conda create --name social_ai python=3.7
+ conda activate social_ai
+ conda install -c anaconda graphviz
+ ```
+
+ Install the required packages:
+ ```
+ pip install -r requirements.txt
+ pip install -e torch-ac
+ pip install -e gym-minigrid
+ conda install pytorch torchvision torchaudio pytorch-cuda=11.6 -c pytorch -c nvidia
+ ```
+
+ ## Interactive policy
+
+ To run an environment in interactive mode, run:
+ ```
+ python -m scripts.manual_control
+ ```
+
+ You can test different environments with the ```--env``` parameter, as shown below.
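+
+ For example (the environment name below is one used elsewhere in this README):
+ ```
+ python -m scripts.manual_control --env SocialAI-AsocialBoxInformationSeekingParamEnv-v1
+ ```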
+
+
+
+
+ # RL experiments
+
+ ## Training
+
+ ### Minimal example
+
+ To train a policy, run:
+ ```train
+ python -m scripts.train --model test_model_name --seed 1 --compact-save --algo ppo --env SocialAI-AsocialBoxInformationSeekingParamEnv-v1 --dialogue --save-interval 1 --log-interval 1 --frames 5000000 --multi-modal-babyai11-agent --arch original_endpool_res --custom-ppo-2
+ ```
+
+ The policy should reach a success rate above 0.95 within the first 2M environment interactions.
+
+ ### Recreating all the experiments
+
+ See ```run_SAI_final_case_studies.txt``` for the experiments in the paper.
+
+ #### Regular machine
+
+ To run the experiments on a regular machine, use `run_SAI_final_case_studies.txt`; it contains the bash commands for running all the RL experiments.
+
+
+
+ #### Slurm-based cluster (todo:)
+
+ To recreate all the experiments from the paper on a Slurm-based cluster, configure the `campain_launcher.py` script and run:
+
+ ```
+ python campain_launcher.py run_NeurIPS.txt
+ ```
+
+ [//]: # (The list of all the experiments and their parameters can be seen in run_NeurIPS.txt)
+
+ [//]: # ()
+ [//]: # (For example the bash equivalent of the following configuration:)
+
+ [//]: # (```)
+
+ [//]: # (--slurm_conf jz_long_2gpus_32g --nb_seeds 16 --model NeurIPS_Help_NoSocial_NO_BONUS_ABL --compact-save --algo ppo --*env MiniGrid-AblationExiter-8x8-v0 --*env_args hidden_npc True --dialogue --save-interval 10 --frames 5000000 --*multi-modal-babyai11-agent --*arch original_endpool_res --*custom-ppo-2)
+
+ [//]: # (```)
+
+ [//]: # (is:)
+
+ [//]: # (```)
+
+ [//]: # (for SEED in {1..16})
+
+ [//]: # (do)
+
+ [//]: # ( python -m scripts.train --model NeurIPS_Help_NoSocial_NO_BONUS_ABL --compact-save --algo ppo --*env MiniGrid-AblationExiter-8x8-v0 --*env_args hidden_npc True --dialogue --save-interval 10 --frames 5000000 --*multi-modal-babyai11-agent --*arch original_endpool_res --*custom-ppo-2 --seed $SEED & )
+
+ [//]: # (done)
+
+ [//]: # (```)
+
+
+
+ ## Evaluation
+
+ To evaluate a policy, run:
+
+ ```eval
+ python -m scripts.evaluate_new --episodes 500 --test-set-seed 1 --model-label test_model --eval-env SocialAI-TestLanguageFeedbackSwitchesInformationSeekingParamEnv-v1 --model-to-evaluate storage/test/ --n-seeds 8
+ ```
+
+ To visualize a policy, run:
+ ```
+ python -m scripts.visualize --model storage/test_model_name/1/ --pause 0.1 --seed $RANDOM --episodes 20 --gif viz/test
+ ```
+
+
+ # LLM experiments
+
+ For the LLM experiments, set your ```OPENAI_API_KEY``` (and ```HF_TOKEN```) variables in ```~/.bashrc``` or wherever you prefer.
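+ For example, with placeholder values:
+ ```
+ export OPENAI_API_KEY="sk-..."
+ export HF_TOKEN="hf_..."
+ ```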
+
+ ### Creating in-context examples
+ To create in-context examples, you can use the ```create_LLM_examples.py``` script.
+
+ This script opens an interactive window in which you can manually control the agent.
+ By default, nothing is saved.
+ The general procedure is to press 'enter' to skip over environments you don't like.
+ When you see an environment you want, move the agent to the desired position and start recording (press 'r'). The current and all following steps of the episode will be recorded.
+ Then control the agent and finish the episode. A new episode will then start, with recording turned off again.
+
+ If you already like some of the previously collected examples and want to append to them, you can use the ```--load``` argument, as sketched below.
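+
+ A minimal sketch of such an invocation (the exact script location and the path are assumptions):
+ ```
+ python create_LLM_examples.py --load llm_data/in_context_examples/my_examples/episodes.pkl
+ ```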
+
+ ### Evaluating LLM-based agents
+
+ The script ```eval_LLMs.sh``` contains the bash commands to run all the experiments in the paper.
+
+ Here is an example of running evaluation of the ```text-ada-001``` model on the AsocialBox environment:
+ ```
+ python -m scripts.LLM_test --episodes 10 --max-steps 15 --model text-ada-001 --env-args size 7 --env-name SocialAI-AsocialBoxInformationSeekingParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_asocialbox_SocialAI-AsocialBoxInformationSeekingParamEnv-v1_2023_07_19_19_28_48/episodes.pkl
+ ```
+
+ If you want to control the agent yourself, you can set the model to ```interactive```.
+ The ```dummy``` agent just executes the move-forward action, and ```random``` executes a random action. These agents are useful for testing.
+
+
README_old.md ADDED
@@ -0,0 +1,215 @@
+ # Embodied acting and speaking
+
+ This code was based on these repositories:
+
+ [`gym-minigrid`](https://github.com/maximecb/gym-minigrid)
+
+ [`torch-ac`](https://github.com/lcswillems/torch-ac)
+
+ [`rl-starter-files`](add_url)
+
+ ## Features
+
+ - **Script to train**, including:
+   - Log in txt, CSV and Tensorboard
+   - Save model
+   - Stop and restart training
+   - Use A2C or PPO algorithms
+ - **Script to visualize**, including:
+   - Act by sampling or argmax
+   - Save as gif
+ - **Script to evaluate**, including:
+   - Act by sampling or argmax
+   - List the worst-performing episodes
+
+ ## Installation
+
+ ### Option 1
+
+ [comment]: <> (todo: add this part)
+ [comment]: <> (Clone the repo)
+
+ [comment]: <> (```)
+
+ [comment]: <> (git clone https://gitlab.inria.fr/gkovac/act-and-speak.git)
+
+ [comment]: <> (```)
+ Create and activate your conda env:
+ ```
+ conda create --name act_and_speak python=3.6
+ conda activate act_and_speak
+ ```
+ Install the required packages:
+ ```
+ pip install -r requirements.txt
+ pip install -e torch-ac
+ pip install -e gym-minigrid --use-feature=2020-resolver
+ ```
+
+ ### Option 2
+ Alternatively, use the conda yaml file:
+ ```
+ TODO:
+ ```
+
+ ## Example of use
+
+ Train, visualize and evaluate an agent on the `MiniGrid-DoorKey-5x5-v0` environment:
+
+ <p align="center"><img src="README-rsrc/doorkey.png"></p>
+
+ 1. Train the agent on the `MiniGrid-DoorKey-5x5-v0` environment with the PPO algorithm:
+
+ ```
+ python3 -m scripts.train --algo ppo --env MiniGrid-DoorKey-5x5-v0 --model DoorKey --save-interval 10 --frames 80000
+ ```
+
+ <p align="center"><img src="README-rsrc/train-terminal-logs.png"></p>
+
+ 2. Visualize the agent's behavior:
+
+ ```
+ python3 -m scripts.visualize --env MiniGrid-DoorKey-5x5-v0 --model DoorKey
+ ```
+
+ <p align="center"><img src="README-rsrc/visualize-doorkey.gif"></p>
+
+ 3. Evaluate the agent's performance:
+
+ ```
+ python3 -m scripts.evaluate --env MiniGrid-DoorKey-5x5-v0 --model DoorKey
+ ```
+
+ <p align="center"><img src="README-rsrc/evaluate-terminal-logs.png"></p>
+
+ **Note:** More details on the commands are given below.
+
+ ## Other examples
+
+ ### Handle textual instructions
+
+ In the `GoToDoor` environment, the agent receives an image along with a textual instruction. To handle the latter, add `--text` to the command:
+
+ ```
+ python3 -m scripts.train --algo ppo --env MiniGrid-GoToDoor-5x5-v0 --model GoToDoor --text --save-interval 10 --frames 1000000
+ ```
+
+ <p align="center"><img src="README-rsrc/visualize-gotodoor.gif"></p>
+
+ ### Handle dialogue with a multi-headed agent
+
+ In the `GoToDoorTalk` environment, the agent receives an image along with the dialogue. To handle the latter, add `--dialogue` and, to use the multi-headed agent, add `--multi-headed-agent` to the command:
+
+ ```
+ python3 -m scripts.train --algo ppo --env MiniGrid-GoToDoorTalk-5x5-v0 --model GoToDoorMultiHead --dialogue --multi-headed-agent --save-interval 10 --frames 1000000
+ ```
+
+ ### Add memory
+
+ In the `RedBlueDoors` environment, the agent has to open the red door and then the blue one. To solve it efficiently, it has to remember that it has opened the red door. To add memory to the agent, add `--recurrence X` to the command:
+
+ ```
+ python3 -m scripts.train --algo ppo --env MiniGrid-RedBlueDoors-6x6-v0 --model RedBlueDoors --recurrence 4 --save-interval 10 --frames 1000000
+ ```
+
+ <p align="center"><img src="README-rsrc/visualize-redbluedoors.gif"></p>
+
+ ## Files
+
+ This package contains:
+ - scripts to:
+   - train an agent \
+   in `script/train.py` ([more details](#scripts-train))
+   - visualize the agent's behavior \
+   in `script/visualize.py` ([more details](#scripts-visualize))
+   - evaluate the agent's performance \
+   in `script/evaluate.py` ([more details](#scripts-evaluate))
+ - a default agent model \
+ in `model.py` ([more details](#model))
+ - utility classes and functions used by the scripts \
+ in `utils`
+
+ These files are suited for [`gym-minigrid`](https://github.com/maximecb/gym-minigrid) environments and [`torch-ac`](https://github.com/lcswillems/torch-ac) RL algorithms. They are easy to adapt to other environments and RL algorithms by modifying:
+ - `model.py`
+ - `utils/format.py`
+
+ <h2 id="scripts-train">scripts/train.py</h2>
+
+ An example of use:
+
+ ```bash
+ python3 -m scripts.train --algo ppo --env MiniGrid-DoorKey-5x5-v0 --model DoorKey --save-interval 10 --frames 80000
+ ```
+
+ The script loads the model in `storage/DoorKey` or creates it if it doesn't exist, then trains it with the PPO algorithm on the MiniGrid DoorKey environment, and saves it every 10 updates in `storage/DoorKey`. It stops after 80,000 frames.
+
+ **Note:** You can define a different storage location in the environment variable `PROJECT_STORAGE`.
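+ For example (illustrative):
+ ```
+ export PROJECT_STORAGE=/path/to/my/storage
+ ```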
+
+ More generally, the script has 2 required arguments:
+ - `--algo ALGO`: name of the RL algorithm used to train
+ - `--env ENV`: name of the environment to train on
+
+ and a bunch of optional arguments, among which:
+ - `--recurrence N`: the gradient will be backpropagated over N timesteps. By default, N = 1. If N > 1, an LSTM is added to the model to provide memory.
+ - `--text`: a GRU is added to the model to handle text input.
+ - ... (see more using `--help`)
+
+ During training, logs are printed in your terminal (and saved in text and CSV format):
+
+ <p align="center"><img src="README-rsrc/train-terminal-logs.png"></p>
+
+ **Note:** `U` gives the update number, `F` the total number of frames, `FPS` the number of frames per second, `D` the total duration, `rR:μσmM` the mean, std, min and max reshaped return per episode, `F:μσmM` the mean, std, min and max number of frames per episode, `H` the entropy, `V` the value, `pL` the policy loss, `vL` the value loss and `∇` the gradient norm.
+
+ During training, logs are also plotted in Tensorboard:
+
+ <p><img src="README-rsrc/train-tensorboard.png"></p>
+
+ <h2 id="scripts-visualize">scripts/visualize.py</h2>
+
+ An example of use:
+
+ ```
+ python3 -m scripts.visualize --env MiniGrid-DoorKey-5x5-v0 --model DoorKey
+ ```
+
+ <p align="center"><img src="README-rsrc/visualize-doorkey.gif"></p>
+
+ In this use case, the script displays how the model in `storage/DoorKey` behaves on the MiniGrid DoorKey environment.
+
+ More generally, the script has 2 required arguments:
+ - `--env ENV`: name of the environment to act on.
+ - `--model MODEL`: name of the trained model.
+
+ and a bunch of optional arguments, among which:
+ - `--argmax`: select the action with the highest probability
+ - ... (see more using `--help`)
+
+ <h2 id="scripts-evaluate">scripts/evaluate.py</h2>
+
+ An example of use:
+
+ ```
+ python3 -m scripts.evaluate --env MiniGrid-DoorKey-5x5-v0 --model DoorKey
+ ```
+
+ <p align="center"><img src="README-rsrc/evaluate-terminal-logs.png"></p>
+
+ In this use case, the script prints in the terminal the performance of the model in `storage/DoorKey` over 100 episodes.
+
+ More generally, the script has 2 required arguments:
+ - `--env ENV`: name of the environment to act on.
+ - `--model MODEL`: name of the trained model.
+
+ and a bunch of optional arguments, among which:
+ - `--episodes N`: number of episodes of evaluation. By default, N = 100.
+ - ... (see more using `--help`)
+
+ <h2 id="model">model.py</h2>
+
+ The default model is described by the following schema:
+
+ <p align="center"><img src="README-rsrc/model.png"></p>
+
+ By default, the memory part (in red) and the language part (in blue) are disabled. They can be enabled by setting the `use_memory` and `use_text` parameters of the model constructor to `True`.
+
+ This model can be easily adapted to your needs.
autocrop.sh ADDED
@@ -0,0 +1,14 @@
+ #!/bin/bash
+
+
+ # Loop through all files given as arguments
+ for file in "$@"
+ do
+     # Check if the file is an image
+     if [[ $file == *.jpg || $file == *.png ]]
+     then
+         # Crop the image using the `convert` command from the ImageMagick suite
+         echo "Cropping $file"
+         convert "$file" -trim +repage "$file"
+     fi
+ done
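+
+ # Example usage (illustrative; requires ImageMagick): ./autocrop.sh README-rsrc/*.png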
campain_continuer.py ADDED
@@ -0,0 +1,282 @@
+ import sys
+ from pathlib import Path
+ from datetime import date
+ import subprocess
+ import shutil
+ import os
+ import stat
+ import getpass
+ import re
+ import glob
+
+
+ def process_arg_string(expe_args):  # extract arguments flagged with a '*' as details for the experiment name
+     details_string = ''
+     processed_arg_string = expe_args.replace('*', '')  # keep a version of args cleaned from exp-name-related flags
+     # args = [arg_chunk.split(' -') for arg_chunk in expe_args.split(' --')]
+     arg_chunks = [arg_chunk for arg_chunk in expe_args.split(' --')]
+     args_list = []
+     for arg in arg_chunks:
+         if ' -' in arg and arg.split(' -')[1].isalpha():
+             args_list.extend(arg.split(' -'))
+         else:
+             args_list.append(arg)
+     # args_list = [item for sublist in args for item in sublist]  # flatten
+     for arg in args_list:
+         if arg == '':
+             continue
+         if arg[0] == '*':
+             if arg[-1] == ' ':
+                 arg = arg[:-1]
+             details_string += '_' + arg[1:].replace(' ', '_').replace('/', '-')
+     return details_string, processed_arg_string
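+     # e.g. (illustrative): process_arg_string(" --*env MyEnv-v0 --lr 1e-4")
+     # returns ("_env_MyEnv-v0", " --env MyEnv-v0 --lr 1e-4")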
+
+
+ slurm_confs = {'curta_extra_long': "#SBATCH -p inria\n"
+                                    "#SBATCH -t 119:00:00\n",
+                'curta_long': "#SBATCH -p inria\n"
+                              "#SBATCH -t 72:00:00\n",
+                'curta_medium': "#SBATCH -p inria\n"
+                                "#SBATCH -t 48:00:00\n",
+                'curta_short': "#SBATCH -p inria\n"
+                               "#SBATCH -t 24:00:00\n",
+                'jz_super_short_gpu':
+                    '#SBATCH -A imi@v100\n'
+                    '#SBATCH --gres=gpu:1\n'
+                    "#SBATCH -t 9:59:00\n"
+                    "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_gpu': '#SBATCH -A imi@v100\n'
+                                '#SBATCH --gres=gpu:1\n'
+                                "#SBATCH -t 19:59:00\n"
+                                "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_gpu_chained': '#SBATCH -A imi@v100\n'
+                                        '#SBATCH --gres=gpu:1\n'
+                                        "#SBATCH -t 19:59:00\n"
+                                        "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_2gpus_chained': '#SBATCH -A imi@v100\n'
+                                          '#SBATCH --gres=gpu:2\n'
+                                          "#SBATCH -t 19:59:00\n"
+                                          "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_medium_gpu': '#SBATCH -A imi@v100\n'
+                                 '#SBATCH --gres=gpu:1\n'
+                                 "#SBATCH -t 48:00:00\n"
+                                 "#SBATCH --qos=qos_gpu-t4\n",
+                'jz_super_short_2gpus': '#SBATCH -A imi@v100\n'
+                                        '#SBATCH --gres=gpu:2\n'
+                                        "#SBATCH -t 14:59:00\n"
+                                        "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_2gpus': '#SBATCH -A imi@v100\n'
+                                  '#SBATCH --gres=gpu:2\n'
+                                  "#SBATCH -t 19:59:00\n"
+                                  "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_2gpus_32g': '#SBATCH -A imi@v100\n'
+                                      '#SBATCH -C v100-32g\n'
+                                      '#SBATCH --gres=gpu:2\n'
+                                      "#SBATCH -t 19:59:00\n"
+                                      "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_medium_2gpus': '#SBATCH -A imi@v100\n'
+                                   '#SBATCH --gres=gpu:2\n'
+                                   "#SBATCH -t 48:00:00\n"
+                                   "#SBATCH --qos=qos_gpu-t4\n",
+                'jz_medium_2gpus_32g': '#SBATCH -A imi@v100\n'
+                                       '#SBATCH -C v100-32g\n'
+                                       '#SBATCH --gres=gpu:2\n'
+                                       "#SBATCH -t 48:00:00\n"
+                                       "#SBATCH --qos=qos_gpu-t4\n",
+                'jz_long_gpu': '#SBATCH -A imi@v100\n'
+                               '#SBATCH --gres=gpu:1\n'
+                               "#SBATCH -t 72:00:00\n"
+                               "#SBATCH --qos=qos_gpu-t4\n",
+                'jz_long_2gpus': '#SBATCH -A imi@v100\n'
+                                 '#SBATCH --gres=gpu:2\n'
+                                 '#SBATCH -t 72:00:00\n'
+                                 '#SBATCH --qos=qos_gpu-t4\n',
+                'jz_long_2gpus_32g': '#SBATCH -A imi@v100\n'
+                                     '#SBATCH -C v100-32g\n'
+                                     '#SBATCH --gres=gpu:2\n'
+                                     "#SBATCH -t 72:00:00\n"
+                                     "#SBATCH --qos=qos_gpu-t4\n",
+                'jz_super_long_2gpus_32g': '#SBATCH -A imi@v100\n'
+                                           '#SBATCH -C v100-32g\n'
+                                           '#SBATCH --gres=gpu:2\n'
+                                           "#SBATCH -t 99:00:00\n"
+                                           "#SBATCH --qos=qos_gpu-t4\n",
+                'jz_short_cpu': '#SBATCH -A imi@cpu\n'
+                                "#SBATCH -t 19:59:00\n"
+                                "#SBATCH --qos=qos_cpu-t3\n",
+                'jz_medium_cpu': '#SBATCH -A imi@cpu\n'
+                                 "#SBATCH -t 48:00:00\n"
+                                 "#SBATCH --qos=qos_cpu-t4\n",
+                'jz_long_cpu': '#SBATCH -A imi@cpu\n'
+                               "#SBATCH -t 72:00:00\n"
+                               "#SBATCH --qos=qos_cpu-t4\n",
+                'plafrim_cpu_medium': "#SBATCH -t 48:00:00\n",
+                'plafrim_cpu_long': "#SBATCH -t 72:00:00\n",
+                'plafrim_gpu_medium': '#SBATCH -p long_sirocco\n'
+                                      "#SBATCH -t 48:00:00\n"
+                                      '#SBATCH --gres=gpu:1\n'
+                }
+
+ cur_path = str(Path.cwd())
+ date = date.today().strftime("%d-%m")
+ # create campain log dir if not already done
+ Path(cur_path + "/campain_logs/jobouts/").mkdir(parents=True, exist_ok=True)
+ Path(cur_path + "/campain_logs/scripts/").mkdir(parents=True, exist_ok=True)
+ # Load txt file containing experiments to run (give it as argument to this script)
+ filename = 'to_run.txt'
+ if len(sys.argv) >= 2:
+     filename = sys.argv[1]
+ launch = True
+ # Save a copy of the txt file
+ shutil.copyfile(cur_path + "/" + filename, cur_path + '/campain_logs/scripts/' + date + '_' + filename)
+
+ # one_launch_per_n_seeds = 8
+ one_launch_per_n_seeds = 4
+
+ global_seed_offset = 0
+ incremental = False
+ if len(sys.argv) >= 3:
+     if sys.argv[2] == 'nolaunch':
+         launch = False
+     if sys.argv[2] == 'seed_offset':
+         global_seed_offset = int(sys.argv[3])
+     if sys.argv[2] == 'incremental_seed_offset':
+         global_seed_offset = int(sys.argv[3])
+         incremental = True
+ if launch:
+     print('Creating and launching slurm scripts given arguments from {}'.format(filename))
+     # time.sleep(1.0)
+ expe_list = []
+ with open(filename, 'r') as f:
+     expe_list = [line.rstrip() for line in f]
+
+ exp_names = set()
+ for expe_args in expe_list:
+     seed_offset_to_use = global_seed_offset
+
+     if len(expe_args) == 0:
+         # empty line
+         continue
+
+     if expe_args[0] == '#':
+         # comment line
+         continue
+
+     exp_config = expe_args.split('--')[1:5]
+
+     if not [arg.split(' ')[0] for arg in exp_config] == ['slurm_conf', 'nb_seeds', 'frames', 'model']:
+         raise ValueError("Arguments must be in the following order {}".format(
+             ['slurm_conf', 'nb_seeds', 'frames', 'model']))
+
+     slurm_conf_name, nb_seeds, frames, exp_name = [arg.split(' ')[1] for arg in exp_config]
+
+     user = getpass.getuser()
+     if 'curta' in slurm_conf_name:
+         gpu = ''
+         PYTHON_INTERP = "$HOME/anaconda3/envs/act_and_speak/bin/python"
+         n_cpus = 1
+     elif 'plafrim' in slurm_conf_name:
+         gpu = ''
+         PYTHON_INTERP = '/home/{}/USER/conda/envs/act_and_speak/bin/python'.format(user)
+         n_cpus = 1
+     elif 'jz' in slurm_conf_name:
+
+         if user == "utu57ed":
+             PYTHON_INTERP = '/gpfsscratch/rech/imi/{}/miniconda3/envs/social_ai/bin/python'.format(user)
+         elif user == "uxo14qj":
+             PYTHON_INTERP = '/gpfswork/rech/imi/{}/miniconda3/envs/act_and_speak/bin/python'.format(user)
+         else:
+             if user != "flowers":
+                 raise ValueError("Who are you? User {} unknown.".format(user))
+
+         gpu = ''  # '--gpu_id 0'
+         n_cpus = 2
+
+         n_cpus = 4
+         assert n_cpus * one_launch_per_n_seeds == 16  # a cpus_per_task of 8 will result in 16 cpus
+     else:
+         raise Exception("Unrecognized conf name: {} ".format(slurm_conf_name))
+
+     # assert ((int(nb_seeds) % 8) == 0), 'number of seeds should be divisible by 8'
+     assert ((int(nb_seeds) % 4) == 0), 'number of seeds should be divisible by 4'
+     run_args = expe_args.split(exp_name, 1)[
+         1]  # WARNING: assumes that exp_name comes after slurm_conf, nb_seeds and frames in the txt
+
+     # prepare experiment name formatting (use --* or -* instead of -- or - to use an argument in the experiment name)
+     # print(expe_args.split(exp_name))
+     exp_details, run_args = process_arg_string(run_args)
+     exp_name = date + '_' + exp_name + exp_details
+
+     # no two trains are to be put in the same dir
+     assert exp_name not in exp_names
+     exp_names.add(exp_name)
+
+     slurm_script_fullname = cur_path + "/campain_logs/scripts/{}".format(exp_name) + ".sh"
+     # create corresponding slurm script
+
+     # calculate how many chained jobs we need
+     chained_training = "chained" in slurm_conf_name
+     frames = int(frames)
+
+     if chained_training:
+         # assume 10M frames per 20h (fps 140 - very conservative)
+         timelimit = slurm_confs[slurm_conf_name].split("-t ")[-1].split("\n")[0]
+         assert timelimit == '19:59:00'
+         one_script_frames = 10000000
+         print(f"One script frames: {one_script_frames}")
+
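+         # ceiling division: enough chained jobs to cover all requested frames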
+         num_chained_jobs = frames // one_script_frames + bool(frames % one_script_frames)
+
+     else:
+         one_script_frames = frames
+         num_chained_jobs = 1  # no chaining
+
+     assert "--frames " not in run_args
+
+     current_script_frames = min(one_script_frames, frames)
+
+     # launch scripts (1 launch per 4 seeds)
+     if launch:
+         for i in range(int(nb_seeds) // one_launch_per_n_seeds):
+
+             # continue jobs
+             cont_job_i = num_chained_jobs  # last job
+
+             exp_name_no_date = exp_name[5:]
+             continue_slurm_script_fullname = cur_path + "/campain_logs/scripts/*{}_continue_{}".format(exp_name_no_date, "*")
+             matched_scripts = glob.glob(continue_slurm_script_fullname)
+             matched_scripts.sort(key=os.path.getctime)
+
+             for last_script in reversed(matched_scripts):
+                 # start from the latest written script and use the first one encountered that has an err file (i.e., that was run)
+
+                 p = re.compile("continue_(.*).sh")
+                 last_job_id = int(p.search(last_script).group(1))
+
+                 last_script_name = os.path.basename(last_script)[:-3].replace("_continue_", "_cont_")
+                 if len(glob.glob(cur_path + "/campain_logs/jobouts/" + last_script_name + "*.sh.err")) == 1:
+                     # error file found -> script was run -> this is the script that crashed
+                     break
+
+             print(f"Continuing job id: {last_job_id}")
+             # last_err_log = glob.glob(cur_path + "/campain_logs/jobouts/"+last_script_name+"*.sh.err")[0]
+             #
+             # print("Then ended with:\n")
+             # print('"""\n')
+             # for l in open(last_err_log).readlines():
+             #     print("\t"+l, end='')
+             # print('"""\n')
+
+             # write continue script
+             cont_script_name = "{}_continue_{}.sh".format(exp_name, last_job_id)
+             continue_slurm_script_fullname = cur_path + "/campain_logs/scripts/" + cont_script_name
+
+             current_script_frames = min(one_script_frames * (2 + cont_job_i), frames)
+             # run continue job
+             sbatch_pipe = subprocess.Popen(
+                 ['sbatch', 'campain_logs/scripts/{}'.format(os.path.basename(last_script)), str((i * one_launch_per_n_seeds) + seed_offset_to_use)],  # 0 4 8 12
+                 stdout=subprocess.PIPE
+             )
+
+     if incremental:
+         global_seed_offset += int(nb_seeds)
campain_launcher.py ADDED
@@ -0,0 +1,488 @@
+ import sys
+ import time
+ from pathlib import Path
+ from datetime import date
+ import subprocess
+ import shutil
+ import os
+ import stat
+ import getpass
+
+ def get_sec(time_str):
+     """Get seconds from a h:m:s time string."""
+     h, m, s = time_str.split(':')
+     return int(h) * 3600 + int(m) * 60 + int(s)
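+     # e.g. get_sec("19:59:00") -> 71940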
+
+
+ def write_script(script_fullname, exp_name, PYTHON_INTERP, n_cpu_cores, slurm_conf_name, run_args, script_frames,
+                  is_continue=False, dependecy_jobid=None):
+
+     print('creating slurm script with: --model {} {} --frames {} {}'.format(exp_name, run_args, script_frames, "--continue-train auto" if is_continue else ""))
+     logfile_name = "{}{}_jid_%A".format(exp_name, "_cont_" + dependecy_jobid if is_continue else "")
+     with open(script_fullname, 'w') as f:
+         f.write('#!/bin/sh\n')
+
+         if is_continue:
+             f.write('#SBATCH --dependency=afterok:{}\n'.format(dependecy_jobid))
+             f.write('#SBATCH --kill-on-invalid-dep=yes\n')
+
+         f.write('#SBATCH --ntasks=1\n')
+         f.write('#SBATCH --cpus-per-task={}\n'.format((n_cpu_cores * n_seeds_per_one_launch) // 2))  # cpus asked = num_cores // 2
+         if "jz" in slurm_conf_name:
+             f.write('#SBATCH --hint=nomultithread\n')
+         f.write(slurm_confs[slurm_conf_name])
+         f.write('#SBATCH --open-mode=append\n')  # append to log files instead of truncating them
+         f.write('#SBATCH -o campain_logs/jobouts/{}.sh.out\n'
+                 '#SBATCH -e campain_logs/jobouts/{}.sh.err\n'.format(logfile_name, logfile_name))
+         f.write("export EXP_INTERP='{}' ;\n".format(PYTHON_INTERP))
+         f.write('# Launch !\n')
+         f.write(
+             'cpu_list=$(taskset -pc $$ | sed -E "s/(.*): (.*)/\\2/g" | tr "," "\\n" | sed -E "s/^[0-9]*$/&-&/g" | sed -E "s/-/ /g" | xargs -l seq | tr "\\n" " ")\n')
+         f.write('echo "cpu list: $cpu_list"\n')
+         f.write('COUNT=${1:-0}\n')
+         f.write('i=0\n')
+         f.write('cpus=""\n')
+         f.write('for cpu in $cpu_list; do\n')
+         f.write('cpus="$cpus$cpu"\n')
+         f.write('i=$(($i+1))\n')
+         f.write('if [ "$i" = "{}" ]; then\n'.format(n_cpu_cores))
+
+         if "2gpus" in slurm_conf_name:
+             f.write(
+                 "{}".format('CUDA_VISIBLE_DEVICES=$(( $COUNT % 2 )); ') +
+                 'taskset -c $cpus $EXP_INTERP -m scripts.train --model {}/$COUNT --seed $COUNT'.format(exp_name) +
+                 run_args + " --frames {}".format(script_frames) + "{}".format(" --continue-train auto" if is_continue else "") + ' &\n')
+
+         elif "4gpus" in slurm_conf_name:
+             f.write(
+                 "{}".format('CUDA_VISIBLE_DEVICES=$(( $COUNT % 4 )); ') +
+                 'taskset -c $cpus $EXP_INTERP -m scripts.train --model {}/$COUNT --seed $COUNT'.format(exp_name) +
+                 run_args + " --frames {}".format(script_frames) + "{}".format(" --continue-train auto" if is_continue else "") + ' &\n')
+
+         else:
+             f.write(
+                 # "{}".format('CUDA_VISIBLE_DEVICES=$(( $COUNT % 2 )); ' if "2gpus" in slurm_conf_name else "") +
+                 'taskset -c $cpus $EXP_INTERP -m scripts.train --model {}/$COUNT --seed $COUNT'.format(exp_name) +
+                 run_args + " --frames {}".format(script_frames) + "{}".format(" --continue-train auto" if is_continue else "") + ' &\n')
+
+         f.write('echo "Using cpus $cpus for seed $COUNT"\n')
+         f.write('COUNT=$(( $COUNT + 1 ))\n')
+         f.write('cpus=""\n')
+         f.write('i=0\n')
+         f.write('else\n')
+         f.write('cpus="$cpus,"\n')
+         f.write('fi\n')
+         f.write('done\n')
+         f.write('wait\n')
+         f.close()
+
+     st = os.stat(script_fullname)
+     os.chmod(script_fullname, st.st_mode | stat.S_IEXEC)
+
+ def write_script_one_seed(script_fullname, exp_name, PYTHON_INTERP, n_cpu_cores, slurm_conf_name, run_args, script_frames,
+                           is_continue=False, dependecy_jobid=None):
+
+     n_cpus = n_cpu_cores // 2
+
+     assert n_seeds_per_one_launch == 1, "Use write_script"
+     print('creating slurm script with: --model {} {} --frames {} {}'.format(exp_name, run_args, script_frames, "--continue-train auto" if is_continue else ""))
+     logfile_name = "{}{}_jid_%A".format(exp_name, "_cont_" + dependecy_jobid if is_continue else "")
+     with open(script_fullname, 'w') as f:
+         f.write('#!/bin/sh\n')
+
+         if is_continue:
+             f.write('#SBATCH --dependency=afterok:{}\n'.format(dependecy_jobid))
+             f.write('#SBATCH --kill-on-invalid-dep=yes\n')
+
+         f.write('#SBATCH --ntasks=1\n')
+         f.write('#SBATCH --cpus-per-task={}\n'.format(n_cpus))
+         if "jz" in slurm_conf_name:
+             f.write('#SBATCH --hint=nomultithread\n')
+         f.write(slurm_confs[slurm_conf_name])
+         f.write('#SBATCH --open-mode=append\n')  # append to log files instead of truncating them
+         f.write('#SBATCH -o campain_logs/jobouts/{}.sh.out\n'
+                 '#SBATCH -e campain_logs/jobouts/{}.sh.err\n'.format(logfile_name, logfile_name))
+         f.write("export EXP_INTERP='{}' ;\n".format(PYTHON_INTERP))
+         f.write('SEED=${1:-0}\n')
+         f.write('# Launch !\n')
+         f.write(
+             '$EXP_INTERP -m scripts.train --model {}/$SEED --seed $SEED'.format(exp_name) +
+             run_args + " --frames {}".format(script_frames) + "{}".format(" --continue-train auto" if is_continue else ""))
+         f.close()
+
+     st = os.stat(script_fullname)
+     os.chmod(script_fullname, st.st_mode | stat.S_IEXEC)
+
+
+ def process_arg_string(expe_args):  # extract arguments flagged with a '*' as details for the experiment name
+     details_string = ''
+     processed_arg_string = expe_args.replace('*', '')  # keep a version of args cleaned from exp-name-related flags
+     # args = [arg_chunk.split(' -') for arg_chunk in expe_args.split(' --')]
+     arg_chunks = [arg_chunk for arg_chunk in expe_args.split(' --')]
+     args_list = []
+     for arg in arg_chunks:
+         if ' -' in arg and arg.split(' -')[1].isalpha():
+             args_list.extend(arg.split(' -'))
+         else:
+             args_list.append(arg)
+     # args_list = [item for sublist in args for item in sublist]  # flatten
+     for arg in args_list:
+         if arg == '':
+             continue
+         if arg[0] == '*':
+             if arg[-1] == ' ':
+                 arg = arg[:-1]
+             details_string += '_' + arg[1:].replace(' ', '_').replace('/', '-')
+     return details_string, processed_arg_string
+
+
+ slurm_confs = {'curta_extra_long': "#SBATCH -p inria\n"
+                                    "#SBATCH -t 119:00:00\n",
+                'curta_long': "#SBATCH -p inria\n"
+                              "#SBATCH -t 72:00:00\n",
+                'curta_medium': "#SBATCH -p inria\n"
+                                "#SBATCH -t 48:00:00\n",
+                'curta_short': "#SBATCH -p inria\n"
+                               "#SBATCH -t 24:00:00\n",
+                'jz_super_short_gpu':
+                    '#SBATCH -A imi@v100\n'
+                    '#SBATCH --gres=gpu:1\n'
+                    "#SBATCH -t 3:59:00\n"
+                    "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_gpu': '#SBATCH -A imi@v100\n'
+                                '#SBATCH --gres=gpu:1\n'
+                                "#SBATCH -t 19:59:00\n"
+                                "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_super_short_gpu_chained':
+                    '#SBATCH -A imi@v100\n'
+                    '#SBATCH --gres=gpu:1\n'
+                    "#SBATCH -t 3:59:00\n"
+                    "#SBATCH -C v100\n"
+                    "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_gpu_chained': '#SBATCH -A imi@v100\n'
+                                        '#SBATCH --gres=gpu:1\n'
+                                        "#SBATCH -t 19:59:00\n"
+                                        "#SBATCH -C v100\n"
+                                        "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_gpu_chained_a100_4h': '#SBATCH -A imi@a100\n'
+                                                '#SBATCH --gres=gpu:1\n'
+                                                "#SBATCH -t 3:59:00\n"
+                                                "#SBATCH -C a100\n"
+                                                "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_gpu_chained_a100': '#SBATCH -A imi@a100\n'
+                                             '#SBATCH --gres=gpu:1\n'
+                                             "#SBATCH -t 19:59:00\n"
+                                             "#SBATCH -C a100\n"
+                                             "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_2gpus_chained': '#SBATCH -A imi@v100\n'
+                                          '#SBATCH --gres=gpu:2\n'
+                                          "#SBATCH -t 19:59:00\n"
+                                          "#SBATCH -C v100\n"
+                                          "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_4gpus_chained': '#SBATCH -A imi@v100\n'
+                                          '#SBATCH --gres=gpu:4\n'
+                                          "#SBATCH -t 19:59:00\n"
+                                          "#SBATCH -C v100\n"
+                                          "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_medium_gpu': '#SBATCH -A imi@v100\n'
+                                 '#SBATCH --gres=gpu:1\n'
+                                 "#SBATCH -t 48:00:00\n"
+                                 "#SBATCH --qos=qos_gpu-t4\n",
+                'jz_super_short_2gpus': '#SBATCH -A imi@v100\n'
+                                        '#SBATCH --gres=gpu:2\n'
+                                        "#SBATCH -t 14:59:00\n"
+                                        "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_2gpus': '#SBATCH -A imi@v100\n'
+                                  '#SBATCH --gres=gpu:2\n'
+                                  "#SBATCH -t 19:59:00\n"
+                                  "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_short_2gpus_32g': '#SBATCH -A imi@v100\n'
+                                      '#SBATCH -C v100-32g\n'
+                                      '#SBATCH --gres=gpu:2\n'
+                                      "#SBATCH -t 19:59:00\n"
+                                      "#SBATCH --qos=qos_gpu-t3\n",
+                'jz_medium_2gpus': '#SBATCH -A imi@v100\n'
+                                   '#SBATCH --gres=gpu:2\n'
+                                   "#SBATCH -t 48:00:00\n"
+                                   "#SBATCH --qos=qos_gpu-t4\n",
+                'jz_medium_2gpus_32g': '#SBATCH -A imi@v100\n'
+                                       '#SBATCH -C v100-32g\n'
+                                       '#SBATCH --gres=gpu:2\n'
+                                       "#SBATCH -t 48:00:00\n"
+                                       "#SBATCH --qos=qos_gpu-t4\n",
+                'jz_long_gpu': '#SBATCH -A imi@v100\n'
+                               '#SBATCH --gres=gpu:1\n'
+                               "#SBATCH -t 72:00:00\n"
+                               "#SBATCH --qos=qos_gpu-t4\n",
+                'jz_long_2gpus': '#SBATCH -A imi@v100\n'
+                                 '#SBATCH --gres=gpu:2\n'
+                                 '#SBATCH -t 72:00:00\n'
+                                 '#SBATCH --qos=qos_gpu-t4\n',
+                'jz_long_2gpus_32g': '#SBATCH -A imi@v100\n'
+                                     '#SBATCH -C v100-32g\n'
+                                     '#SBATCH --gres=gpu:2\n'
+                                     "#SBATCH -t 72:00:00\n"
+                                     "#SBATCH --qos=qos_gpu-t4\n",
+                'jz_super_long_2gpus_32g': '#SBATCH -A imi@v100\n'
+                                           '#SBATCH -C v100-32g\n'
+                                           '#SBATCH --gres=gpu:2\n'
+                                           "#SBATCH -t 99:00:00\n"
+                                           "#SBATCH --qos=qos_gpu-t4\n",
+                'jz_short_cpu_chained': '#SBATCH -A imi@cpu\n'
+                                        "#SBATCH -t 19:59:00\n"
+                                        "#SBATCH --qos=qos_cpu-t3\n",
+                'jz_short_cpu': '#SBATCH -A imi@cpu\n'
+                                "#SBATCH -t 19:59:00\n"
+                                "#SBATCH --qos=qos_cpu-t3\n",
+                'jz_medium_cpu': '#SBATCH -A imi@cpu\n'
+                                 "#SBATCH -t 48:00:00\n"
+                                 "#SBATCH --qos=qos_cpu-t4\n",
+                'jz_long_cpu': '#SBATCH -A imi@cpu\n'
+                               "#SBATCH -t 72:00:00\n"
+                               "#SBATCH --qos=qos_cpu-t4\n",
+                'plafrim_cpu_medium': "#SBATCH -t 48:00:00\n",
+                'plafrim_cpu_long': "#SBATCH -t 72:00:00\n",
+                'plafrim_gpu_medium': '#SBATCH -p long_sirocco\n'
+                                      "#SBATCH -t 48:00:00\n"
+                                      '#SBATCH --gres=gpu:1\n'
+                }
+
+ cur_path = str(Path.cwd())
+ date = date.today().strftime("%d-%m")
+ # create campain log dir if not already done
+ Path(cur_path + "/campain_logs/jobouts/").mkdir(parents=True, exist_ok=True)
+ Path(cur_path + "/campain_logs/scripts/").mkdir(parents=True, exist_ok=True)
+ # Load txt file containing experiments to run (give it as argument to this script)
+ filename = 'to_run.txt'
+ if len(sys.argv) >= 2:
+     filename = sys.argv[1]
+ launch = True
+ # Save a copy of the txt file
+ shutil.copyfile(cur_path + "/" + filename, cur_path + '/campain_logs/scripts/' + date + '_' + filename)
+
+ # how many seeds one launch runs
+ # one_launch_per_n_seeds = 8
+
+ global_seed_offset = 0
+ incremental = False
+ if len(sys.argv) >= 3:
+     if sys.argv[2] == 'nolaunch':
+         launch = False
+     if sys.argv[2] == 'seed_offset':
+         global_seed_offset = int(sys.argv[3])
+     if sys.argv[2] == 'incremental_seed_offset':
+         global_seed_offset = int(sys.argv[3])
+         incremental = True
+ if launch:
+     print('Creating and launching slurm scripts given arguments from {}'.format(filename))
+     # time.sleep(1.0)
+ expe_list = []
+ with open(filename, 'r') as f:
+     expe_list = [line.rstrip() for line in f]
+
+ exp_names = set()
+ for expe_args in expe_list:
+     seed_offset_to_use = global_seed_offset
+
+     if len(expe_args) == 0:
+         # empty line
+         continue
+
+     if expe_args[0] == '#':
+         # comment line
+         continue
+
+     arguments = ['slurm_conf', 'nb_seeds', 'cpu_cores_per_seed', 'gpus_per_seed', 'seeds_per_launch', 'frames', 'model']
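+     # e.g., a line in the txt file might start like this (illustrative):
+     # --slurm_conf jz_short_gpu_chained --nb_seeds 4 --cpu_cores_per_seed 8 --gpus_per_seed 1 --seeds_per_launch 1 --frames 20000000 --model MyExp ...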
+     exp_config = expe_args.split('--')[1:len(arguments)+1]
+     given_args = [arg.split(' ')[0] for arg in exp_config]
+
+     if not given_args == arguments:
+         raise ValueError("Arguments must be in the following order {}, and are {}".format(arguments, given_args))
+
+     slurm_conf_name, nb_seeds, n_cpu_cores_per_seed, n_gpus_per_seed, n_seeds_per_one_launch, frames, exp_name = [arg.split(' ')[1] for arg in exp_config]
+
+     n_seeds_per_one_launch = int(n_seeds_per_one_launch)
+     n_cpu_cores_per_seed = int(n_cpu_cores_per_seed)
+
+     user = getpass.getuser()
+     if 'curta' in slurm_conf_name:
+         gpu = ''
+         PYTHON_INTERP = "$HOME/anaconda3/envs/act_and_speak/bin/python"
+         n_cpu_cores_per_seed = 1
+
+     elif 'plafrim' in slurm_conf_name:
+         gpu = ''
+         PYTHON_INTERP = '/home/{}/USER/conda/envs/act_and_speak/bin/python'.format(user)
+         n_cpu_cores_per_seed = 1
+
+     elif 'jz' in slurm_conf_name:
+         if user == "utu57ed" or user == 'flowers':
+             PYTHON_INTERP = '/gpfsscratch/rech/imi/{}/miniconda3/envs/social_ai/bin/python'.format(user)
+         elif user == "uxo14qj":
+             PYTHON_INTERP = '/gpfswork/rech/imi/{}/miniconda3/envs/act_and_speak/bin/python'.format(user)
+         else:
+             if user != "flowers":
+                 raise ValueError("Who are you? User {} unknown.".format(user))
+
+         gpu = ''  # '--gpu_id 0'
+         # n_cpus = 2
+
+         # n_seeds_per_one_launch = 4
+         # n_cpu_cores = 16  # n cpu cores for one seed
+         # assert n_cpu_cores * n_seeds_per_one_launch == 64
+
+         # n_seeds_per_one_launch = 2
+         # n_cpu_cores = 16  # n cpu cores for one seed
+         # assert n_cpu_cores * n_seeds_per_one_launch == 32
+
+         # n_seeds_per_one_launch = 2
+         # n_cpu_cores = 32  # n cpu cores for one seed
+         # assert n_cpu_cores * n_seeds_per_one_launch == 64
+
+         # n_seeds_per_one_launch = 1
+         # n_cpu_cores = 16  # n cpu cores for one seed
+         # assert n_cpu_cores * n_seeds_per_one_launch == 16
+         #
+         # n_seeds_per_one_launch = 1
+         # n_cpu_cores = 32  # n cpu cores for one seed
+         # assert n_cpu_cores * n_seeds_per_one_launch == 32
+         #
+         # assert n_seeds_per_one_launch == 1
+         # assert n_cpu_cores_per_seed == 64  # n cpu cores for one seed
+         # assert n_cpu_cores_per_seed * n_seeds_per_one_launch == 64
+
+         # n_cpus = 64  # n cpu cores for one seed
+         # assert n_cpus*one_launch_per_n_seeds == 256  # cpus_per_task is 8 will result in 16 cpu cores
+
+         if "2gpus" in slurm_conf_name:
+             job_gpus = 2
+         elif "4gpus" in slurm_conf_name:
+             job_gpus = 4
+         elif "gpu" in slurm_conf_name:
+             job_gpus = 1
+         else:
+             print("No gpus used")
+             job_gpus = 1
+
+         assert float(n_gpus_per_seed) == float(job_gpus / n_seeds_per_one_launch)
+
+
+         print(f"\nJob configuration (1 launch):")
+         print(f"\tSeeds: {n_seeds_per_one_launch}")
+         print(f"\tGPUs: {job_gpus}")
+
+         print(f"\n1 seed configuration:")
+         print(f"\tCPU cores {n_cpu_cores_per_seed}")
+         print(f"\tGPUs {job_gpus / n_seeds_per_one_launch}")
+         time.sleep(0.5)
+
+     else:
+         raise Exception("Unrecognized conf name: {} ".format(slurm_conf_name))
+
+     # assert ((int(nb_seeds) % 8) == 0), 'number of seeds should be divisible by 8'
+     assert ((int(nb_seeds) % 4) == 0) or (int(nb_seeds) == 1), f'number of seeds should be divisible by 4 or equal to 1, and is {nb_seeds}'
+     run_args = expe_args.split(exp_name, 1)[
+         1]  # WARNING: assumes that exp_name comes after the other arguments in the txt
+
+     # prepare experiment name formatting (use --* or -* instead of -- or - to use an argument in the experiment name)
+     # print(expe_args.split(exp_name))
+     exp_details, run_args = process_arg_string(run_args)
+     exp_name = date + '_' + exp_name + exp_details
+
+     # no two trains are to be put in the same dir
+     assert exp_name not in exp_names
+     exp_names.add(exp_name)
+
+     slurm_script_fullname = cur_path + "/campain_logs/scripts/{}".format(exp_name) + ".sh"
+     # create corresponding slurm script
+
+     # calculate how many chained jobs we need
+     chained_training = "chained" in slurm_conf_name
+     frames = int(frames)
+     print(chained_training)
+     if chained_training:
+         # assume 10M frames per 20h (fps 140 - very conservative)
+         timelimit = slurm_confs[slurm_conf_name].split("-t ")[-1].split("\n")[0]
+         if timelimit == '19:59:00':
+             one_script_frames = 10000000
+
+         elif timelimit == "3:59:00":
+             one_script_frames = 2500000
+         else:
+             raise ValueError(f"Bad timelimit {timelimit}.")
+
+         print(f"One script frames: {one_script_frames}")
+
+         num_chained_jobs = frames // one_script_frames + bool(frames % one_script_frames)
+
+         # # assume conservative fps - 300 (for one seed per gpu)
+         # fps = 300
+         # timelimit = slurm_confs[slurm_conf_name].split("-t ")[-1].split("\n")[0]
+         # assert timelimit == '3:59:00'
+         # timelimit_secs = get_sec(timelimit)
+         #
+         # one_script_frames = fps*timelimit_secs
+         #
+         # num_chained_jobs = frames // one_script_frames + bool(frames % one_script_frames)
+         #
+         # print(f"One script frames: {one_script_frames} -> num chained jobs {num_chained_jobs}")
+
+     else:
+         one_script_frames = frames
+         num_chained_jobs = 1  # no chaining
+
+     assert "--frames " not in run_args
+
+     current_script_frames = min(one_script_frames, frames)
+     if n_seeds_per_one_launch == 1:
+         write_script_one_seed(slurm_script_fullname, exp_name, PYTHON_INTERP, n_cpu_cores_per_seed,
+                               slurm_conf_name, run_args, current_script_frames, is_continue=False,
+                               dependecy_jobid=None)
+     else:
+         write_script(slurm_script_fullname, exp_name, PYTHON_INTERP, n_cpu_cores_per_seed, slurm_conf_name,
+                      run_args, current_script_frames, is_continue=False, dependecy_jobid=None)
+
+     # launch scripts
+     if launch:
+         for i in range(int(nb_seeds) // n_seeds_per_one_launch):
+
+
+             print('starting from seed {}'.format((i * n_seeds_per_one_launch) + global_seed_offset))
+             # run start job
+             sbatch_pipe = subprocess.Popen(
+                 ['sbatch', 'campain_logs/scripts/{}.sh'.format(exp_name), str((i * n_seeds_per_one_launch) + seed_offset_to_use)],  # 0 4 8 12
+                 stdout=subprocess.PIPE
+             )
+             job_id = subprocess.check_output(('cut', '-d', ' ', '-f', '4'), stdin=sbatch_pipe.stdout).decode("utf_8").rstrip()
+             sbatch_pipe.wait()
+
+             # out = subprocess.run(
+             #     ['sbatch', 'campain_logs/scripts/{}.sh'.format(exp_name), str((i * one_launch_per_n_seeds) + seed_offset_to_use)],  # 0 4 8 12
+             #     capture_output=True
+             # ).stdout.decode("utf-8")
+
+             # continue jobs
+             for cont_job_i in range(num_chained_jobs-1):
+                 # write continue script
+                 cont_script_name = "{}_continue_{}.sh".format(exp_name, job_id)
+                 continue_slurm_script_fullname = cur_path + "/campain_logs/scripts/" + cont_script_name
+
+                 current_script_frames = min(one_script_frames*(2+cont_job_i), frames)
+                 if n_seeds_per_one_launch == 1:
+                     write_script_one_seed(continue_slurm_script_fullname, exp_name, PYTHON_INTERP, n_cpu_cores_per_seed,
+                                           slurm_conf_name, run_args, current_script_frames,
+                                           is_continue=True, dependecy_jobid=job_id)
+                 else:
+                     write_script(continue_slurm_script_fullname, exp_name, PYTHON_INTERP, n_cpu_cores_per_seed, slurm_conf_name, run_args, current_script_frames,
+                                  is_continue=True, dependecy_jobid=job_id)
+
+                 # run continue job
+                 sbatch_pipe = subprocess.Popen(
+                     ['sbatch', 'campain_logs/scripts/{}'.format(cont_script_name), str((i * n_seeds_per_one_launch) + seed_offset_to_use)],  # 0 4 8 12
+                     stdout=subprocess.PIPE
+                 )
+                 job_id = subprocess.check_output(('cut', '-d', ' ', '-f', '4'), stdin=sbatch_pipe.stdout).decode("utf_8").rstrip()
+                 sbatch_pipe.wait()
+
+     if incremental:
+         global_seed_offset += int(nb_seeds)
data_analysis.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
data_analysis.py ADDED
@@ -0,0 +1,1650 @@
1
+ #!/usr/bin/env python
2
+ import re
3
+ import itertools
4
+ import math
5
+ from itertools import chain
6
+ import time
7
+
8
+ # import seaborn
9
+ import numpy as np
10
+ import os
11
+ from collections import OrderedDict, defaultdict
12
+ import pandas as pd
13
+ import matplotlib.pyplot as plt
14
+ import sys
15
+ from termcolor import cprint, colored
16
+ from pathlib import Path
17
+ import pickle
18
+
19
+ eval_metric = "test_success_rates"
20
+ # eval_metric = "exploration_bonus_mean"
21
+
22
+ super_title = ""
23
+ # super_title = "PPO - No exploration bonus"
24
+ # super_title = "Count Based exploration bonus (Grid Search)"
25
+ # super_title = "PPO + RND"
26
+ # super_title = "PPO + RIDE"
27
+
28
+ agg_title = ""
29
+
30
+ color_dict = None
31
+ eval_filename = None
32
+
33
+ max_frames = 20_000_000
34
+
35
+ draw_legend = True
36
+ per_seed = False
37
+ study_eval = True
38
+
39
+ plot_train = True
40
+ plot_test = True
41
+
42
+ plot_aggregated_test = False
43
+ plot_only_aggregated_test = False
44
+
45
+
46
+ train_inc_font = 3
47
+
48
+ xnbins = 4
49
+ ynbins = 3
50
+
51
+ steps_denom = 1e6
52
+
53
+ # Global vars for tracking and labeling data at load time.
54
+ exp_idx = 0
55
+ label_parser_dict = None
56
+ label_parser = lambda l, _, label_parser_dict: l
57
+
58
+ # smooth_factor = 100
59
+ smooth_factor = 10
60
+ smooth_factor = 0
61
+ print("smooth factor:", smooth_factor)
62
+ eval_smooth_factor = 1
63
+ leg_size = 30
64
+
65
+ def smooth(x_, n=50):
66
+ if type(x_) == list:
67
+ x_ = np.array(x_)
68
+ return np.array([x_[max(i - n, 0):i + 1].mean() for i in range(len(x_))])
69
+
70
+ sort_test = False
71
+ def sort_test_set(env_name):
72
+ helps = [
73
+ "LanguageFeedback",
74
+ "LanguageColor",
75
+ "Pointing",
76
+ "Emulation",
77
+ ]
78
+ problems = [
79
+ "Boxes",
80
+ "Switches",
81
+ "Generators",
82
+ "Marble",
83
+ "Doors",
84
+ "Levers",
85
+ ]
86
+
87
+ env_names = []
88
+ for p in problems:
89
+ for h in helps:
90
+ env_names.append(h+p)
91
+
92
+ env_names.extend([
93
+ "LeverDoorColl",
94
+ "MarblePushColl",
95
+ "MarblePassColl",
96
+ "AppleStealing"
97
+ ])
98
+
99
+ for i, en in enumerate(env_names):
100
+ if en in env_name:
101
+ return i
102
+
103
+ raise ValueError(f"Test env {env_name} not known")
104
+
105
+
106
+
107
+ subsample_step = 1
108
+ load_subsample_step = 1
109
+
110
+ x_lim = 0
111
+ max_x_lim = 17
112
+ max_x_lim = np.inf
113
+ # x_lim = 100
114
+
115
+ summary_dict = {}
116
+ summary_dict_colors = {}
117
+
118
+
119
+ # default_colors = ["blue","orange","green","magenta", "brown", "red",'black',"grey",u'#ff7f0e',
120
+ # "cyan", "pink",'purple', u'#1f77b4',
121
+ # "darkorchid","sienna","lightpink", "indigo","mediumseagreen",'aqua',
122
+ # 'deeppink','silver','khaki','goldenrod','y','y','y','y','y','y','y','y','y','y','y','y' ] + ['y']*50
123
+ default_colors_ = ["blue","orange","green","magenta", "brown", "red",'black',"grey",u'#ff7f0e',
124
+ "cyan", "pink",'purple', u'#1f77b4',
125
+ "darkorchid","sienna","lightpink", "indigo","mediumseagreen",'aqua',
126
+ 'deeppink','silver','khaki','goldenrod'] * 100
127
+
128
+
129
+ def get_eval_data(logdir, eval_metric):
130
+ eval_data = defaultdict(lambda :defaultdict(list))
131
+
132
+ for root, _, files in os.walk(logdir):
133
+ for file in files:
134
+ if 'testing_' in file:
135
+ assert ".pkl" in file
136
+ test_env_name = file[len("testing_"):-len(".pkl")]  # lstrip/rstrip strip char sets, not exact prefixes/suffixes
137
+ try:
138
+ with open(root+"/"+file, "rb") as f:
139
+ seed_eval_data = pickle.load(f)
140
+ except:
141
+ print("Pickle not loaded: ", root+"/"+file)
142
+ time.sleep(1)
143
+ continue
144
+
145
+ eval_data[test_env_name]["values"].append(seed_eval_data[eval_metric])
146
+ eval_data[test_env_name]["steps"].append(seed_eval_data["test_step_nb"])
147
+
148
+ # if 'log.csv' in files:
149
+ # run_name = root[8:]
150
+ # exp_name = None
151
+ #
152
+ # config = None
153
+ # exp_idx += 1
154
+ #
155
+ # # load progress data
156
+ # try:
157
+ # print(os.path.join(root, 'log.csv'))
158
+ # exp_data = pd.read_csv(os.path.join(root, 'log.csv'))
159
+ # except:
160
+ # size = (Path(root) / 'log.csv').stat().st_size
161
+ # if size == 0:
162
+ # raise ValueError("CSV {} empty".format(os.path.join(root, 'log.csv')))
163
+ # else:
164
+ # raise ValueError("CSV {} faulty".format(os.path.join(root, 'log.csv')))
165
+ #
166
+ # exp_data = exp_data[::load_subsample_step]
167
+ # data_dict = exp_data.to_dict("list")
168
+ #
169
+ # data_dict['config'] = config
170
+ # nb_epochs = len(data_dict['frames'])
171
+ # print('{} -> {}'.format(run_name, nb_epochs))
172
+
173
+ for test_env, seed_data in eval_data.items():
174
+ min_len_seed = min([len(s) for s in seed_data['steps']])
175
+ eval_data[test_env]["values"] = np.array([s[:min_len_seed] for s in eval_data[test_env]["values"]])
176
+ eval_data[test_env]["steps"] = np.array([s[:min_len_seed] for s in eval_data[test_env]["steps"]])
177
+
178
+ return eval_data
179
+
180
+ def get_all_runs(logdir, load_subsample_step=1):
181
+ """
182
+ Recursively look through logdir for output files produced by training runs.
183
+ Assumes that any file "log.csv" is a valid hit.
184
+ """
185
+ global exp_idx
186
+ global units
187
+ datasets = []
188
+ for root, _, files in os.walk(logdir):
189
+ if 'log.csv' in files:
190
+ if (Path(root) / 'log.csv').stat().st_size == 0:
191
+ print("CSV {} empty".format(os.path.join(root, 'log.csv')))
192
+ continue
193
+
194
+ run_name = root[8:]
195
+
196
+ exp_name = None
197
+
198
+ config = None
199
+ exp_idx += 1
200
+
201
+ # load progress data
202
+ try:
203
+ exp_data = pd.read_csv(os.path.join(root, 'log.csv'))
204
+ print("Loaded:", os.path.join(root, 'log.csv'))
205
+ except:
206
+ raise ValueError("CSV {} faulty".format(os.path.join(root, 'log.csv')))
207
+
208
+ exp_data = exp_data[::load_subsample_step]
209
+ data_dict = exp_data.to_dict("list")
210
+
211
+ data_dict['config'] = config
212
+ nb_epochs = len(data_dict['frames'])
213
+ if nb_epochs == 1:
214
+ print(f'{run_name} -> {colored(f"nb_epochs {nb_epochs}", "red")}')
215
+ else:
216
+ print('{} -> nb_epochs {}'.format(run_name, nb_epochs))
217
+
218
+ datasets.append(data_dict)
219
+
220
+ return datasets
221
+
222
+
223
+ def get_datasets(rootdir, load_only="", load_subsample_step=1, ignore_patterns=("ignore",), require_patterns=()):
224
+ _, models_list, _ = next(os.walk(rootdir))
225
+ for dir_name in models_list.copy():
226
+ # add "ignore" in a directory name to avoid loading its content
227
+ for ignore_pattern in ignore_patterns:
228
+ if ignore_pattern in dir_name or load_only not in dir_name:
229
+ if dir_name in models_list:
230
+ models_list.remove(dir_name)
231
+
232
+ if len(require_patterns) > 0:
233
+ if not any([require_pattern in dir_name for require_pattern in require_patterns]):
234
+ if dir_name in models_list:
235
+ models_list.remove(dir_name)
236
+
237
+ for expe_name in list(labels.keys()):
238
+ if expe_name not in models_list:
239
+ del labels[expe_name]
240
+
241
+
242
+ # setting per-model type colors
243
+ for i, m_name in enumerate(models_list):
244
+ for m_type, m_color in per_model_colors.items():
245
+ if m_type in m_name:
246
+ colors[m_name] = m_color
247
+ print("extracting data for {}...".format(m_name))
248
+ m_id = m_name
249
+ models_saves[m_id] = OrderedDict()
250
+ models_saves[m_id]['data'] = get_all_runs(rootdir+m_name, load_subsample_step=load_subsample_step)
251
+ print("done")
252
+
253
+ if m_name not in labels:
254
+ labels[m_name] = m_name
255
+
256
+ model_eval_data[m_id] = get_eval_data(logdir=rootdir+m_name, eval_metric=eval_metric)
257
+
258
+ """
259
+ retrieve all experiments located in the "data to vizu" folder
260
+ """
261
+ labels = OrderedDict()
262
+ per_model_colors = OrderedDict()
263
+ # per_model_colors = OrderedDict([('ALP-GMM',u'#1f77b4'),
264
+ # ('hmn','pink'),
265
+ # ('ADR','black')])
266
+
267
+ # LOAD DATA
268
+ models_saves = OrderedDict()
269
+ colors = OrderedDict()
270
+ model_eval_data = OrderedDict()
271
+
272
+ static_lines = {}
273
+ # get_datasets("storage/",load_only="RERUN_WizardGuide")
274
+ # get_datasets("storage/",load_only="RERUN_WizardTwoGuides")
275
+ try:
276
+ load_pattern = eval(sys.argv[1])
277
+
278
+ except:
279
+ load_pattern = sys.argv[1]
280
+
281
+ ignore_patterns = ["_ignore_"]
282
+ require_patterns = [
283
+ "_"
284
+ ]
285
+
286
+ # require_patterns = [
287
+ # "dummy_cs_jz_scaf_A_E_N_A_E",
288
+ # "03-12_dummy_cs_jz_formats_AE",
289
+ # ]
290
+ #
291
+ # def label_parser(label, figure_id, label_parser_dict=None):
292
+ # if "single" in label:
293
+ # ty = "single"
294
+ # elif "group" in label:
295
+ # ty = "group"
296
+ #
297
+ # if "asoc" in label:
298
+ # return f"Asocial_pretrain({ty})"
299
+ #
300
+ # if "exp_soc" in label:
301
+ # return f"Role_B_pretrain({ty})"
302
+ #
303
+ # return label
304
+
305
+
306
+ #
307
+ # # DUMMY FORMATS
308
+ # require_patterns = [
309
+ # "03-12_dummy_cs_formats_CBL",
310
+ # "dummy_cs_formats_CBL_N_rec_5"
311
+ # "03-12_dummy_cs_jz_formats_",
312
+ # "dummy_cs_jz_formats_N_rec_5"
313
+ # ]
314
+ # def label_parser(label, figure_id, label_parser_dict=None):
315
+ # if "CBL" in label:
316
+ # eb = "CBL"
317
+ # else:
318
+ # eb = "no_bonus"
319
+ #
320
+ # if "AE" in label:
321
+ # label = f"AE_PPO_{eb}"
322
+ # elif "E" in label:
323
+ # label = f"E_PPO_{eb}"
324
+ # elif "A" in label:
325
+ # label = f"A_PPO_{eb}"
326
+ # elif "N" in label:
327
+ # label = f"N_PPO_{eb}"
328
+ #
329
+ # return label
330
+ #
331
+
332
+ # DUMMY CLASSIC
333
+ # require_patterns = [
334
+ # "07-12_dummy_cs_NEW2_Pointing_sm_CB_very_small",
335
+ # "dummy_cs_JA_Pointing_CB_sm",
336
+
337
+ # "06-12_dummy_cs_NEW_Color_CBL",
338
+ # "dummy_cs_JA_Color_CBL_new"
339
+
340
+ # "07-12_dummy_cs_NEW2_Feedback_CBL",
341
+ # "dummy_cs_JA_Feedback_CBL_new"
342
+
343
+ # "08-12_dummy_cs_emulation_no_distr_rec_5_CB_exploration-bonus-type_cell_exploration-bonus-params__1_50",
344
+ # "08-12_dummy_cs_emulation_no_distr_rec_5_CB",
345
+
346
+ # "dummy_cs_RR_ft_NEW_single_CB_marble_pass_B_exp_soc",
347
+ # "dummy_cs_RR_ft_NEW_single_CB_marble_pass_B_contr_asoc",
348
+
349
+ # "dummy_cs_RR_ft_NEW_group_CB_marble_pass_A_exp_soc",
350
+ # "dummy_cs_RR_ft_NEW_group_CB_marble_pass_A_contr_asoc"
351
+
352
+ # "03-12_dummy_cs_jz_formats_A",
353
+ # "03-12_dummy_cs_jz_formats_E",
354
+ # "03-12_dummy_cs_jz_formats_AE",
355
+ # "dummy_cs_jz_formats_N_rec_5"
356
+
357
+ # "03-12_dummy_cs_formats_CBL_A",
358
+ # "03-12_dummy_cs_formats_CBL_E",
359
+ # "03-12_dummy_cs_formats_CBL_AE",
360
+ # "dummy_cs_formats_CBL_N_rec_5"
361
+
362
+ # "03-12_dummy_cs_jz_formats_AE",
363
+ # "dummy_cs_jz_scaf_A_E_N_A_E_full-AEfull",
364
+ # "dummy_cs_jz_scaf_A_E_N_A_E_scaf_full-AEfull",
365
+ # ]
366
+
367
+ # def label_parser(label, figure_id, label_parser_dict=None):
368
+ # label = label.replace("07-12_dummy_cs_NEW2_Pointing_sm_CB_very_small", "PPO_CB")
369
+ # label = label.replace("dummy_cs_JA_Pointing_CB_sm", "JA_PPO_CB")
370
+ #
371
+ # label = label.replace("06-12_dummy_cs_NEW_Color_CBL", "PPO_CBL")
372
+ # label = label.replace("dummy_cs_JA_Color_CBL_new", "JA_PPO_CBL")
373
+ #
374
+ # label = label.replace("07-12_dummy_cs_NEW2_Feedback_CBL", "PPO_CBL")
375
+ # label = label.replace("dummy_cs_JA_Feedback_CBL_new", "JA_PPO_CBL")
376
+ #
377
+ # label = label.replace(
378
+ # "08-12_dummy_cs_emulation_no_distr_rec_5_CB_exploration-bonus-type_cell_exploration-bonus-params__1_50",
379
+ # "PPO_CB_1")
380
+ # label = label.replace(
381
+ # "08-12_dummy_cs_emulation_no_distr_rec_5_CB_exploration-bonus-type_cell_exploration-bonus-params__1_50",
382
+ # "PPO_CB_1")
383
+ #
384
+ # label = label.replace("dummy_cs_RR_ft_NEW_single_CB_marble_pass_B_exp_soc", "PPO_CB_role_B_single")
385
+ # label = label.replace("dummy_cs_RR_ft_NEW_single_CB_marble_pass_B_contr_asoc", "PPO_CB_asoc_single")
386
+ #
387
+ # label = label.replace("dummy_cs_RR_ft_NEW_group_CB_marble_pass_A_exp_soc", "PPO_CB_role_B_group")
388
+ # label = label.replace("dummy_cs_RR_ft_NEW_group_CB_marble_pass_A_contr_asoc", "PPO_CB_asoc_group")
389
+ #
390
+ # label = label.replace(
391
+ # "03-12_dummy_cs_formats_CBL_A_rec_5_env_SocialAI-ALangFeedbackTrainFormatsCSParamEnv-v1_recurrence_5_test-set-name_AFormatsTestSet_exploration-bonus-type_lang",
392
+ # "PPO_CBL_Ask")
393
+ # label = label.replace(
394
+ # "03-12_dummy_cs_formats_CBL_E_rec_5_env_SocialAI-ELangFeedbackTrainFormatsCSParamEnv-v1_recurrence_5_test-set-name_EFormatsTestSet_exploration-bonus-type_lang",
395
+ # "PPO_CBL_Eye_contact")
396
+ # label = label.replace(
397
+ # "03-12_dummy_cs_formats_CBL_AE_rec_5_env_SocialAI-AELangFeedbackTrainFormatsCSParamEnv-v1_recurrence_5_test-set-name_AEFormatsTestSet_exploration-bonus-type_lang",
398
+ # "PPO_CBL_Ask_Eye_contact")
399
+ # label = label.replace("dummy_cs_formats_CBL_N_rec_5", "PPO_CBL_No")
400
+ #
401
+ # label = label.replace(
402
+ # "03-12_dummy_cs_jz_formats_E_rec_5_env_SocialAI-ELangFeedbackTrainFormatsCSParamEnv-v1_recurrence_5_test-set-name_EFormatsTestSet",
403
+ # "PPO_no_bonus_Eye_contact")
404
+ # label = label.replace(
405
+ # "03-12_dummy_cs_jz_formats_A_rec_5_env_SocialAI-ALangFeedbackTrainFormatsCSParamEnv-v1_recurrence_5_test-set-name_AFormatsTestSet",
406
+ # "PPO_no_bonus_Ask")
407
+ # label = label.replace(
408
+ # "03-12_dummy_cs_jz_formats_AE_rec_5_env_SocialAI-AELangFeedbackTrainFormatsCSParamEnv-v1_recurrence_5_test-set-name_AEFormatsTestSet",
409
+ # "PPO_no_bonus_Ask_Eye_contact")
410
+ # label = label.replace("dummy_cs_jz_formats_N_rec_5", "PPO_no_bonus_No")
411
+ #
412
+ # label = label.replace("03-12_dummy_cs_jz_formats_AE", "PPO_no_bonus_no_scaf")
413
+ # label = label.replace("dummy_cs_jz_scaf_A_E_N_A_E_full-AEfull", "PPO_no_bonus_scaf_4")
414
+ # label = label.replace("dummy_cs_jz_scaf_A_E_N_A_E_scaf_full-AEfull", "PPO_no_bonus_scaf_8")
415
+ #
416
+ # return label
417
+
418
+
419
+ # Final case studies
420
+ require_patterns = [
421
+ "_",
422
+ # pointing
423
+ # "04-01_Pointing_CB_heldout_doors",
424
+
425
+ # # role reversal
426
+ # "03-01_RR_ft_single_CB_marble_pass_A_asoc_contr",
427
+ # "03-01_RR_ft_single_CB_marble_pass_A_soc_exp",
428
+
429
+ # "05-01_RR_ft_group_50M_CB_marble_pass_A_asoc_contr",
430
+ # "05-01_RR_ft_group_50M_CB_marble_pass_A_soc_exp",
431
+
432
+ # scaffolding
433
+ # "05-01_scaffolding_50M_no",
434
+ # "05-01_scaffolding_50M_acl_4_acl-type_intro_seq",
435
+ # "05-01_scaffolding_50M_acl_8_acl-type_intro_seq_scaf",
436
+ ]
437
+
438
+ def label_parser(label, figure_id, label_parser_dict=None):
439
+ label = label.replace("04-01_Pointing_CB_heldout_doors", "PPO_CB")
440
+
441
+ label = label.replace("05-01_scaffolding_50M_no_acl", "PPO_no_scaf")
442
+ label = label.replace("05-01_scaffolding_50M_acl_4_acl-type_intro_seq", "PPO_scaf_4")
443
+ label = label.replace("05-01_scaffolding_50M_acl_8_acl-type_intro_seq_scaf", "PPO_scaf_8")
444
+
445
+ label = label.replace("03-01_RR_ft_single_CB_marble_pass_A_soc_exp", "PPO_CB_role_B")
446
+ label = label.replace("03-01_RR_ft_single_CB_marble_pass_A_asoc_contr", "PPO_CB_asocial")
447
+
448
+ label = label.replace("05-01_RR_ft_group_50M_CB_marble_pass_A_soc_exp", "PPO_CB_role_B")
449
+ label = label.replace("05-01_RR_ft_group_50M_CB_marble_pass_A_asoc_contr", "PPO_CB_asocial")
450
+
451
+ return label
452
+
453
+
454
+ color_dict = {
455
+
456
+ # JA
457
+ # "JA_PPO_CBL": "blue",
458
+ # "PPO_CBL": "orange",
459
+
460
+ # RR group
461
+ # "PPO_CB_role_B_group": "orange",
462
+ # "PPO_CB_asoc_group": "blue"
463
+
464
+ # formats No
465
+ # "PPO_no_bonus_No": "blue",
466
+ # "PPO_no_bonus_Eye_contact": "magenta",
467
+ # "PPO_no_bonus_Ask": "orange",
468
+ # "PPO_no_bonus_Ask_Eye_contact": "green"
469
+
470
+ # formats CBL
471
+ # "PPO_CBL_No": "blue",
472
+ # "PPO_CBL_Eye_contact": "magenta",
473
+ # "PPO_CBL_Ask": "orange",
474
+ # "PPO_CBL_Ask_Eye_contact": "green"
475
+ }
476
+
477
+ # # POINTING_GENERALIZATION (DUMMY)
478
+ # require_patterns = [
479
+ # "29-10_SAI_Pointing_CS_PPO_CB_",
480
+ # "29-10_SAI_LangColor_CS_PPO_CB_"
481
+ # ]
482
+ #
483
+ # color_dict = {
484
+ # "dummy_cs_JA_Feedback_CBL_new": "blue",
485
+ # "dummy_cs_Feedback_CBL": "orange",
486
+ # }
487
+ #
488
+ # def label_parser(label, figure_id, label_parser_dict=None):
489
+ # label = label.split("_env_")[0].split("SAI_")[1]
490
+ # label=label.replace("Pointing_CS_PPO_CB", "PPO_CB_train(DUMMY)")
491
+ # label=label.replace("LangColor_CS_PPO_CB", "PPO_CB_test(DUMMY)")
492
+ # return label
493
+ #
494
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/Pointing_gen_eval.png"
495
+
496
+ # # FEEDBACK GENERALIZATION (DUMMY)
497
+ # require_patterns = [
498
+ # "29-10_SAI_LangFeedback_CS_PPO_CBL_",
499
+ # "29-10_SAI_LangColor_CS_PPO_CB_"
500
+ # ]
501
+ #
502
+ # color_dict = {
503
+ # "PPO_CBL_train(DUMMY)": "blue",
504
+ # "PPO_CBL_test(DUMMY)": "maroon",
505
+ # }
506
+ #
507
+ # def label_parser(label, figure_id, label_parser_dict=None):
508
+ # label = label.split("_env_")[0].split("SAI_")[1]
509
+ # label=label.replace("LangFeedback_CS_PPO_CBL", "PPO_CBL_train(DUMMY)")
510
+ # label=label.replace("LangColor_CS_PPO_CB", "PPO_CBL_test(DUMMY)")
511
+ # return label
512
+ #
513
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/Feedback_gen_eval.png"
514
+
515
+ # # COLOR GENERALIZATION (DUMMY)
516
+ # require_patterns = [
517
+ # "29-10_SAI_LangColor_CS_PPO_CBL_",
518
+ # "29-10_SAI_LangColor_CS_PPO_CB_"
519
+ # ]
520
+ #
521
+ # color_dict = {
522
+ # "PPO_CBL_train(DUMMY)": "blue",
523
+ # "PPO_CBL_test(DUMMY)": "maroon",
524
+ # }
525
+ #
526
+ # def label_parser(label, figure_id, label_parser_dict=None):
527
+ # label = label.split("_env_")[0].split("SAI_")[1]
528
+ # label=label.replace("LangColor_CS_PPO_CBL", "PPO_CBL_train(DUMMY)")
529
+ # label=label.replace("LangColor_CS_PPO_CB", "PPO_CBL_test(DUMMY)")
530
+ # return label
531
+ #
532
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/Color_gen_eval.png"
533
+
534
+ # # POINTING - PILOT
535
+ # require_patterns = [
536
+ # "29-10_SAI_Pointing_CS_PPO_",
537
+ # ]
538
+ #
539
+ # color_dict = {
540
+ # "PPO_RIDE": "orange",
541
+ # "PPO_RND": "magenta",
542
+ # "PPO_no": "maroon",
543
+ # "PPO_CBL": "green",
544
+ # "PPO_CB": "blue",
545
+ # }
546
+ #
547
+ # def label_parser(label, figure_id, label_parser_dict=None):
548
+ # label = label.split("_env_")[0].split("SAI_")[1]
549
+ # label=label.replace("Pointing_CS_", "")
550
+ # return label
551
+ # #
552
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/Pointing_eval.png"
553
+
554
+
555
+ # LANGCOLOR - 7 Colors - PILOT
556
+ # require_patterns = [
557
+ # "29-10_SAI_LangColor_CS_PPO_",
558
+ # ]
559
+ #
560
+ # color_dict = {
561
+ # "PPO_RIDE": "orange",
562
+ # "PPO_RND": "magenta",
563
+ # "PPO_no": "maroon",
564
+ # "PPO_CBL": "green",
565
+ # "PPO_CB": "blue",
566
+ # }
567
+ #
568
+ # def label_parser(label, figure_id, label_parser_dict=None):
569
+ # label = label.split("_env_")[0].split("SAI_")[1]
570
+ # label=label.replace("LangColor_CS_", "")
571
+ # return label
572
+ #
573
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/Color_eval.png"
574
+
575
+ # # LangColor - CBL - 3 5 7
576
+ # require_patterns = [
577
+ # "02-11_SAI_LangColor_CS_5C_PPO_CBL",
578
+ # "02-11_SAI_LangColor_CS_3C_PPO_CBL",
579
+ # "29-10_SAI_LangColor_CS_PPO_CBL"
580
+ # ]
581
+
582
+ # RND RIDE reference : RIDE > RND > no
583
+ # require_patterns = [
584
+ # "24-08_new_ref",
585
+ # ]
586
+
587
+
588
+ # # # LANG FEEDBACK
589
+ # require_patterns = [
590
+ # "24-10_SAI_LangFeedback_CS_PPO_",
591
+ # "29-10_SAI_LangFeedback_CS_PPO_",
592
+ # ]
593
+ # color_dict = {
594
+ # "PPO_RIDE": "orange",
595
+ # "PPO_RND": "magenta",
596
+ # "PPO_no": "maroon",
597
+ # "PPO_CBL": "green",
598
+ # "PPO_CB": "blue",
599
+ # }
600
+ #
601
+ # def label_parser(label, figure_id, label_parser_dict=None):
602
+ # label = label.split("_env_")[0].split("SAI_")[1]
603
+ # label=label.replace("LangFeedback_CS_", "")
604
+ # return label
605
+ #
606
+ # # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/Feedback_eval.png"
607
+ #
608
+
609
+ # # ROLE REVERSAL - group (DUMMY)
610
+ # require_patterns = [
611
+ # "24-10_SAI_LangFeedback_CS_PPO_CB_",
612
+ # "29-10_SAI_LangFeedback_CS_PPO_CBL_",
613
+ # ]
614
+ # color_dict = {
615
+ # "PPO_CB_experimental": "green",
616
+ # "PPO_CB_control": "blue",
617
+ # }
618
+ # color_dict=None
619
+ #
620
+ # def label_parser(label, figure_id, label_parser_dict=None):
621
+ # label = label.split("_env_")[0].split("SAI_")[1]
622
+ # label=label.replace("LangFeedback_CS_", "")
623
+ #
624
+ # label=label.replace("PPO_CB", "PPO_CB_control")
625
+ # label=label.replace("controlL", "experimental")
626
+ #
627
+ # return label
628
+ #
629
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/RR_dummy_group.png"
630
+
631
+ # # ROLE REVERSAL - single (DUMMY)
632
+ # require_patterns = [
633
+ # "24-10_SAI_LangFeedback_CS_PPO_CB_",
634
+ # "24-10_SAI_LangFeedback_CS_PPO_no_",
635
+ # ]
636
+ # color_dict = {
637
+ # "PPO_CB_experimental": "green",
638
+ # "PPO_CB_control": "blue",
639
+ # }
640
+ # color_dict=None
641
+ #
642
+ # def label_parser(label, figure_id, label_parser_dict=None):
643
+ # label = label.split("_env_")[0].split("SAI_")[1]
644
+ # label=label.replace("LangFeedback_CS_", "")
645
+ #
646
+ # label=label.replace("PPO_CB", "PPO_CB_control")
647
+ # label=label.replace("PPO_no", "PPO_CB_experimental")
648
+ #
649
+ # return label
650
+ #
651
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/RR_dummy_single.png"
652
+
653
+ # # IMITATION train (DUMMY)
654
+ # require_patterns = [
655
+ # "29-10_SAI_LangFeedback_CS_PPO_CBL_",
656
+ # "29-10_SAI_Pointing_CS_PPO_RIDE",
657
+ # ]
658
+ #
659
+ # color_dict = {
660
+ # "PPO_CB_no_distr(DUMMY)": "magenta",
661
+ # "PPO_CB_distr(DUMMY)": "orange",
662
+ # }
663
+ #
664
+ # def label_parser(label, figure_id, label_parser_dict=None):
665
+ # label = label.split("_env_")[0].split("SAI_")[1]
666
+ # label=label.replace("LangFeedback_CS_PPO_CBL", "PPO_CB_no_distr(DUMMY)")
667
+ # label=label.replace("Pointing_CS_PPO_RIDE", "PPO_CB_distr(DUMMY)")
668
+ # return label
669
+ #
670
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/Imitation_train.png"
671
+
672
+ # # IMITATION test (DUMMY)
673
+ # require_patterns = [
674
+ # "29-10_SAI_LangFeedback_CS_PPO_CBL_",
675
+ # "29-10_SAI_Pointing_CS_PPO_RIDE",
676
+ # ]
677
+ #
678
+ # color_dict = {
679
+ # "PPO_CB_no_distr(DUMMY)": "magenta",
680
+ # "PPO_CB_distr(DUMMY)": "orange",
681
+ # }
682
+ #
683
+ # def label_parser(label, figure_id, label_parser_dict=None):
684
+ # label = label.split("_env_")[0].split("SAI_")[1]
685
+ # label=label.replace("LangFeedback_CS_PPO_CBL", "PPO_CB_no_distr(DUMMY)")
686
+ # label=label.replace("Pointing_CS_PPO_RIDE", "PPO_CB_distr(DUMMY)")
687
+ # return label
688
+ #
689
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/Imitation_test.png"
690
+
691
+
692
+ # JA_POINTING
693
+ # require_patterns = [
694
+ # "29-10_SAI_Pointing_CS_PPO_CB_",
695
+ # "04-11_SAI_JA_Pointing_CS_PPO_CB_less", # less reward
696
+ # ]
697
+ # color_dict = {
698
+ # "JA_Pointing_PPO_CB": "orange",
699
+ # "Pointing_PPO_CB": "blue",
700
+ # }
701
+ #
702
+ # def label_parser(label, figure_id, label_parser_dict=None):
703
+ # label = label.split("_env_")[0].split("SAI_")[1]
704
+ # label=label.replace("_CS_", "_")
705
+ # label=label.replace("_less_", "")
706
+ # return label
707
+ #
708
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/JA_Pointing_eval.png"
709
+
710
+
711
+ # # JA_COLORS (JA, no) x (3,5,7)
712
+ # max_x_lim = 17
713
+ # require_patterns = [
714
+ # # "02-11_SAI_JA_LangColor", # max_x_lim = 17
715
+ # "02-11_SAI_JA_LangColor_CS_3C", # max_x_lim = 17
716
+ # # "02-11_SAI_LangColor_CS_5C_PPO_CBL", # max_x_lim = 17
717
+ # "02-11_SAI_LangColor_CS_3C_PPO_CBL",
718
+ # # "29-10_SAI_LangColor_CS_PPO_CBL"
719
+ # ]
720
+ # color_dict = {
721
+ # "JA_LangColor_PPO_CBL": "orange",
722
+ # "LangColor_PPO_CBL": "blue",
723
+ # }
724
+
725
+ # def label_parser(label, figure_id, label_parser_dict=None):
726
+ # label = label.split("_env_")[0].split("SAI_")[1]
727
+ # label=label.replace("_CS_", "_")
728
+ # label=label.replace("_3C_", "_")
729
+ # return label
730
+
731
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/JA_Color_eval.png"
732
+
733
+
734
+ # JA_FEEDBACK -> max_xlim=17
735
+ # max_x_lim = 17
736
+ # require_patterns = [
737
+ # "02-11_SAI_JA_LangFeedback_CS_PPO_CBL_",
738
+ # "29-10_SAI_LangFeedback_CS_PPO_CBL_",
739
+ # "dummy_cs_F",
740
+ # "dummy_cs_JA_F"
741
+ # ]
742
+ # color_dict = {
743
+ # "JA_LangFeedback_PPO_CBL": "orange",
744
+ # "LangFeedback_PPO_CBL": "blue",
745
+ # }
746
+ #
747
+ # def label_parser(label, figure_id, label_parser_dict=None):
748
+ # label = label.split("_env_")[0].split("SAI_")[1]
749
+ # label=label.replace("_CS_", "_")
750
+ # return label
751
+ #
752
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/JA_Feedback_eval.png"
753
+
754
+ # # Formats CBL
755
+ # require_patterns = [
756
+ # "03-11_SAI_LangFeedback_CS_F_NO_PPO_CBL_env_SocialAI",
757
+ # "29-10_SAI_LangFeedback_CS_PPO_CBL_env_SocialAI",
758
+ # "03-11_SAI_LangFeedback_CS_F_ASK_PPO_CBL_env_SocialAI",
759
+ # "03-11_SAI_LangFeedback_CS_F_ASK_EYE_PPO_CBL_env_SocialAI",
760
+ # ]
761
+ # color_dict = {
762
+ # "LangFeedback_Eye_PPO_CBL": "blue",
763
+ # "LangFeedback_Ask_PPO_CBL": "orange",
764
+ # "LangFeedback_NO_PPO_CBL": "green",
765
+ # "LangFeedback_AskEye_PPO_CBL": "magenta",
766
+ # }
767
+ #
768
+ # def label_parser(label, figure_id, label_parser_dict=None):
769
+ # label = label.split("_env_")[0].split("SAI_")[1]
770
+ # label=label.replace("_CS_", "_")
771
+ # label=label.replace("_F_", "_")
772
+ #
773
+ # label=label.replace("LangFeedback_PPO", "LangFeedback_EYE_PPO")
774
+ #
775
+ # label=label.replace("EYE", "Eye")
776
+ # label=label.replace("No", "No")
777
+ # label=label.replace("ASK", "Ask")
778
+ # label=label.replace("Ask_Eye", "AskEye")
779
+ # return label
780
+ #
781
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/Formats_CBL_eval.png"
782
+
783
+ # # Formats NO
784
+ # require_patterns = [
785
+ # "24-10_SAI_LangFeedback_CS_PPO_no", # EYE
786
+ # "04-11_SAI_LangFeedback_CS_F_NO_PPO_NO_env_SocialAI",
787
+ # "04-11_SAI_LangFeedback_CS_F_ASK_PPO_NO_env_SocialAI",
788
+ # "04-11_SAI_LangFeedback_CS_F_ASK_EYE_PPO_NO_env_SocialAI",
789
+ # ]
790
+ #
791
+ # color_dict = {
792
+ # "LangFeedback_Eye_PPO_no": "blue",
793
+ # "LangFeedback_Ask_PPO_no": "orange",
794
+ # "LangFeedback_NO_PPO_no": "green",
795
+ # "LangFeedback_AskEye_PPO_no": "magenta",
796
+ # }
797
+ #
798
+ # def label_parser(label, figure_id, label_parser_dict=None):
799
+ # label = label.split("_env_")[0].split("SAI_")[1]
800
+ # label=label.replace("_CS_", "_")
801
+ # label=label.replace("_F_", "_")
802
+ # #
803
+ # label=label.replace("LangFeedback_PPO", "LangFeedback_EYE_PPO")
804
+ # label=label.replace("PPO_NO", "PPO_no")
805
+ #
806
+ # label=label.replace("EYE", "Eye")
807
+ # label=label.replace("No", "No")
808
+ # label=label.replace("ASK", "Ask")
809
+ # label=label.replace("Ask_Eye", "AskEye")
810
+ # return label
811
+ #
812
+ # eval_filename = f"/home/flowers/Documents/projects/embodied_acting_and_speaking/case_studies_figures/Formats_no_eval.png"
813
+
814
+
815
+ #
816
+ # require_patterns = [
817
+ # "11-07_bAI_cb_GS_param_tanh_env_SocialAI-SocialAIParamEnv-v1_exploration-bonus-type_cell_exploration-bonus-params__2_50_exploration-bonus-tanh_0.6",
818
+ # # "04-11_SAI_ImitationDistr_CS_PPO_CB_small_env_SocialAI-EEmulationDistrInformationSeekingParamEnv-v1_recurrence_10",
819
+ # # "04-11_SAI_ImitationDistr_CS_PPO_CB_small_env_SocialAI-EEmulationDistrInformationSeekingParamEnv-v1_recurrence_10",
820
+ # "03-11_SAI_ImitationDistr_CS_PPO_CB_env_SocialAI-EEmulationDistrInformationSeekingParamEnv-v1_recurrence_10",
821
+ # # "04-11_SAI_ImitationNoDistr_CS_PPO_CB_small_env_SocialAI-EEmulationNoDistrInformationSeekingParamEnv-v1_recurrence_10",
822
+ # ]
823
+
824
+ # require_patterns = [
825
+ # "02-11_SAI_LangColor_CS_3C_PPO_CBL",
826
+ # "02-11_SAI_JA_LangColor_CS_3C_PPO_CBL",
827
+ # ] # at least one of those
828
+
829
+
830
+ # all of those
831
+ include_patterns = [
832
+ "_"
833
+ ]
834
+ #include_patterns = ["rec_5"]
835
+
836
+ if eval_filename:
837
+ # saving
838
+ fontsize = 40
839
+ legend_fontsize = 30
840
+ linewidth = 10
841
+ else:
842
+ fontsize = 5
843
+ legend_fontsize = 5
844
+ linewidth = 1
845
+
846
+ fontsize = 5
847
+ legend_fontsize = 5
848
+ linewidth = 1
849
+
850
+ title_fontsize = int(fontsize*1.2)
851
+
852
+
853
+ storage_dir = "storage/"
854
+ if load_pattern.startswith(storage_dir):
855
+ load_pattern = load_pattern[len(storage_dir):]
856
+
857
+ if load_pattern.startswith("./storage/"):
858
+ load_pattern = load_pattern[len("./storage/"):]
859
+
860
+ get_datasets(storage_dir, str(load_pattern), load_subsample_step=load_subsample_step, ignore_patterns=ignore_patterns, require_patterns=require_patterns)
861
+
862
+ label_parser_dict = {
863
+ # "PPO_CB": "PPO_CB",
864
+ # "02-06_AppleStealing_experiments_cb_bonus_angle_occ_env_SocialAI-OthersPerceptionInferenceParamEnv-v1_exploration-bonus-type_cell": "NPC_visible",
865
+ }
866
+
867
+ env_type = str(load_pattern)
868
+
869
+ fig_type = "test"
870
+ try:
871
+ top_n = int(sys.argv[2])
872
+ except:
873
+ top_n = 8
874
+
875
+ to_remove = []
876
+
877
+ for tr_ in to_remove:
878
+ if tr_ in models_saves:
879
+ del models_saves[tr_]
880
+
881
+ print("Loaded:")
882
+ print("\n".join(list(models_saves.keys())))
883
+
884
+ #### get_datasets("storage/", "RERUN_WizardGuide_lang64_nameless")
885
+ #### get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_nameless")
886
+
887
+
888
+ if per_model_colors: # order runs for legend order as in per_models_colors, with corresponding colors
889
+ ordered_labels = OrderedDict()
890
+ for teacher_type in per_model_colors.keys():
891
+ for k,v in labels.items():
892
+ if teacher_type in k:
893
+ ordered_labels[k] = v
894
+ labels = ordered_labels
895
+ else:
896
+ print('not using per_model_color')
897
+ for k in models_saves.keys():
898
+ labels[k] = k
899
+
900
+ def plot_with_shade_seed(subplot_nb, ax, x, y, err, color, shade_color, label,
901
+ y_min=None, y_max=None, legend=False, leg_size=30, leg_loc='best', title=None,
902
+ ylim=[0,100], xlim=[0,40], leg_args={}, leg_linewidth=13.0, linewidth=10.0, labelsize=20,
903
+ filename=None,
904
+ zorder=None, xlabel='perf', ylabel='Env steps'):
905
+
906
+ plt.rcParams.update({'font.size': 15})
907
+
908
+ plt.rcParams['axes.xmargin'] = 0
909
+ plt.rcParams['axes.ymargin'] = 0
910
+
911
+ ax.locator_params(axis='x', nbins=3)
912
+ ax.locator_params(axis='y', nbins=3)
913
+ ax.tick_params(axis='both', which='major', labelsize=labelsize)
914
+
915
+ x = x[:len(y)]
916
+
917
+ # ax.scatter(x, y, color=color, linewidth=linewidth, zorder=zorder)
918
+ ax.plot(x, y, color=color, label=label, linewidth=linewidth, zorder=zorder)
919
+
920
+ if err is not None:
921
+ ax.fill_between(x, y-err, y+err, color=shade_color, alpha=0.2)
922
+
923
+ if legend:
924
+ leg = ax.legend(loc=leg_loc, **leg_args) #34
925
+ for legobj in leg.legendHandles:
926
+ legobj.set_linewidth(leg_linewidth)
927
+ ax.set_xlabel(xlabel, fontsize=fontsize)
928
+ if subplot_nb == 0:
929
+ ax.set_ylabel(ylabel, fontsize=fontsize, labelpad=4)
930
+
931
+ ax.set_xlim(xmin=xlim[0],xmax=xlim[1])
932
+ ax.set_ylim(bottom=ylim[0],top=ylim[1])
933
+ if title:
934
+ ax.set_title(title, fontsize=fontsize)
935
+
936
+ # if filename is not None:
937
+ # f.savefig(filename)
938
+
939
+
940
+ # Plot utils
941
+ def plot_with_shade_grg(subplot_nb, ax, x, y, err, color, shade_color, label,
942
+ legend=False, leg_loc='best', title=None,
943
+ ylim=[0, 100], xlim=[0, 40], leg_args={}, leg_linewidth=13.0, linewidth=10.0, labelsize=20, fontsize=20, title_fontsize=30,
944
+ zorder=None, xlabel='Perf', ylabel='Env steps', linestyle="-", xnbins=3, ynbins=3, filename=None):
945
+
946
+ #plt.rcParams.update({'font.size': 15})
947
+ ax.locator_params(axis='x', nbins=xnbins)
948
+ ax.locator_params(axis='y', nbins=ynbins)
949
+
950
+ ax.tick_params(axis='y', which='both', labelsize=labelsize)
951
+ ax.tick_params(axis='x', which='both', labelsize=labelsize*0.8)
952
+ # ax.tick_params(axis='both', which='both', labelsize="small")
953
+
954
+ # ax.scatter(x, y, color=color,linewidth=linewidth,zorder=zorder, linestyle=linestyle)
955
+ ax.plot(x, y, color=color, label=label, linewidth=linewidth, zorder=zorder, linestyle=linestyle)
956
+
957
+ ax.fill_between(x, y-err, y+err, color=shade_color, alpha=0.2)
958
+
959
+ if legend:
960
+ leg = ax.legend(loc=leg_loc, **leg_args) # 34
961
+ for legobj in leg.legendHandles:
962
+ legobj.set_linewidth(leg_linewidth)
963
+
964
+ ax.set_xlabel(xlabel, fontsize=fontsize)
965
+ if subplot_nb == 0:
966
+ ax.set_ylabel(ylabel, fontsize=fontsize, labelpad=2)
967
+
968
+ ax.set_xlim(xmin=xlim[0], xmax=xlim[1])
969
+ ax.set_ylim(bottom=ylim[0], top=ylim[1])
970
+ if title:
971
+ ax.set_title(title, fontsize=title_fontsize)
972
+
973
+ # if filename is not None:
974
+ # f.savefig(filename)
975
+
976
+
977
+ # Metric plot
978
+ # metric = 'success_rate_mean'
979
+ # metric = 'mission_string_observed_mean'
980
+ # metric = 'extrinsic_return_mean'
981
+ # metric = 'extrinsic_return_max'
982
+ # metric = "rreturn_mean"
983
+ # metric = 'rreturn_max'
984
+ # metric = 'FPS'
985
+ # metric = 'duration'
986
+ # metric = 'intrinsic_reward_perf2_'
987
+ # metric = 'NPC_intro'
988
+
989
+
990
+ metrics = [
991
+ 'success_rate_mean',
992
+ # 'FPS',
993
+ # 'extrinsic_return_mean',
994
+ # 'exploration_bonus_mean',
995
+ 'NPC_intro',
996
+ # 'curriculum_param_mean',
997
+ # 'curriculum_max_success_rate_mean',
998
+ # 'rreturn_mean'
999
+ ]
1000
+
1001
+ # f, ax = plt.subplots(1, len(metrics), figsize=(15.0, 9.0))
1002
+ f, ax = plt.subplots(1, len(metrics), figsize=(9.0, 9.0))
1003
+ # f, ax = plt.subplots(1, len(metrics), figsize=(20.0, 20.0))
1004
+ # f, ax = plt.subplots(1, 1, figsize=(5.0, 3.0))
1005
+
1006
+ if len(metrics) == 1:
1007
+ ax = [ax]
1008
+
1009
+ max_y = -np.inf
1010
+ min_y = np.inf
1011
+ # hardcoded
1012
+ min_y, max_y = 0.0, 1.0
1013
+ max_steps = 0
1014
+ exclude_patterns = []
1015
+
1016
+
1017
+ # def label_parser(label, figure_id, label_parser_dict=None):
1018
+ #
1019
+ # label = label.split("_env_")[0].split("SAI_")[1]
1020
+ #
1021
+ # # # Pointing
1022
+ # # label=label.replace("Pointing_CS_", "")
1023
+ #
1024
+ # # Feedback
1025
+ # label=label.replace("LangFeedback_CS_", "")
1026
+ #
1027
+ #
1028
+ # # label=label.replace("CS_PPO", "7COL_PPO")
1029
+ # # label=label.replace("CS_3C_PPO", "3COL_PPO")
1030
+ # # label=label.replace("CS_5C_PPO", "5COL_PPO")
1031
+ #
1032
+ # # label=label.replace("CS_PPO", "Eye_contact_PPO")
1033
+ # # label=label.replace("CS_F_ASK_PPO", "Ask_PPO")
1034
+ # # label=label.replace("CS_F_NO_PPO", "NO_PPO")
1035
+ # # label=label.replace("CS_F_ASK_EYE_PPO", "Ask_Eye_contact_PPO")
1036
+ # #
1037
+ # # label=label.replace("PPO_no", "PPO_no_bonus")
1038
+ # # label=label.replace("PPO_NO", "PPO_no_bonus")
1039
+ #
1040
+ # if label_parser_dict:
1041
+ # if sum([1 for k, v in label_parser_dict.items() if k in label]) != 1:
1042
+ # if label in label_parser_dict:
1043
+ # # see if there is an exact match
1044
+ # return label_parser_dict[label]
1045
+ # else:
1046
+ # print("ERROR multiple curves match a lable and there is no exact match for {}".format(label))
1047
+ # exit()
1048
+ #
1049
+ # for k, v in label_parser_dict.items():
1050
+ # if k in label: return v
1051
+ #
1052
+ # else:
1053
+ # # return label.split("_env_")[1]
1054
+ # if figure_id not in [1, 2, 3, 4]:
1055
+ # return label
1056
+ # else:
1057
+ # # default
1058
+ # pass
1059
+ #
1060
+ # return label
1061
+
1062
+
1063
+ for metric_i, metric in enumerate(metrics):
1064
+ min_y, max_y = 0.0, 1.0
1065
+ default_colors = default_colors_.copy()
1066
+ for model_i, m_id in enumerate(models_saves.keys()):
1067
+
1068
+ #excluding some experiments
1069
+ if any([ex_pat in m_id for ex_pat in exclude_patterns]):
1070
+ continue
1071
+ if len(include_patterns) > 0:
1072
+ if not any([in_pat in m_id for in_pat in include_patterns]):
1073
+ continue
1074
+ runs_data = models_saves[m_id]['data']
1075
+ ys = []
1076
+
1077
+ if runs_data[0]['frames'][1] == 'frames':
1078
+ runs_data[0]['frames'] = list(filter(('frames').__ne__, runs_data[0]['frames']))
1079
+ ###########################################
1080
+
1081
+ if per_seed:
1082
+ min_len = None
1083
+
1084
+ else:
1085
+ # determine minimal run length across seeds
1086
+ lens = [len(run['frames']) for run in runs_data if len(run['frames'])]
1087
+ minimum = sorted(lens)[-min(top_n, len(lens))]
1088
+ min_len = np.min([len(run['frames']) for run in runs_data if len(run['frames']) >= minimum])
1089
+
1090
+ # keep only top k
1091
+ runs_data = [run for run in runs_data if len(run['frames']) >= minimum]
1092
+
1093
+ # min_len = np.min([len(run['frames']) for run in runs_data if len(run['frames']) > 10])
1094
+
1095
+ # compute env steps (x axis)
1096
+ longest_id = np.argmax([len(rd['frames']) for rd in runs_data])
1097
+ steps = np.array(runs_data[longest_id]['frames'], dtype=int) / steps_denom  # np.int was removed from NumPy; use the builtin
1098
+ steps = steps[:min_len]
1099
+
1100
+
1101
+ for run in runs_data:
1102
+ if metric not in run:
1103
+ # success_rate_mean <==> bin_extrinsic_return_mean
1104
+ if metric == 'success_rate_mean':
1105
+ metric_ = "bin_extrinsic_return_mean"
1106
+ if metric_ not in run:
1107
+ raise ValueError("Neither {} or {} is present: {} Possible metrics: {}. ".format(metric, metric_, list(run.keys())))
1108
+
1109
+ data = run[metric_]
1110
+
1111
+ else:
1112
+ raise ValueError("Unknown metric: {} Possible metrics: {}. ".format(metric, list(run.keys())))
1113
+ else:
1114
+ data = run[metric]
1115
+
1116
+ if data[1] == metric:
1117
+ data = np.array(list(filter((metric).__ne__, data)), dtype=np.float16)
1118
+ ###########################################
1119
+ if per_seed:
1120
+ ys.append(data)
1121
+ else:
1122
+ if len(data) >= min_len:
1123
+ if len(data) > min_len:
1124
+ print("run has too many {} datapoints ({}). Discarding {}".format(m_id, len(data),
1125
+ len(data)-min_len))
1126
+ data = data[0:min_len]
1127
+ ys.append(data)
1128
+ else:
1129
+ raise ValueError("How can data be < min_len if it was capped above")
1130
+
1131
+ ys_same_len = ys
1132
+
1133
+ # computes stats
1134
+ n_seeds = len(ys_same_len)
1135
+
1136
+ if per_seed:
1137
+ sems = np.array(ys_same_len)
1138
+ stds = np.array(ys_same_len)
1139
+ means = np.array(ys_same_len)
1140
+ color = default_colors[model_i]
1141
+
1142
+ else:
1143
+ sems = np.std(ys_same_len, axis=0)/np.sqrt(len(ys_same_len)) # sem
1144
+ stds = np.std(ys_same_len, axis=0) # std
1145
+ means = np.mean(ys_same_len, axis=0)
1146
+ color = default_colors[model_i]
1147
+
1148
+ # per-metric adjustments
1149
+ ylabel = metric
1150
+
1151
+ ylabel = {
1152
+ "success_rate_mean" : "Success rate",
1153
+ "exploration_bonus_mean": "Exploration bonus",
1154
+ "NPC_intro": "Successful introduction (%)",
1155
+ }.get(ylabel, ylabel)
1156
+
1157
+
1158
+ if metric == 'duration':
1159
+ ylabel = "time (hours)"
1160
+ means = means / 3600
1161
+ sems = sems / 3600
1162
+ stds = stds / 3600
1163
+
1164
+ if per_seed:
1165
+ #plot x y bounds
1166
+ curr_max_y = np.max(np.max(means))
1167
+ curr_min_y = np.min(np.min(means))
1168
+ curr_max_steps = np.max(np.max(steps))
1169
+
1170
+ else:
1171
+ # plot x y bounds
1172
+ curr_max_y = np.max(means+stds)
1173
+ curr_min_y = np.min(means-stds)
1174
+ curr_max_steps = np.max(steps)
1175
+
1176
+ if curr_max_y > max_y:
1177
+ max_y = curr_max_y
1178
+ if curr_min_y < min_y:
1179
+ min_y = curr_min_y
1180
+
1181
+ if curr_max_steps > max_steps:
1182
+ max_steps = curr_max_steps
1183
+
1184
+ if subsample_step:
1185
+ steps = steps[0::subsample_step]
1186
+ means = means[0::subsample_step]
1187
+ stds = stds[0::subsample_step]
1188
+ sems = sems[0::subsample_step]
1189
+ ys_same_len = [y[0::subsample_step] for y in ys_same_len]
1190
+
1191
+ # display seeds separately
1192
+ if per_seed:
1193
+ for s_i, seed_ys in enumerate(ys_same_len):
1194
+ seed_c = default_colors[model_i+s_i]
1195
+ # label = m_id#+"(s:{})".format(s_i)
1196
+ label = str(s_i)
1197
+ seed_ys = smooth(seed_ys, smooth_factor)
1198
+ plot_with_shade_seed(0, ax[metric_i], steps, seed_ys, None, seed_c, seed_c, label,
1199
+ legend=draw_legend, xlim=[0, max_steps], ylim=[min_y, max_y],
1200
+ leg_size=leg_size, xlabel=f"Env steps (1e6)", ylabel=ylabel, linewidth=linewidth,
1201
+ labelsize=fontsize,
1202
+ # fontsize=fontsize,
1203
+ )
1204
+
1205
+ summary_dict[s_i] = seed_ys[-1]
1206
+ summary_dict_colors[s_i] = seed_c
1207
+ else:
1208
+ label = label_parser(m_id, load_pattern, label_parser_dict=label_parser_dict)
1209
+
1210
+ if color_dict:
1211
+ color = color_dict[label]
1212
+ else:
1213
+ color = default_colors[model_i]
1214
+
1215
+ label = label+"({})".format(n_seeds)
1216
+
1217
+
1218
+ if smooth_factor:
1219
+ means = smooth(means, smooth_factor)
1220
+ stds = smooth(stds, smooth_factor)
1221
+
1222
+ x_lim = max(steps[-1], x_lim)
1223
+ x_lim = min(max_x_lim, x_lim)
1224
+
1225
+ leg_args = {
1226
+ 'fontsize': legend_fontsize
1227
+ }
1228
+
1229
+ plot_with_shade_grg(
1230
+ 0, ax[metric_i], steps, means, stds, color, color, label,
1231
+ legend=draw_legend and metric_i == 0,
1232
+ xlim=[0, x_lim],
1233
+ ylim=[0, max_y],
1234
+ xlabel=f"Env steps (1e6)",
1235
+ ylabel=ylabel,
1236
+ title=None,
1237
+ labelsize=fontsize*train_inc_font,
1238
+ fontsize=fontsize*train_inc_font,
1239
+ title_fontsize=title_fontsize,
1240
+ linewidth=linewidth,
1241
+ leg_linewidth=5,
1242
+ leg_args=leg_args,
1243
+ xnbins=xnbins,
1244
+ ynbins=ynbins,
1245
+ )
1246
+ summary_dict[label] = means[-1]
1247
+ summary_dict_colors[label] = color
1248
+
1249
+ if len(summary_dict) == 0:
1250
+ raise ValueError(f"No experiments found for {load_pattern}.")
1251
+
1252
+ # print summary
1253
+ best = max(summary_dict.values())
1254
+
1255
+ pc = 0.3
1256
+ n = int(len(summary_dict)*pc)
1257
+ print("top n: ", n)
1258
+
1259
+ top_pc = sorted(summary_dict.values())[-n:]
1260
+ bottom_pc = sorted(summary_dict.values())[:n]
1261
+
1262
+ print("legend:")
1263
+ cprint("\tbest", "green")
1264
+ cprint("\ttop {} %".format(pc), "blue")
1265
+ cprint("\tbottom {} %".format(pc), "red")
1266
+ print("\tothers")
1267
+ print()
1268
+
1269
+
1270
+ for l, p in sorted(summary_dict.items(), key=lambda kv: kv[1]):
1271
+
1272
+ c = summary_dict_colors[l]
1273
+ if p == best:
1274
+ cprint("label: {} ({})".format(l, c), "green")
1275
+ cprint("\t {}:{}".format(metric, p), "green")
1276
+
1277
+ elif p in top_pc:
1278
+ cprint("label: {} ({})".format(l, c), "blue")
1279
+ cprint("\t {}:{}".format(metric, p), "blue")
1280
+
1281
+ elif p in bottom_pc:
1282
+ cprint("label: {} ({})".format(l, c), "red")
1283
+ cprint("\t {}:{}".format(metric, p), "red")
1284
+
1285
+ else:
1286
+ print("label: {} ({})".format(l, c))
1287
+ print("\t {}:{}".format(metric, p))
1288
+
1289
+ for label, (mean, std, color) in static_lines.items():
1290
+ plot_with_shade_grg(
1291
+ 0, ax[metric_i], steps, np.array([mean]*len(steps)), np.array([std]*len(steps)), color, color, label,
1292
+ legend=True,
1293
+ xlim=[0, x_lim],
1294
+ ylim=[0, 1.0],
1295
+ xlabel=f"Env steps (1e6)",
1296
+ ylabel=ylabel,
1297
+ linestyle=":",
1298
+ leg_args=leg_args,
1299
+ fontsize=fontsize,
1300
+ title_fontsize=title_fontsize,
1301
+ xnbins=xnbins,
1302
+ ynbins=ynbins,
1303
+ )
1304
+
1305
+ # plt.tight_layout()
1306
+ # f.savefig('graphics/{}_{}_results.svg'.format(str(figure_id, metric)))
1307
+ # f.savefig('graphics/{}_{}_results.png'.format(str(figure_id, metric)))
1308
+ cprint("Ignore pattern: {}".format(ignore_patterns), "blue")
1309
+ if plot_train:
1310
+ plt.tight_layout()
1311
+ # plt.subplots_adjust(hspace=1.5, wspace=0.5, left=0.1, right=0.9, bottom=0.1, top=0.85)
1312
+ plt.subplots_adjust(hspace=1.5, wspace=0.5, left=0.1, right=0.9, bottom=0.1, top=0.85)
1313
+ plt.suptitle(super_title)
1314
+ plt.show()
1315
+ plt.close()
1316
+
1317
+ curr_max_y = 0
1318
+ x_lim = 0
1319
+
1320
+ max_y = -np.inf
1321
+ min_y = np.inf
1322
+ # hardcoded
1323
+ min_y, max_y = 0.0, 1.0
1324
+
1325
+ grid = True
1326
+ draw_eval_legend = True
1327
+
1328
+ if study_eval:
1329
+ print("Evaluation")
1330
+ # evaluation sets
1331
+ number_of_eval_envs = max(list([len(v.keys()) for v in model_eval_data.values()]))
1332
+
1333
+ if plot_aggregated_test:
1334
+ number_of_eval_envs += 1
1335
+
1336
+ if number_of_eval_envs == 0:
1337
+ print("No eval envs")
1338
+ exit()
1339
+
1340
+ if plot_only_aggregated_test:
1341
+ f, ax = plt.subplots(1, 1, figsize=(9.0, 9.0))
1342
+
1343
+ else:
1344
+ if grid:
1345
+ # grid
1346
+ subplot_y = math.ceil(math.sqrt(number_of_eval_envs))
1347
+ subplot_x = math.ceil(number_of_eval_envs / subplot_y)
1348
+ # from IPython import embed; embed()
1349
+
1350
+ while subplot_x % 1 != 0:
1351
+ subplot_y -= 1
1352
+ subplot_x = number_of_eval_envs / subplot_y
1353
+
1354
+ if subplot_x == 1:
1355
+ subplot_y = math.ceil(math.sqrt(number_of_eval_envs))
1356
+ subplot_x = math.floor(math.sqrt(number_of_eval_envs))
1357
+
1358
+ subplot_y = int(subplot_y)
1359
+ subplot_x = int(subplot_x)
1360
+
1361
+ assert subplot_y * subplot_x >= number_of_eval_envs
1362
+
1363
+ f, ax_ = plt.subplots(subplot_y, subplot_x, figsize=(6.0, 6.0), sharey=False) #, sharex=True, sharey=True)
1364
+
1365
+ if subplot_y != 1:
1366
+ ax = list(chain.from_iterable(ax_))
1367
+ else:
1368
+ ax=ax_
1369
+
1370
+ else:
1371
+ # flat
1372
+ f, ax = plt.subplots(1, number_of_eval_envs, figsize=(15.0, 9.0)) #), sharey=True, sharex=True)
1373
+
1374
+ if number_of_eval_envs == 1:
1375
+ ax = [ax]
1376
+
1377
+ default_colors = default_colors_.copy()
1378
+
1379
+ test_summary_dict = defaultdict(dict)
1380
+ test_summary_dict_colors = defaultdict(dict)
1381
+
1382
+ for model_i, m_id in enumerate(model_eval_data.keys()):
1383
+ # excluding some experiments
1384
+ if any([ex_pat in m_id for ex_pat in exclude_patterns]):
1385
+ continue
1386
+ if len(include_patterns) > 0:
1387
+ if not any([in_pat in m_id for in_pat in include_patterns]):
1388
+ continue
1389
+
1390
+ # computes stats
1391
+ if sort_test:
1392
+ test_envs_sorted = enumerate(sorted(model_eval_data[m_id].items(), key=lambda kv: sort_test_set(kv[0])))
1393
+ else:
1394
+ test_envs_sorted = enumerate(model_eval_data[m_id].items())
1395
+
1396
+ if plot_aggregated_test:
1397
+ agg_means = []
1398
+
1399
+ for env_i, (test_env, env_data) in test_envs_sorted:
1400
+ ys_same_len = env_data["values"]
1401
+ steps = env_data["steps"].mean(0) / steps_denom
1402
+ n_seeds = len(ys_same_len)
1403
+
1404
+ if per_seed:
1405
+ sems = np.array(ys_same_len)
1406
+ stds = np.array(ys_same_len)
1407
+ means = np.array(ys_same_len)
1408
+ color = default_colors[model_i]
1409
+
1410
+ else:
1411
+ sems = np.std(ys_same_len, axis=0) / np.sqrt(len(ys_same_len)) # sem
1412
+ stds = np.std(ys_same_len, axis=0) # std
1413
+ means = np.mean(ys_same_len, axis=0)
1414
+ color = default_colors[model_i]
1415
+
1416
+ # per-metric adjustments
1417
+
1418
+ if per_seed:
1419
+ # plot x y bounds
1420
+ curr_max_y = np.max(np.max(means))
1421
+ curr_min_y = np.min(np.min(means))
1422
+ curr_max_steps = np.max(np.max(steps))
1423
+
1424
+ else:
1425
+ # plot x y bounds
1426
+ curr_max_y = np.max(means + stds)
1427
+ curr_min_y = np.min(means - stds)
1428
+ curr_max_steps = np.max(steps)
1429
+
1430
+ if plot_aggregated_test:
1431
+ agg_means.append(means)
1432
+
1433
+ if curr_max_y > max_y:
1434
+ max_y = curr_max_y
1435
+ if curr_min_y < min_y:
1436
+ min_y = curr_min_y
1437
+
1438
+ x_lim = max(steps[-1], x_lim)
1439
+ x_lim = min(max_x_lim, x_lim)
1440
+
1441
+ eval_metric_name = {
1442
+ "test_success_rates": "Success rate",
1443
+ 'exploration_bonus_mean': "Exploration bonus",
1444
+
1445
+ }.get(eval_metric, eval_metric)
1446
+
1447
+ test_env_name = test_env.replace("Env", "").replace("Test", "")
1448
+
1449
+ env_types = ["InformationSeeking", "Collaboration", "PerspectiveTaking"]
1450
+ for env_type in env_types:
1451
+ if env_type in test_env_name:
1452
+ test_env_name = test_env_name.replace(env_type, "")
1453
+ test_env_name += f"\n({env_type})"
1454
+
1455
+ if grid:
1456
+ ylabel = eval_metric_name
1457
+ title = test_env_name
1458
+
1459
+ else:
1460
+ # flat
1461
+ ylabel = test_env_name
1462
+ title = eval_metric_name
1463
+
1464
+ leg_args = {
1465
+ 'fontsize': legend_fontsize // 1
1466
+ }
1467
+
1468
+ if per_seed:
1469
+ for s_i, seed_ys in enumerate(ys_same_len):
1470
+ seed_c = default_colors[model_i + s_i]
1471
+ # label = m_id#+"(s:{})".format(s_i)
1472
+ label = str(s_i)
1473
+
1474
+ if not plot_only_aggregated_test:
1475
+ seed_ys = smooth(seed_ys, eval_smooth_factor)
1476
+ plot_with_shade_seed(0, ax[env_i], steps, seed_ys, None, seed_c, seed_c, label,
1477
+ legend=draw_eval_legend, xlim=[0, x_lim], ylim=[min_y, max_y],
1478
+ leg_size=leg_size, xlabel=f"Steps (1e6)", ylabel=ylabel, linewidth=linewidth, title=title)
1479
+
1480
+ test_summary_dict[s_i][test_env] = seed_ys[-1]
1481
+ test_summary_dict_colors[s_i] = seed_c
1482
+ else:
1483
+ label = label_parser(m_id, load_pattern, label_parser_dict=label_parser_dict)
1484
+
1485
+ if not plot_only_aggregated_test:
1486
+
1487
+ if color_dict:
1488
+ color = color_dict[label]
1489
+ else:
1490
+ color = default_colors[model_i]
1491
+
1492
+ label = label + "({})".format(n_seeds)
1493
+
1494
+ if smooth_factor:
1495
+ means = smooth(means, eval_smooth_factor)
1496
+ stds = smooth(stds, eval_smooth_factor)
1497
+
1498
+ plot_with_shade_grg(
1499
+ 0, ax[env_i], steps, means, stds, color, color, label,
1500
+ legend=draw_eval_legend,
1501
+ xlim=[0, x_lim+1],
1502
+ ylim=[0, max_y],
1503
+ xlabel=f"Env steps (1e6)" if env_i // (subplot_x) == subplot_y -1 else None, # only last line
1504
+ ylabel=ylabel if env_i % subplot_x == 0 else None, # only first row
1505
+ title=title,
1506
+ title_fontsize=title_fontsize,
1507
+ labelsize=fontsize,
1508
+ fontsize=fontsize,
1509
+ linewidth=linewidth,
1510
+ leg_linewidth=5,
1511
+ leg_args=leg_args,
1512
+ xnbins=xnbins,
1513
+ ynbins=ynbins,
1514
+ )
1515
+
1516
+ test_summary_dict[label][test_env] = means[-1]
1517
+ test_summary_dict_colors[label] = color
1518
+
1519
+ if plot_aggregated_test:
1520
+ if plot_only_aggregated_test:
1521
+ agg_env_i = 0
1522
+ else:
1523
+ agg_env_i = number_of_eval_envs - 1 # last one
1524
+
1525
+ agg_means = np.array(agg_means)
1526
+ agg_mean = agg_means.mean(axis=0)
1527
+ agg_std = agg_means.std(axis=0) # std
1528
+
1529
+ if smooth_factor and not per_seed:
1530
+ agg_mean = smooth(agg_mean, eval_smooth_factor)
1531
+ agg_std = smooth(agg_std, eval_smooth_factor)
1532
+
1533
+ if color_dict:
1534
+ color = color_dict[re.sub(r"\([0-9]\)", '', label)]
1535
+ else:
1536
+ color = default_colors[model_i]
1537
+
1538
+ if per_seed:
1539
+ print("Not smooth aggregated because of per seed")
1540
+ for s_i, (seed_ys, seed_st) in enumerate(zip(agg_mean, agg_std)):
1541
+ seed_c = default_colors[model_i + s_i]
1542
+ # label = m_id#+"(s:{})".format(s_i)
1543
+ label = str(s_i)
1544
+ # seed_ys = smooth(seed_ys, eval_smooth_factor)
1545
+ plot_with_shade_seed(0,
1546
+ ax if plot_only_aggregated_test else ax[agg_env_i],
1547
+ steps, seed_ys, seed_st, seed_c, seed_c, label,
1548
+ legend=draw_eval_legend, xlim=[0, x_lim], ylim=[min_y, max_y],
1549
+ labelsize=fontsize,
1550
+ filename=eval_filename,
1551
+ leg_size=leg_size, xlabel=f"Steps (1e6)", ylabel=ylabel, linewidth=1, title=agg_title)
1552
+ else:
1553
+
1554
+ # just used for creating a dummy Imitation test figure -> delete
1555
+ # agg_mean = agg_mean * 0.1
1556
+ # agg_std = agg_std * 0.1
1557
+ # max_y = 1
1558
+
1559
+ plot_with_shade_grg(
1560
+ 0,
1561
+ ax if plot_only_aggregated_test else ax[agg_env_i],
1562
+ steps, agg_mean, agg_std, color, color, label,
1563
+ legend=draw_eval_legend,
1564
+ xlim=[0, x_lim + 1],
1565
+ ylim=[0, max_y],
1566
+ xlabel=f"Steps (1e6)" if plot_only_aggregated_test or (agg_env_i // (subplot_x) == subplot_y - 1) else None, # only last line
1567
+ ylabel=ylabel if plot_only_aggregated_test or (agg_env_i % subplot_x == 0) else None, # only first row
1568
+ title_fontsize=title_fontsize,
1569
+ title=agg_title,
1570
+ labelsize=fontsize,
1571
+ fontsize=fontsize,
1572
+ linewidth=linewidth,
1573
+ leg_linewidth=5,
1574
+ leg_args=leg_args,
1575
+ xnbins=xnbins,
1576
+ ynbins=ynbins,
1577
+ filename=eval_filename,
1578
+ )
1579
+
1580
+ # print summary
1581
+
1582
+ means_dict = {
1583
+ lab: np.array(list(lab_sd.values())).mean() for lab, lab_sd in test_summary_dict.items()
1584
+ }
1585
+ best = max(means_dict.values())
1586
+
1587
+ pc = 0.3
1588
+ n = int(len(means_dict) * pc)
1589
+ print("top n: ", n)
1590
+
1591
+ top_pc = sorted(means_dict.values())[-n:]
1592
+ bottom_pc = sorted(means_dict.values())[:n]
1593
+
1594
+ print("Legend:")
1595
+ cprint("\tbest", "green")
1596
+ cprint("\ttop {} %".format(pc), "blue")
1597
+ cprint("\tbottom {} %".format(pc), "red")
1598
+ print("\tothers")
1599
+ print()
1600
+
1601
+ for l, l_mean in sorted(means_dict.items(), key=lambda kv: kv[1]):
1602
+
1603
+ l_summary_dict = test_summary_dict[l]
1604
+
1605
+ c = test_summary_dict_colors[l]
1606
+ print("label: {} ({})".format(l, c))
1607
+
1608
+ #print("\t{}({}) - Mean".format(l_mean, metric))
1609
+
1610
+ if l_mean == best:
1611
+ cprint("\t{}({}) - Mean".format(l_mean, eval_metric), "green")
1612
+
1613
+ elif l_mean in top_pc:
1614
+ cprint("\t{}({}) - Mean".format(l_mean, eval_metric), "blue")
1615
+
1616
+ elif l_mean in bottom_pc:
1617
+ cprint("\t{}({}) - Mean".format(l_mean, eval_metric), "red")
1618
+
1619
+ else:
1620
+ print("\t{}({})".format(l_mean, eval_metric))
1621
+
1622
+ n_over_50 = 0
1623
+
1624
+ if sort_test:
1625
+ sorted_envs = sorted(l_summary_dict.items(), key=lambda kv: sort_test_set(env_name=kv[0]))
1626
+ else:
1627
+ sorted_envs = l_summary_dict.items()
1628
+
1629
+ for tenv, p in sorted_envs:
1630
+ if p < 0.5:
1631
+ print("\t{:4f}({}) - \t{}".format(p, eval_metric, tenv))
1632
+ else:
1633
+ print("\t{:4f}({}) -*\t{}".format(p, eval_metric, tenv))
1634
+ n_over_50 += 1
1635
+ print("\tenv over 50 - {}/{}".format(n_over_50, len(l_summary_dict)))
1636
+
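A note on the bucketing above: top_pc and bottom_pc are value lists, so the float membership tests (l_mean in top_pc) can put tied means into both buckets. A rank-based sketch that avoids this, assuming the same means_dict and pc:

    labels_by_mean = sorted(means_dict, key=means_dict.get)  # ascending by mean
    n = max(1, int(len(labels_by_mean) * pc))
    bottom_set = set(labels_by_mean[:n])    # worst n labels
    top_set = set(labels_by_mean[-n:])      # best n labels
    # then color on label membership instead of float-value membership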
1637
+ if plot_test:
1638
+ plt.tight_layout()
1639
+ # plt.subplots_adjust(hspace=0.8, wspace=0.15, left=0.035, right=0.99, bottom=0.065, top=0.93)
1640
+ plt.show()
1641
+
1642
+ if eval_filename is not None:
1643
+ plt.subplots_adjust(hspace=0.8, wspace=0.15, left=0.15, right=0.99, bottom=0.15, top=0.93)
1644
+
1645
+ res = input(f"Save to {eval_filename} (y/n)?")
1646
+ if res == "y":
1647
+ f.savefig(eval_filename)
1648
+ print(f'saved to {eval_filename}')
1649
+ else:
1650
+ print('not saved')
data_analysis_neurips.py ADDED
@@ -0,0 +1,570 @@
1
+ #!/usr/bin/env python
2
+ import seaborn
3
+ import numpy as np
4
+ import os
5
+ from collections import OrderedDict
6
+ import pandas as pd
7
+ import matplotlib.pyplot as plt
8
+ import sys
9
+ from termcolor import cprint
+ import json  # needed below: config files are parsed with json.load
10
+
11
+ # Load data
12
+
13
+ # Global vars for tracking and labeling data at load time.
14
+ exp_idx = 0
15
+ label_parser_dict = None
16
+
17
+ smooth_factor = 10
18
+ leg_size = 30
19
+
20
+ subsample_step = 1
21
+ load_subsample_step = 50
22
+
23
+ default_colors = ["blue","orange","green","magenta", "brown", "red",'black',"grey",u'#ff7f0e',
24
+ "cyan", "pink",'purple', u'#1f77b4',
25
+ "darkorchid","sienna","lightpink", "indigo","mediumseagreen",'aqua',
26
+ 'deeppink','silver','khaki','goldenrod','y','y','y','y','y','y','y','y','y','y','y','y' ] + ['y']*50
27
+
28
+ def get_all_runs(logdir, load_subsample_step=1):
29
+ """
30
+ Recursively look through logdir for run directories.
31
+ Assumes that any directory containing a "log.csv" file is a valid run.
32
+ """
33
+ global exp_idx
34
35
+ datasets = []
36
+ for root, _, files in os.walk(logdir):
37
+ if 'log.csv' in files:
38
+ run_name = root[8:]  # strip the leading "storage/" prefix
39
+ exp_name = None
40
+
41
+ # try to load a config file containing hyperparameters
42
+ config = None
43
+ try:
44
+ with open(os.path.join(root, 'config.json')) as config_file:
45
+ config = json.load(config_file)
46
+ if 'exp_name' in config:
47
+ exp_name = config['exp_name']
48
+ except (OSError, json.JSONDecodeError):
49
+ print('No file named config.json')
50
+
51
+ exp_idx += 1
52
+
53
+ # load progress data
54
+ try:
55
+ print(os.path.join(root,'log.csv'))
56
+ exp_data = pd.read_csv(os.path.join(root,'log.csv'))
57
+ except Exception:
58
+ raise ValueError("CSV {} faulty".format(os.path.join(root, 'log.csv')))
59
+
60
+ exp_data = exp_data[::load_subsample_step]
61
+ data_dict = exp_data.to_dict("list")
62
+
63
+ data_dict['config'] = config
64
+ nb_epochs = len(data_dict['frames'])
65
+ print('{} -> {}'.format(run_name, nb_epochs))
66
+
67
+
68
+ datasets.append(data_dict)
69
+
70
+ return datasets
71
+
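For orientation, a minimal sketch of how get_all_runs is consumed (assuming such a run directory exists under storage/; each returned entry is a column-name -> list dict plus a 'config' key):

    runs = get_all_runs("storage/RERUN_WizardGuide_lang64_mm", load_subsample_step=50)
    for run in runs:
        print(len(run['frames']), run['config'] is not None)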
72
+ def get_datasets(rootdir, load_only="", load_subsample_step=1, ignore_pattern="ignore"):
73
+ _, models_list, _ = next(os.walk(rootdir))
74
+ print(models_list)
75
+ for dir_name in models_list.copy():
76
+ # add "ignore" in a directory name to avoid loading its content
77
+ if ignore_pattern in dir_name or load_only not in dir_name:
78
+ models_list.remove(dir_name)
79
+ for expe_name in list(labels.keys()):
80
+ if expe_name not in models_list:
81
+ del labels[expe_name]
82
+
83
+ # setting per-model type colors
84
+ for i,m_name in enumerate(models_list):
85
+ for m_type, m_color in per_model_colors.items():
86
+ if m_type in m_name:
87
+ colors[m_name] = m_color
88
+ print("extracting data for {}...".format(m_name))
89
+ m_id = m_name
90
+ models_saves[m_id] = OrderedDict()
91
+ models_saves[m_id]['data'] = get_all_runs(rootdir+m_name, load_subsample_step=load_subsample_step)
92
+ print("done")
93
+ if m_name not in labels:
94
+ labels[m_name] = m_name
95
+
96
+ """
97
+ retrieve all experiences located in "data to vizu" folder
98
+ """
99
+ labels = OrderedDict()
100
+ per_model_colors = OrderedDict()
101
+ # per_model_colors = OrderedDict([('ALP-GMM',u'#1f77b4'),
102
+ # ('hmn','pink'),
103
+ # ('ADR','black')])
104
+
105
+ # LOAD DATA
106
+ models_saves = OrderedDict()
107
+ colors = OrderedDict()
108
+
109
+ static_lines = {}
110
+ # get_datasets("storage/",load_only="RERUN_WizardGuide")
111
+ # get_datasets("storage/",load_only="RERUN_WizardTwoGuides")
112
+ try:
113
+ figure_id = int(sys.argv[1])  # numeric figure ids; eval() was unsafe and unnecessary
114
+ except ValueError:
115
+ figure_id = sys.argv[1]
116
+
117
+ print("fig:", figure_id)
118
+ if figure_id == 0:
119
+ # train change
120
+ env_type = "No_NPC_environment"
121
+ fig_type = "train"
122
+
123
+ get_datasets("storage/", "RERUN_WizardGuide_lang64_mm", load_subsample_step=load_subsample_step)
124
+ get_datasets("storage/", "RERUN_WizardGuide_lang64_deaf_no_explo", load_subsample_step=load_subsample_step)
125
+ get_datasets("storage/", "RERUN_WizardGuide_lang64_no_explo", load_subsample_step=load_subsample_step)
126
+ get_datasets("storage/", "RERUN_WizardGuide_lang64_curr_dial", load_subsample_step=load_subsample_step)
127
+ top_n = 16
128
+ elif figure_id == 1:
129
+ # arch change
130
+ env_type = "No_NPC_environment"
131
+ fig_type = "arch"
132
+
133
+ get_datasets("storage/", "RERUN_WizardGuide_lang64_mm", load_subsample_step=load_subsample_step)
134
+ get_datasets("storage/", "RERUN_WizardGuide_lang64_bow", load_subsample_step=load_subsample_step)
135
+ get_datasets("storage/", "RERUN_WizardGuide_lang64_no_mem", load_subsample_step=load_subsample_step)
136
+ get_datasets("storage/", "RERUN_WizardGuide_lang64_bigru", load_subsample_step=load_subsample_step)
137
+ get_datasets("storage/", "RERUN_WizardGuide_lang64_attgru", load_subsample_step=load_subsample_step)
138
+ top_n = 16
139
+ elif figure_id == 2:
140
+ # train change FULL
141
+ env_type = "FULL_environment"
142
+ fig_type = "train"
143
+
144
+ get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_mm", load_subsample_step=load_subsample_step)
145
+ get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_deaf_no_explo", load_subsample_step=load_subsample_step)
146
+ get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_no_explo", load_subsample_step=load_subsample_step)
147
+ get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_curr_dial", load_subsample_step=load_subsample_step)
148
+ top_n = 16
149
+ elif figure_id == 3:
150
+ # arch change FULL
151
+ env_type = "FULL_environment"
152
+ fig_type = "arch"
153
+
154
+ get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_mm", load_subsample_step=load_subsample_step)
155
+ get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_bow", load_subsample_step=load_subsample_step)
156
+ get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_no_mem", load_subsample_step=load_subsample_step)
157
+ get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_bigru", load_subsample_step=load_subsample_step)
158
+ get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_attgru", load_subsample_step=load_subsample_step)
159
+ top_n = 16
160
+ elif str(figure_id) == "ShowMe":
161
+
162
+ get_datasets("storage/", "20-05_NeurIPS_ShowMe_ABL_CEB", load_subsample_step=load_subsample_step, ignore_pattern="tanh_0.3")
163
+ get_datasets("storage/", "20-05_NeurIPS_ShowMe_NO_BONUS_ABL", load_subsample_step=load_subsample_step)
164
+ get_datasets("storage/", "20-05_NeurIPS_ShowMe_CEB", load_subsample_step=load_subsample_step, ignore_pattern="tanh_0.3")
165
+ get_datasets("storage/", "20-05_NeurIPS_ShowMe_NO_BONUS_env", load_subsample_step=load_subsample_step)
166
+
167
+ label_parser_dict = {
168
+ "20-05_NeurIPS_ShowMe_ABL_CEB" : "ShowMe_exp_bonus_no_social_skills_required",
169
+ "20-05_NeurIPS_ShowMe_NO_BONUS_ABL" : "ShowMe_no_bonus_no_social_skills_required",
170
+ "20-05_NeurIPS_ShowMe_CEB" : "ShowMe_exp_bonus",
171
+ "20-05_NeurIPS_ShowMe_NO_BONUS_env" : "ShowMe_no_bonus",
172
+ }
173
+
174
+ env_type = str(figure_id)
175
+
176
+ fig_type = "test"
177
+ top_n = 16
178
+
179
+ elif str(figure_id) == "Help":
180
+
181
+ # env_type = "Bobo"
182
+ # get_datasets("storage/", "Bobo")
183
+ get_datasets("storage/", "24-05_NeurIPS_Help", load_subsample_step=load_subsample_step, ignore_pattern="ABL")
184
+ # get_datasets("storage/", "26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_ABL", load_subsample_step=load_subsample_step)
185
+ get_datasets("storage/", "26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_env", load_subsample_step=load_subsample_step)
186
+
187
+ label_parser_dict = {
188
+ "Help_NO_BONUS_env": "PPO",
189
+ "Help_BONUS_env": "PPO+Explo",
190
+ # "Help_NO_BONUS_ABL_env": "ExiterRole_no_bonus_no_NPC",
191
+ # "Help_BONUS_ABL_env": "ExiterRole_bonus_no_NPC",
192
+ "26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_env": "Unsocial PPO",
193
+ # "26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_ABL": "ExiterRole_Insocial_ABL"
194
+ }
195
+
196
+ static_lines = {
197
+ "PPO (helper)": (0.12, 0.05, "#1f77b4"),
198
+ "PPO+Explo (helper)": (0.11, 0.04, "indianred"),
199
+ # "Help_exp_bonus": (0.11525, 0.04916 , default_colors[2]),
200
+ # "HelperRole_ABL_no_exp_bonus": (0.022375, 0.01848, default_colors[3]),
201
+ "Unsocial PPO (helper)": (0.15, 0.06, "grey"),
202
+ # "HelperRole_ABL_Insocial": (0.01775, 0.010544, default_colors[4]),
203
+ }
204
+
205
+ env_type = str(figure_id)
206
+
207
+ fig_type = "test"
208
+ top_n = 16
209
+
210
+ elif str(figure_id) == "TalkItOut":
211
+ print("You mean Polite")
212
+ exit()
213
+
214
+ elif str(figure_id) == "TalkItOutPolite":
215
+ # env_type = "TalkItOut"
216
+ # get_datasets("storage/", "ORIENT_env_MiniGrid-TalkItOut")
217
+
218
+ # env_type = "GuideThief"
219
+ # get_datasets("storage/", "GuideThief")
220
+
221
+ # env_type = "Bobo"
222
+ # get_datasets("storage/", "Bobo")
223
+ get_datasets("storage/", "20-05_NeurIPS_TalkItOutPolite", load_subsample_step=load_subsample_step)
224
+ # get_datasets("storage/", "21-05_NeurIPS_small_bonus_TalkItOutPolite")
225
+ get_datasets("storage/", "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_env", load_subsample_step=load_subsample_step)
226
+ get_datasets("storage/", "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_NoLiar", load_subsample_step=load_subsample_step)
227
+
228
+ label_parser_dict = {
229
+ "TalkItOutPolite_NO_BONUS_env": "PPO",
230
+ "TalkItOutPolite_e": "PPO+Explo",
231
+ "TalkItOutPolite_NO_BONUS_NoLiar": "PPO (no liar)",
232
+ "TalkItOutPolite_NoLiar_e": "PPO+Explo (no liar)",
233
+ "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_env": "Unsocial PPO",
234
+ "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_NoLiar": "Unsocial PPO (no liar)",
235
+ }
236
+
237
+
238
+ env_type = str(figure_id)
239
+
240
+ fig_type = "test"
241
+ top_n = 16
242
+
243
+ elif str(figure_id) == "DiverseExit":
244
+ get_datasets("storage/", "24-05_NeurIPS_DiverseExit", load_subsample_step=load_subsample_step)
245
+ get_datasets("storage/", "26-05_NeurIPS_gpu_DiverseExit", load_subsample_step=load_subsample_step)
246
+
247
+ label_parser_dict = {
248
+ "DiverseExit_NO_BONUS": "No_bonus",
249
+ "DiverseExit_BONUS": "BOnus",
250
+ "gpu_DiverseExit_NoSocial": "No_social",
251
+ }
252
+
253
+ env_type = str(figure_id)
254
+
255
+ fig_type = "test"
256
+ top_n = 16
257
+
258
+ else:
259
+ get_datasets("storage/", str(figure_id), load_subsample_step=load_subsample_step)
260
+
261
+ env_type = str(figure_id)
262
+
263
+ fig_type = "test"
264
+ top_n = 8
265
+
266
+ #### get_datasets("storage/", "RERUN_WizardGuide_lang64_nameless")
267
+ #### get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_nameless")
268
+
269
+
270
+ if per_model_colors: # order runs for legend order as in per_models_colors, with corresponding colors
271
+ ordered_labels = OrderedDict()
272
+ for teacher_type in per_model_colors.keys():
273
+ for k,v in labels.items():
274
+ if teacher_type in k:
275
+ ordered_labels[k] = v
276
+ labels = ordered_labels
277
+ else:
278
+ print('not using per_model_color')
279
+ for k in models_saves.keys():
280
+ labels[k] = k
281
+
282
+ def plot_with_shade(subplot_nb, ax,x,y,err,color,shade_color,label,
283
+ y_min=None,y_max=None, legend=False, leg_size=30, leg_loc='best', title=None,
284
+ ylim=[0,100], xlim=[0,40], leg_args={}, leg_linewidth=13.0, linewidth=10.0, ticksize=20,
285
+ zorder=None, xlabel='perf',ylabel='env steps'):
286
+ #plt.rcParams.update({'font.size': 15})
287
+ ax.locator_params(axis='x', nbins=4)
288
+ ax.locator_params(axis='y', nbins=3)
289
+ ax.tick_params(axis='both', which='major', labelsize=ticksize)
290
+ ax.plot(x,y, color=color, label=label,linewidth=linewidth,zorder=zorder)
291
+ ax.fill_between(x,y-err,y+err,color=shade_color,alpha=0.2)
292
+ if legend:
293
+ leg = ax.legend(loc=leg_loc, **leg_args) #34
294
+ for legobj in leg.legendHandles:
295
+ legobj.set_linewidth(leg_linewidth)
296
+ ax.set_xlabel(xlabel, fontsize=30)
297
+ if subplot_nb == 0:
298
+ ax.set_ylabel(ylabel, fontsize=30,labelpad=-4)
299
+ ax.set_xlim(xmin=xlim[0],xmax=xlim[1])
300
+ ax.set_ylim(bottom=ylim[0],top=ylim[1])
301
+ if title:
302
+ ax.set_title(title, fontsize=22)
303
+ # Plot utils
304
+ def plot_with_shade_grg(subplot_nb, ax,x,y,err,color,shade_color,label,
305
+ y_min=None,y_max=None, legend=False, leg_size=30, leg_loc='best', title=None,
306
+ ylim=[0,100], xlim=[0,40], leg_args={}, leg_linewidth=13.0, linewidth=10.0, ticksize=20,
307
+ zorder=None, xlabel='perf',ylabel='env steps', linestyle="-"):
308
+ #plt.rcParams.update({'font.size': 15})
309
+ ax.locator_params(axis='x', nbins=4)
310
+ ax.locator_params(axis='y', nbins=3)
311
+ ax.tick_params(axis='both', which='major', labelsize=ticksize)
312
+
313
+
314
+ ax.plot(x, y, color=color, label=label,linewidth=linewidth,zorder=zorder, linestyle=linestyle)
315
+ ax.fill_between(x, y-err, y+err,color=shade_color,alpha=0.2)
316
+ if legend:
317
+ leg = ax.legend(loc=leg_loc, **leg_args) #34
318
+ for legobj in leg.legendHandles:
319
+ legobj.set_linewidth(leg_linewidth)
320
+ ax.set_xlabel(xlabel, fontsize=30)
321
+ if subplot_nb == 0:
322
+ ax.set_ylabel(ylabel, fontsize=30, labelpad=-4)
323
+ ax.set_xlim(xmin=xlim[0],xmax=xlim[1])
324
+ ax.set_ylim(bottom=ylim[0],top=ylim[1])
325
+ if title:
326
+ ax.set_title(title, fontsize=22)
327
+
328
+
329
+ # Metric plot
330
+ metric = 'bin_extrinsic_return_mean'
331
+ # metric = 'mission_string_observed_mean'
332
+ # metric = 'extrinsic_return_mean'
333
+ # metric = 'extrinsic_return_max'
334
+ # metric = "rreturn_mean"
335
+ # metric = 'rreturn_max'
336
+ # metric = 'FPS'
337
+
338
+ f, ax = plt.subplots(1, 1, figsize=(10.0, 6.0))
339
+ ax = [ax]
340
+ max_y = -np.inf
341
+ min_y = np.inf
342
+ # hardcoded
343
+ min_y, max_y = 0.0, 1.0
344
+ max_steps = 0
345
+ exclude_patterns = []
346
+ include_patterns = []
347
+
348
+
349
+ def label_parser(label, figure_id, label_parser_dict=None):
350
+ if label_parser_dict:
351
+ if sum([1 for k, v in label_parser_dict.items() if k in label]) != 1:
352
+ if label in label_parser_dict:
353
+ # see if there is an exact match
354
+ return label_parser_dict[label]
355
+ else:
356
+ print("ERROR multiple curves match a lable and there is no exact match")
357
+ print(label)
358
+ exit()
359
+
360
+ for k, v in label_parser_dict.items():
361
+ if k in label: return v
362
+
363
+ else:
364
+ # return label.split("_env_")[1]
365
+ if figure_id not in [0, 1, 2, 3]:  # the built-in dict below covers the RERUN_* runs used by figures 0-3
366
+ return label
367
+ else:
368
+ label_parser_dict = {
369
+ "RERUN_WizardGuide_lang64_no_explo": "MH-BabyAI",
370
+ "RERUN_WizardTwoGuides_lang64_no_explo": "MH-BabyAI",
371
+
372
+ "RERUN_WizardGuide_lang64_mm_baby_short_rec_env": "MH-BabyAI-ExpBonus",
373
+ "RERUN_WizardTwoGuides_lang64_mm_baby_short_rec_env": "MH-BabyAI-ExpBonus",
374
+
375
+ "RERUN_WizardGuide_lang64_deaf_no_explo": "Deaf-MH-BabyAI",
376
+ "RERUN_WizardTwoGuides_lang64_deaf_no_explo": "Deaf-MH-BabyAI",
377
+
378
+ "RERUN_WizardGuide_lang64_bow": "MH-BabyAI-ExpBonus-BOW",
379
+ "RERUN_WizardTwoGuides_lang64_bow": "MH-BabyAI-ExpBonus-BOW",
380
+
381
+ "RERUN_WizardGuide_lang64_no_mem": "MH-BabyAI-ExpBonus-no-mem",
382
+ "RERUN_WizardTwoGuides_lang64_no_mem": "MH-BabyAI-ExpBonus-no-mem",
383
+
384
+ "RERUN_WizardGuide_lang64_bigru": "MH-BabyAI-ExpBonus-bigru",
385
+ "RERUN_WizardTwoGuides_lang64_bigru": "MH-BabyAI-ExpBonus-bigru",
386
+
387
+ "RERUN_WizardGuide_lang64_attgru": "MH-BabyAI-ExpBonus-attgru",
388
+ "RERUN_WizardTwoGuides_lang64_attgru": "MH-BabyAI-ExpBonus-attgru",
389
+
390
+ "RERUN_WizardGuide_lang64_curr_dial": "MH-BabyAI-ExpBonus-current-dialogue",
391
+ "RERUN_WizardTwoGuides_lang64_curr_dial": "MH-BabyAI-ExpBonus-current-dialogue",
392
+
393
+ "RERUN_WizardTwoGuides_lang64_mm_baby_short_rec_100M": "MH-BabyAI-ExpBonus-100M"
394
+ }
395
+ if sum([1 for k, v in label_parser_dict.items() if k in label]) != 1:
396
+ print("ERROR multiple curves match a lable")
397
+ print(label)
398
+ exit()
399
+
400
+ for k, v in label_parser_dict.items():
401
+ if k in label: return v
402
+
403
+ return label
404
+
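A quick check of label_parser on a run name of the form used by figures 0-3 (the seed suffix is hypothetical); exactly one key of the built-in dict must match as a substring:

    >>> label_parser("RERUN_WizardGuide_lang64_bow_seed1", figure_id=1, label_parser_dict=None)
    'MH-BabyAI-ExpBonus-BOW'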
405
+ per_seed = False
406
+
407
+ for i, m_id in enumerate(models_saves.keys()):
408
+ #excluding some experiments
409
+ if any([ex_pat in m_id for ex_pat in exclude_patterns]):
410
+ continue
411
+ if len(include_patterns) > 0:
412
+ if not any([in_pat in m_id for in_pat in include_patterns]):
413
+ continue
414
+ runs_data = models_saves[m_id]['data']
415
+ ys = []
416
+
417
+ # DIRTY FIX FOR FAULTY LOGGING
418
+ print("m_id:", m_id)
419
+ if runs_data[0]['frames'][1] == 'frames':
420
+ runs_data[0]['frames'] = list(filter(('frames').__ne__, runs_data[0]['frames']))
421
+ ###########################################
422
+
423
+
424
+ # determine minimal run length across seeds
425
+ run_lens = sorted([len(run['frames']) for run in runs_data if len(run['frames'])])
+ minimum = run_lens[-min(top_n, len(run_lens))]  # guard against having fewer runs than top_n
426
+ min_len = np.min([len(run['frames']) for run in runs_data if len(run['frames']) >= minimum])
427
+
428
+ # min_len = np.min([len(run['frames']) for run in runs_data if len(run['frames']) > 10])
429
+
430
+
431
+ print("min_len:", min_len)
432
+
433
+ #compute env steps (x axis)
434
+ longest_id = np.argmax([len(rd['frames']) for rd in runs_data])
435
+ steps = np.array(runs_data[longest_id]['frames'], dtype=int) / 1000000  # np.int was removed in NumPy 1.24
436
+ steps = steps[:min_len]
437
+ for run in runs_data:
438
+ data = run[metric]
439
+ # DIRTY FIX FOR FAULTY LOGGING (headers in data)
440
+ if data[1] == metric:
441
+ data = np.array(list(filter((metric).__ne__, data)), dtype=np.float16)
442
+ ###########################################
443
+ if len(data) >= min_len:
444
+ if len(data) > min_len:
445
+ print("run has too many {} datapoints ({}). Discarding {}".format(m_id, len(data),
446
+ len(data)-min_len))
447
+ data = data[0:min_len]
448
+ ys.append(data)
449
+ ys_same_len = ys # RUNS MUST HAVE SAME LEN
450
+
451
+ # computes stats
452
+ n_seeds = len(ys_same_len)
453
+ sems = np.std(ys_same_len,axis=0)/np.sqrt(len(ys_same_len)) # sem
454
+ stds = np.std(ys_same_len,axis=0) # std
455
+ means = np.mean(ys_same_len,axis=0)
456
+ color = default_colors[i]
457
+
458
+ # per-metric adjusments
459
+ ylabel=metric
460
+ if metric == 'bin_extrinsic_return_mean':
461
+ ylabel = "success rate"
462
+ if metric == 'duration':
463
+ ylabel = "time (hours)"
464
+ means = means / 3600
465
+ sems = sems / 3600
466
+ stds = stds / 3600
467
+
468
+ #plot x y bounds
469
+ curr_max_y = np.max(means)
470
+ curr_min_y = np.min(means)
471
+ curr_max_steps = np.max(steps)
472
+ if curr_max_y > max_y:
473
+ max_y = curr_max_y
474
+ if curr_min_y < min_y:
475
+ min_y = curr_min_y
476
+ if curr_max_steps > max_steps:
477
+ max_steps = curr_max_steps
478
+
479
+ if subsample_step:
480
+ steps = steps[0::subsample_step]
481
+ means = means[0::subsample_step]
482
+ stds = stds[0::subsample_step]
483
+ sems = sems[0::subsample_step]
484
+ ys_same_len = [y[0::subsample_step] for y in ys_same_len]
485
+
486
+ # display seeds separately
487
+ if per_seed:
488
+ for s_i, seed_ys in enumerate(ys_same_len):
489
+ seed_c = default_colors[i+s_i]
490
+ label = m_id#+"(s:{})".format(s_i)
491
+ plot_with_shade(0, ax[0], steps, seed_ys, stds*0, seed_c, seed_c, label,
492
+ legend=False, xlim=[0, max_steps], ylim=[min_y, max_y],
493
+ leg_size=leg_size, xlabel="env steps (millions)", ylabel=ylabel, smooth_factor=smooth_factor,
494
+ )
495
+ else:
496
+ label = label_parser(m_id, figure_id, label_parser_dict=label_parser_dict)
497
+ label = label #+"({})".format(n_seeds)
498
+
499
+
500
+ def smooth(x_, n=50):
501
+ if type(x_) == list:
502
+ x_ = np.array(x_)
503
+ return np.array([x_[max(i - n, 0):i + 1].mean() for i in range(len(x_))])
504
+ if smooth_factor:
505
+ means = smooth(means,smooth_factor)
506
+ stds = smooth(stds,smooth_factor)
507
+ x_lim = 30
508
+ if figure_id == "TalkItOutPolite":
509
+ leg_args = {
510
+ 'ncol': 1,
511
+ 'columnspacing': 1.0,
512
+ 'handlelength': 1.0,
513
+ 'frameon': False,
514
+ # 'bbox_to_anchor': (0.00, 0.23, 0.10, .102),
515
+ 'bbox_to_anchor': (0.55, 0.35, 0.10, .102),
516
+ 'labelspacing': 0.2,
517
+ 'fontsize': 27
518
+ }
519
+ elif figure_id == "Help":
520
+ leg_args = {
521
+ 'ncol': 1,
522
+ 'columnspacing': 1.0,
523
+ 'handlelength': 1.0,
524
+ 'frameon': False,
525
+ # 'bbox_to_anchor': (0.00, 0.23, 0.10, .102),
526
+ 'bbox_to_anchor': (0.39, 0.20, 0.10, .102),
527
+ 'labelspacing': 0.2,
528
+ 'fontsize': 27
529
+ }
530
+ else:
531
+ leg_args = {}
532
+
533
+ color_code = dict([
534
+ ('PPO+Explo', 'indianred'),
535
+ ('PPO', "#1f77b4"),
536
+ ('Unsocial PPO', "grey"),
537
+ ('PPO (no liar)', "#043252"),
538
+ ('PPO+Explo (no liar)', "darkred"),
539
+ ('Unsocial PPO (no liar)', "black"),
540
+ ('PPO+Explo (helper)', 'indianred'),
541
+ ('PPO (helper)', "#1f77b4"),
542
+ ('Unsocial PPO (helper)', "grey")]
543
+ )
544
+ color = color_code.get(label, np.random.choice(default_colors))
545
+ print("C:",color)
546
+ plot_with_shade_grg(
547
+ 0, ax[0], steps, means, stds, color, color, label,
548
+ legend=True,
549
+ xlim=[0, steps[-1] if not x_lim else x_lim],
550
+ ylim=[0, 1.0], xlabel="env steps (millions)", ylabel=ylabel, title=None,
551
+ leg_args =leg_args)
552
+ #
553
+ # plot_with_shade(0, ax[0], steps, means, stds, color, color,label,
554
+ # legend=True, xlim=[0, max_steps], ylim=[min_y, max_y],
555
+ # leg_size=leg_size, xlabel="Env steps (millions)", ylabel=ylabel, linewidth=5.0, smooth_factor=smooth_factor)
556
+
557
+
558
+ for label, (mean, std, color) in static_lines.items():  # note: reuses steps, ylabel and leg_args from the last model plotted above
559
+ plot_with_shade_grg(
560
+ 0, ax[0], steps, np.array([mean]*len(steps)), np.array([std]*len(steps)), color, color, label,
561
+ legend=True,
562
+ xlim=[0, max_steps],
563
+ ylim=[0, 1.0],
564
+ xlabel="env steps (millions)", ylabel=ylabel, linestyle=":",
565
+ leg_args=leg_args)
566
+
567
+ plt.tight_layout()
568
+ f.savefig('graphics/{}_results.svg'.format(str(figure_id)))
569
+ f.savefig('graphics/{}_results.png'.format(str(figure_id)))
570
+ plt.show()
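Usage note: the figure is selected by the first CLI argument, an int for figures 0-3 or a case-study name such as "Help" or "TalkItOutPolite", e.g.:

    python data_analysis_neurips.py 0
    python data_analysis_neurips.py Help

The savefig calls above also assume a graphics/ directory already exists; a minimal guard (an assumption about the intended layout, placed before the savefig calls) would be:

    os.makedirs('graphics', exist_ok=True)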
data_visualize.py ADDED
@@ -0,0 +1,1436 @@
1
+ #!/usr/bin/env python
2
+ import re
3
+ import itertools
4
+ import math
5
+ from itertools import chain
6
+ import time
7
+
8
+ # import seaborn
9
+ import numpy as np
10
+ import os
11
+ from collections import OrderedDict, defaultdict
12
+ import pandas as pd
13
+ import matplotlib.pyplot as plt
14
+ import sys
15
+ from termcolor import cprint, colored
16
+ from pathlib import Path
17
+ import pickle
18
+ from scipy import stats
19
+
20
+ save = True
21
+ show_plot = False
22
+
23
+ metrics = [
24
+ 'success_rate_mean',
25
+ # 'FPS',
26
+ # 'extrinsic_return_mean',
27
+ # 'exploration_bonus_mean',
28
+ # 'NPC_intro',
29
+ # 'curriculum_param_mean',
30
+ # 'curriculum_max_success_rate_mean',
31
+ # 'rreturn_mean'
32
+ ]
33
+
34
+
35
+ eval_metric = "test_success_rates"
36
+ # eval_metric = "exploration_bonus_mean"
37
+
38
+ super_title = ""
39
+ # super_title = "PPO - No exploration bonus"
40
+ # super_title = "Count Based exploration bonus (Grid Search)"
41
+ # super_title = "PPO + RND"
42
+ # super_title = "PPO + RIDE"
43
+
44
+ # statistical evaluation p-value
45
+ test_p = 0.05
46
+
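test_p is presumably consumed together with scipy.stats further down; a minimal sketch of the kind of comparison it enables (Welch's t-test on hypothetical per-seed final scores):

    a = np.array([0.81, 0.78, 0.85])   # final success rates, condition A
    b = np.array([0.62, 0.70, 0.66])   # condition B
    t, p = stats.ttest_ind(a, b, equal_var=False)
    significantly_different = p < test_p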
47
+ agg_title = ""
48
+
49
+ color_dict = None
50
+ eval_filename = None
51
+
52
+ max_frames = 20_000_000
53
+
54
+ legend_show_n_seeds = False
55
+ draw_legend = True
56
+ per_seed = False
57
+
58
+ study_train = False
59
+ study_eval = True
60
+
61
+ plot_test = True
62
+
63
+ plot_aggregated_test = True
64
+ plot_only_aggregated_test = True
65
+
66
+
67
+ xnbins = 4
68
+ ynbins = 3
69
+
70
+ steps_denom = 1e6
71
+
72
+ # Global vars for tracking and labeling data at load time.
73
+ exp_idx = 0
74
+ label_parser_dict = None
75
+ label_parser = lambda l, _, label_parser_dict: l
76
+
77
+ smooth_factor = 10 # used
78
+ # smooth_factor = 0
79
+ print("smooth factor:", smooth_factor)
80
+ eval_smooth_factor = None
81
+ leg_size = 30
82
+
83
+ def smooth(x_, n=50):
84
+ if n is None:
85
+ return x_
86
+
87
+ if type(x_) == list:
88
+ x_ = np.array(x_)
89
+ return np.array([x_[max(i - n, 0):i + 1].mean() for i in range(len(x_))])
90
+
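smooth is a trailing running mean over a window of at most n+1 points (shorter near the start); a quick worked check:

    >>> smooth([0, 0, 4, 4], n=1)
    array([0., 0., 2., 4.])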
91
+ sort_test = False
92
+
93
+ def sort_test_set(env_name):
94
+ helps = [
95
+ "LanguageFeedback",
96
+ "LanguageColor",
97
+ "Pointing",
98
+ "Emulation",
99
+ ]
100
+ problems = [
101
+ "Boxes",
102
+ "Switches",
103
+ "Generators",
104
+ "Marble",
105
+ "Doors",
106
+ "Levers",
107
+ ]
108
+
109
+ env_names = []
110
+ for p in problems:
111
+ for h in helps:
112
+ env_names.append(h+p)
113
+
114
+ env_names.extend([
115
+ "LeverDoorColl",
116
+ "MarblePushColl",
117
+ "MarblePassColl",
118
+ "AppleStealing"
119
+ ])
120
+
121
+ for i, en in enumerate(env_names):
122
+ if en in env_name:
123
+ return i
124
+
125
+ raise ValueError(f"Test env {env_name} not known")
126
+
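sort_test_set assigns each test env its index in the fixed help x problem grid (24 entries) followed by the four collaborative/adversarial envs; e.g., with a test env name that appears in the configs below:

    >>> sort_test_set("SocialAI-EPointingDoorsTestInformationSeekingParamEnv-v1")
    18    # "PointingDoors": problem "Doors" (5th) x help "Pointing" (3rd) = 4*4 + 2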
127
+
128
+
129
+ subsample_step = 1
130
+ load_subsample_step = 1
131
+
132
+ x_lim = 0
133
+ max_x_lim = np.inf
134
+
135
+ summary_dict = {}
136
+ summary_dict_colors = {}
137
+ to_plot_dict = {}
138
+
139
+
140
+ default_colors_ = ["blue","orange","green","magenta", "brown", "red",'black',"grey",u'#ff7f0e',
141
+ "cyan", "pink",'purple', u'#1f77b4',
142
+ "darkorchid","sienna","lightpink", "indigo","mediumseagreen",'aqua',
143
+ 'deeppink','silver','khaki','goldenrod'] * 100
144
+
145
+
146
+ def get_eval_data(logdir, eval_metric):
147
+ eval_data = defaultdict(lambda :defaultdict(list))
148
+
149
+ for root, _, files in os.walk(logdir):
150
+ for file in files:
151
+ if 'testing_' in file:
152
+ assert ".pkl" in file
153
+ test_env_name = file[len("testing_"):-len(".pkl")]  # lstrip/rstrip strip character sets, not prefixes/suffixes
154
+ try:
155
+ with open(root+"/"+file, "rb") as f:
156
+ seed_eval_data = pickle.load(f)
157
+ except Exception:
158
+ print("Pickle not loaded: ", root+"/"+file)
159
+ time.sleep(1)
160
+ continue
161
+
162
+ eval_data[test_env_name]["values"].append(seed_eval_data[eval_metric])
163
+ eval_data[test_env_name]["steps"].append(seed_eval_data["test_step_nb"])
164
+
165
+ for test_env, seed_data in eval_data.items():
166
+ min_len_seed = min([len(s) for s in seed_data['steps']])
167
+ eval_data[test_env]["values"] = np.array([s[:min_len_seed] for s in eval_data[test_env]["values"]])
168
+ eval_data[test_env]["steps"] = np.array([s[:min_len_seed] for s in eval_data[test_env]["steps"]])
169
+
170
+ return eval_data
171
+
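get_eval_data returns a mapping test_env_name -> {"values", "steps"}, each an (n_seeds, n_evals) array truncated to the shortest seed; a minimal consumption sketch (the run directory is hypothetical):

    data = get_eval_data("storage/04-01_Pointing_CB_heldout_doors", eval_metric="test_success_rates")
    for env_name, d in data.items():
        print(env_name, d["values"].mean(axis=0)[-1])  # final success rate, averaged over seeds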
172
+ def get_all_runs(logdir, load_subsample_step=1):
173
+ """
174
+ Recursively look through logdir for run directories.
175
+ Assumes that any directory containing a "log.csv" file is a valid run.
176
+ """
177
+ global exp_idx
178
179
+ datasets = []
180
+ for root, _, files in os.walk(logdir):
181
+ if 'log.csv' in files:
182
+ if (Path(root) / 'log.csv').stat().st_size == 0:
183
+ print("CSV {} empty".format(os.path.join(root, 'log.csv')))
184
+ continue
185
+
186
+ run_name = root[8:]  # strip the leading "storage/" prefix
187
+
188
+ exp_name = None
189
+
190
+ config = None
191
+ exp_idx += 1
192
+
193
+ # load progress data
194
+ try:
195
+ exp_data = pd.read_csv(os.path.join(root, 'log.csv'))
196
+ print("Loaded:", os.path.join(root, 'log.csv'))
197
+ except Exception:
198
+ raise ValueError("CSV {} faulty".format(os.path.join(root, 'log.csv')))
199
+
200
+ exp_data = exp_data[::load_subsample_step]
201
+ data_dict = exp_data.to_dict("list")
202
+
203
+ data_dict['config'] = config
204
+ nb_epochs = len(data_dict['frames'])
205
+ if nb_epochs == 1:
206
+ print(f'{run_name} -> {colored(f"nb_epochs {nb_epochs}", "red")}')
207
+ else:
208
+ print('{} -> nb_epochs {}'.format(run_name, nb_epochs))
209
+
210
+ datasets.append(data_dict)
211
+
212
+ return datasets
213
+
214
+
215
+ def get_datasets(rootdir, load_only="", load_subsample_step=1, ignore_patterns=("ignore"), require_patterns=()):
216
+ _, models_list, _ = next(os.walk(rootdir))
217
+ for dir_name in models_list.copy():
218
+ # add "ignore" in a directory name to avoid loading its content
219
+ for ignore_pattern in ignore_patterns:
220
+ if ignore_pattern in dir_name or load_only not in dir_name:
221
+ if dir_name in models_list:
222
+ models_list.remove(dir_name)
223
+
224
+ if len(require_patterns) > 0:
225
+ if not any([require_pattern in dir_name for require_pattern in require_patterns]):
226
+ if dir_name in models_list:
227
+ models_list.remove(dir_name)
228
+
229
+ for expe_name in list(labels.keys()):
230
+ if expe_name not in models_list:
231
+ del labels[expe_name]
232
+
233
+ # setting per-model type colors
234
+ for i, m_name in enumerate(models_list):
235
+ for m_type, m_color in per_model_colors.items():
236
+ if m_type in m_name:
237
+ colors[m_name] = m_color
238
+ print("extracting data for {}...".format(m_name))
239
+ m_id = m_name
240
+ models_saves[m_id] = OrderedDict()
241
+ models_saves[m_id]['data'] = get_all_runs(rootdir+m_name, load_subsample_step=load_subsample_step)
242
+ print("done")
243
+
244
+ if m_name not in labels:
245
+ labels[m_name] = m_name
246
+
247
+ model_eval_data[m_id] = get_eval_data(logdir=rootdir+m_name, eval_metric=eval_metric)
248
+
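A call sketch for get_datasets as configured below (patterns taken from the "pointing" case; require_patterns filters run directories by substring, and the trailing tuple commas matter):

    get_datasets("storage/", "_", load_subsample_step=1,
                 ignore_patterns=("_ignore_",),
                 require_patterns=("04-01_Pointing_CB_heldout_doors",))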
249
+ """
250
+ retrieve all experiences located in "data to vizu" folder
251
+ """
252
+ labels = OrderedDict()
253
+ per_model_colors = OrderedDict()
254
+
255
+ # LOAD DATA
256
+ models_saves = OrderedDict()
257
+ colors = OrderedDict()
258
+ model_eval_data = OrderedDict()
259
+
260
+ static_lines = {}
261
+
262
+ ignore_patterns = ["_ignore_"]
263
+
264
+ to_compare = None
265
+ load_pattern = sys.argv[1]
266
+
267
+ test_envs_to_plot = None # plot all
268
+
269
+ min_y, max_y = 0.0, 1.1
270
+
271
+
272
+ def label_parser(label):
273
+ label = label.replace("04-01_Pointing_CB_heldout_doors", "PPO_CB")
274
+ label = label.replace("19-01_Color_CB_heldout_doors", "PPO_CBL")
275
+ label = label.replace("19-01_Feedback_CB_heldout_doors_20M", "PPO_CBL")
276
+
277
+ label = label.replace("20-01_JA_Color_CB_heldout_doors", "JA_PPO_CBL")
278
+
279
+ label = label.replace("05-01_scaffolding_50M_no_acl", "PPO_no_scaf")
280
+ label = label.replace("05-01_scaffolding_50M_acl_4_acl-type_intro_seq", "PPO_scaf_4")
281
+ label = label.replace("05-01_scaffolding_50M_acl_8_acl-type_intro_seq_scaf", "PPO_scaf_8")
282
+
283
+
284
+ label = label.replace("03-01_RR_ft_single_CB_marble_pass_A_soc_exp", "PPO_CB_role_B")
285
+ label = label.replace("03-01_RR_ft_single_CB_marble_pass_A_asoc_contr", "PPO_CB_asocial")
286
+
287
+ label = label.replace("05-01_RR_ft_group_50M_CB_marble_pass_A_soc_exp", "PPO_CB_role_B")
288
+ label = label.replace("05-01_RR_ft_group_50M_CB_marble_pass_A_asoc_contr", "PPO_CB_asocial")
289
+
290
+ label = label.replace("20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.25_50",
291
+ "PPO_CB_0.25")
292
+ label = label.replace("20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.5_50",
293
+ "PPO_CB_0.5")
294
+ label = label.replace("20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__1_50",
295
+ "PPO_CB_1")
296
+
297
+ return label
298
+
299
+ color_dict = {
300
+ 'PPO_CB': "blue",
301
+ 'PPO_CB(train)': "blue",
302
+ "PPO_CB(test)": "orange",
303
+
304
+ 'PPO_no_bonus': "orange",
305
+
306
+ 'PPO_CBL': "blue",
307
+ 'PPO_CBL(train)': "blue",
308
+ "PPO_CBL(test)": "orange",
309
+ 'JA_PPO_CBL': "green",
310
+
311
+ "PPO_CB_role_B": "blue",
312
+ "PPO_CB_asocial": "orange",
313
+
314
+ 'PPO_CB_0.25': "blue",
315
+ 'PPO_CB_0.5': "green",
316
+ 'PPO_CB_1': "orange",
317
+
318
+ }
319
+
320
+ if load_pattern == "RR_single":
321
+ save = False
322
+ show_plot = True
323
+ load_pattern = "_"
324
+
325
+ plot_path = "../case_studies_final_figures/RR_dummy_single"
326
+
327
+ require_patterns = [
328
+ "03-01_RR_ft_single_CB_marble_pass_A_asoc_contr",
329
+ "03-01_RR_ft_single_CB_marble_pass_A_soc_exp",
330
+ ]
331
+
332
+ plot_aggregated_test = False
333
+ plot_only_aggregated_test = False
334
+ study_train = True
335
+ study_eval = False
336
+
337
+ elif load_pattern == "RR_group":
338
+
339
+ load_pattern = "_"
340
+
341
+ plot_path = "../case_studies_final_figures/RR_dummy_group"
342
+
343
+ require_patterns = [
344
+ "05-01_RR_ft_group_50M_CB_marble_pass_A_asoc_contr",
345
+ "05-01_RR_ft_group_50M_CB_marble_pass_A_soc_exp",
346
+ ]
347
+
348
+ plot_aggregated_test = False
349
+ plot_only_aggregated_test = False
350
+ study_train = True
351
+ study_eval = False
352
+
353
+
354
+ elif load_pattern == "scaffolding":
355
+ load_pattern = "_"
356
+
357
+ plot_path = "../case_studies_final_figures/Scaffolding_test"
358
+
359
+ require_patterns = [
360
+ "05-01_scaffolding_50M_no_acl",
361
+ "05-01_scaffolding_50M_acl_4_acl-type_intro_seq",
362
+ "05-01_scaffolding_50M_acl_8_acl-type_intro_seq_scaf",
363
+ ]
364
+
365
+ test_envs_to_plot = None # aggregate all of them
366
+ plot_aggregated_test = True
367
+ plot_only_aggregated_test = True
368
+ study_train = False
369
+ study_eval = True
370
+
371
+ to_compare = [
372
+ ("05-01_scaffolding_50M_acl_4_acl-type_intro_seq_agg_test", "05-01_scaffolding_50M_no_acl_agg_test", "auto_color"),
373
+ ("05-01_scaffolding_50M_acl_8_acl-type_intro_seq_scaf_agg_test", "05-01_scaffolding_50M_no_acl_agg_test", "auto_color"),
374
+ ]
375
+
376
+ elif load_pattern == "pointing":
377
+ study_train = True
378
+ study_eval = True
379
+
380
+ plot_aggregated_test = False
381
+ plot_only_aggregated_test = False
382
+
383
+ load_pattern = "_"
384
+
385
+ test_envs_to_plot = [
386
+ "SocialAI-EPointingDoorsTestInformationSeekingParamEnv-v1",
387
+ ]
388
+
389
+ plot_path = "../case_studies_final_figures/Pointing_train_test"
390
+
391
+ require_patterns = [
392
+ "04-01_Pointing_CB_heldout_doors",
393
+ ]
394
+
395
+ to_compare = [
396
+ ("04-01_Pointing_CB_heldout_doors", "04-01_Pointing_CB_heldout_doors_SocialAI-EPointingDoorsTestInformationSeekingParamEnv-v1", "black")
397
+ ]
398
+
399
+ elif load_pattern == "color":
400
+ study_train = True
401
+ study_eval = True
402
+
403
+ plot_aggregated_test = False
404
+ plot_only_aggregated_test = False
405
+
406
+ max_x_lim = 18
407
+
408
+ load_pattern = "_"
409
+
410
+ test_envs_to_plot = [
411
+ "SocialAI-ELangColorDoorsTestInformationSeekingParamEnv-v1",
412
+ ]
413
+
414
+ plot_path = "../case_studies_final_figures/Color_train_test"
415
+
416
+ require_patterns = [
417
+ "19-01_Color_CB_heldout_doors",
418
+ ]
419
+
420
+ to_compare = [
421
+ ("19-01_Color_CB_heldout_doors", "19-01_Color_CB_heldout_doors_SocialAI-ELangColorDoorsTestInformationSeekingParamEnv-v1", "black")
422
+ ]
423
+
424
+ elif load_pattern == "ja_color":
425
+
426
+ study_train = True
427
+ study_eval = False
428
+
429
+ plot_aggregated_test = False
430
+ plot_only_aggregated_test = False
431
+
432
+ max_x_lim = 18
433
+
434
+ load_pattern = "_"
435
+
436
+ test_envs_to_plot = None
437
+ plot_path = "../case_studies_final_figures/JA_Color_train"
438
+
439
+ require_patterns = [
440
+ "19-01_Color_CB_heldout_doors",
441
+ "20-01_JA_Color_CB_heldout_doors",
442
+ ]
443
+
444
+ to_compare = [
445
+ ("19-01_Color_CB_heldout_doors", "20-01_JA_Color_CB_heldout_doors", "black")
446
+ ]
447
+
448
+ elif load_pattern == "feedback_per_seed":
449
+ study_train = True
450
+ study_eval = False
451
+ per_seed = True
452
+ draw_legend = False
453
+
454
+ plot_aggregated_test = False
455
+ plot_only_aggregated_test = False
456
+ max_x_lim = 18
457
+
458
+ load_pattern = "_"
459
+
460
+ test_envs_to_plot = [
461
+ "SocialAI-ELangFeedbackDoorsTestInformationSeekingParamEnv-v1",
462
+ ]
463
+
464
+ plot_path = "../case_studies_final_figures/Feedback_train_per_seed"
465
+
466
+ require_patterns = [
467
+ "19-01_Feedback_CB_heldout_doors",
468
+ ]
469
+
470
+ to_compare = None
471
+
472
+ elif load_pattern == "feedback":
473
+ study_train = True
474
+ study_eval = True
475
+
476
+ plot_aggregated_test = False
477
+ plot_only_aggregated_test = False
478
+ max_x_lim = 18
479
+
480
+ load_pattern = "_"
481
+
482
+ test_envs_to_plot = [
483
+ "SocialAI-ELangFeedbackDoorsTestInformationSeekingParamEnv-v1",
484
+ ]
485
+
486
+ plot_path = "../case_studies_final_figures/Feedback_train_test"
487
+
488
+ require_patterns = [
489
+ "19-01_Feedback_CB_heldout_doors",
490
+ ]
491
+
492
+ to_compare = [
493
+ ("19-01_Feedback_CB_heldout_doors_20M", "19-01_Feedback_CB_heldout_doors_20M_SocialAI-ELangFeedbackDoorsTestInformationSeekingParamEnv-v1", "black")
494
+ ]
495
+
496
+ elif load_pattern == "imitation_train":
497
+
498
+ study_train = True
499
+ study_eval = False
500
+
501
+ plot_aggregated_test = False
502
+ plot_only_aggregated_test = False
503
+
504
+ max_x_lim = 18
505
+
506
+ load_pattern = "_"
507
+
508
+ test_envs_to_plot = None
509
+ plot_path = "../case_studies_final_figures/Imitation_train"
510
+
511
+ require_patterns = [
512
+ "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.25_50",
513
+ "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.5_50",
514
+ "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__1_50",
515
+ ]
516
+
517
+ # to_compare = [
518
+ # ("19-01_Color_CB_heldout_doors", "20-01_JA_Color_CB_heldout_doors", "black")
519
+ # ]
520
+ to_compare = None
521
+
522
+ elif load_pattern == "imitation_train_intro":
523
+
524
+ metrics = ["NPC_intro"]
525
+
526
+ show_plot = False
527
+ save = True
528
+
529
+ study_train = True
530
+ study_eval = False
531
+
532
+ plot_aggregated_test = False
533
+ plot_only_aggregated_test = False
534
+
535
+ max_x_lim = 18
536
+
537
+ load_pattern = "_"
538
+
539
+ test_envs_to_plot = None
540
+ plot_path = "../case_studies_final_figures/Imitation_train_intro"
541
+
542
+ require_patterns = [
543
+ "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.25_50",
544
+ "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.5_50",
545
+ "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__1_50",
546
+ ]
547
+
548
+ # to_compare = [
549
+ # ("19-01_Color_CB_heldout_doors", "20-01_JA_Color_CB_heldout_doors", "black")
550
+ # ]
551
+ to_compare = None
552
+
553
+ elif load_pattern == "imitation_test":
554
+
555
+ study_train = False
556
+ study_eval = True
557
+
558
+ plot_aggregated_test = False
559
+ plot_only_aggregated_test = False
560
+
561
+ max_x_lim = 18
562
+
563
+ load_pattern = "_"
564
+
565
+ test_envs_to_plot = None
566
+ plot_path = "../case_studies_final_figures/Imitation_test"
567
+
568
+ require_patterns = [
569
+ "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.25_50",
570
+ "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.5_50",
571
+ "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__1_50",
572
+ ]
573
+
574
+ # to_compare = [
575
+ # ("19-01_Color_CB_heldout_doors", "20-01_JA_Color_CB_heldout_doors", "black")
576
+ # ]
577
+ to_compare = None
578
+
579
+ elif load_pattern == "pilot_pointing":
580
+
581
+ study_train = True
582
+ study_eval = False
583
+
584
+ show_plot = False
585
+ save = True
586
+ plot_path = "../case_studies_final_figures/pilot_pointing"
587
+
588
+ load_pattern = "29-10_SAI_Pointing_CS_PPO_"
589
+
590
+ require_patterns = [
591
+ "29-10_SAI_Pointing_CS_PPO_CB_env_SocialAI-EPointingInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_cell_exploration-bonus-params__2_50_exploration-bonus-tanh_0.6",
592
+ "29-10_SAI_Pointing_CS_PPO_CBL_env_SocialAI-EPointingInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_lang_exploration-bonus-params__10_50_exploration-bonus-tanh_0.6",
593
+ "29-10_SAI_Pointing_CS_PPO_no_env_SocialAI-EPointingInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4",
594
+ "29-10_SAI_Pointing_CS_PPO_RIDE_env_SocialAI-EPointingInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_ride_intrinsic-reward-coef_0.01",
595
+ "29-10_SAI_Pointing_CS_PPO_RND_env_SocialAI-EPointingInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_rnd_intrinsic-reward-coef_0.005",
596
+ ]
597
+
598
+ color_dict = {
599
+ "PPO_RIDE": "orange",
600
+ "PPO_RND": "magenta",
601
+ "PPO_no": "maroon",
602
+ "PPO_CBL": "green",
603
+ "PPO_CB": "blue",
604
+ }
605
+
606
+ def label_parser(label):
607
+ label = label.split("_env_")[0].split("SAI_")[1]
608
+ label=label.replace("Pointing_CS_", "")
609
+ return label
610
+
611
+ to_compare = None
612
+
613
+ elif load_pattern == "pilot_color":
614
+
615
+ study_train = True
616
+ study_eval = False
617
+
618
+ show_plot = False
619
+ save = True
620
+ plot_path = "../case_studies_final_figures/pilot_color"
621
+
622
+ load_pattern = "29-10_SAI_LangColor_CS"
623
+
624
+ require_patterns = [
625
+ "29-10_SAI_LangColor_CS_PPO_CB_env_SocialAI-ELangColorInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_cell_exploration-bonus-params__2_50_exploration-bonus-tanh_0.6",
626
+ "29-10_SAI_LangColor_CS_PPO_CBL_env_SocialAI-ELangColorInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_lang_exploration-bonus-params__10_50_exploration-bonus-tanh_0.6",
627
+ "29-10_SAI_LangColor_CS_PPO_no_env_SocialAI-ELangColorInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4",
628
+ "29-10_SAI_LangColor_CS_PPO_RIDE_env_SocialAI-ELangColorInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_ride_intrinsic-reward-coef_0.01",
629
+ "29-10_SAI_LangColor_CS_PPO_RND_env_SocialAI-ELangColorInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_rnd_intrinsic-reward-coef_0.005"
630
+ ]
631
+ color_dict = {
632
+ "PPO_RIDE": "orange",
633
+ "PPO_RND": "magenta",
634
+ "PPO_no": "maroon",
635
+ "PPO_CBL": "green",
636
+ "PPO_CB": "blue",
637
+ }
638
+
639
+ def label_parser(label):
640
+ label = label.split("_env_")[0].split("SAI_")[1]
641
+ label=label.replace("LangColor_CS_", "")
642
+ return label
643
+
644
+ to_compare = None
645
+
646
+ elif load_pattern == "formats_train":
647
+
648
+ study_train = True
649
+ study_eval = False
650
+
651
+ plot_aggregated_test = False
652
+ plot_only_aggregated_test = False
653
+
654
+ max_x_lim = 45
655
+
656
+ load_pattern = "_"
657
+
658
+ test_envs_to_plot = None
659
+ plot_path = "../case_studies_final_figures/Formats_train"
660
+
661
+ require_patterns = [
662
+ "21-01_formats_50M_CBL",
663
+ "05-01_scaffolding_50M_no_acl",
664
+ ]
665
+
666
+ to_compare = [
667
+ ("21-01_formats_50M_CBL", "05-01_scaffolding_50M_no_acl", "black")
668
+ ]
669
+
670
+
671
+ def label_parser(label):
672
+ label = label.replace("05-01_scaffolding_50M_no_acl", "PPO_no_bonus")
673
+ label = label.replace("21-01_formats_50M_CBL", "PPO_CBL")
674
+ return label
675
+
676
+ elif load_pattern == "adversarial":
677
+
678
+ show_plot = False
679
+ save = True
680
+
681
+ study_train = True
682
+ study_eval = False
683
+
684
+ plot_aggregated_test = False
685
+ plot_only_aggregated_test = False
686
+
687
+ # max_x_lim = 45
688
+
689
+ smooth_factor = 0
690
+
691
+ load_pattern = "_"
692
+
693
+ test_envs_to_plot = None
694
+ plot_path = "../case_studies_final_figures/adversarial"
695
+
696
+ require_patterns = [
697
+ "26-01_Adversarial_2M_PPO_CB_hidden_npc",
698
+ "26-01_Adversarial_2M_PPO_CB_asoc",
699
+ "26-01_Adversarial_2M_PPO_CB",
700
+ ]
701
+
702
+ to_compare = [
703
+ ("26-01_Adversarial_2M_PPO_CB", "26-01_Adversarial_2M_PPO_CB_hidden_npc", "orange"),
704
+ ("26-01_Adversarial_2M_PPO_CB", "26-01_Adversarial_2M_PPO_CB_asoc", "green")
705
+ ]
706
+
707
+ def label_parser(label):
708
+ label = label.replace("26-01_Adversarial_2M_PPO_CB_hidden_npc", "PPO_CB_invisible_peer")
709
+ label = label.replace("26-01_Adversarial_2M_PPO_CB_asoc", "PPO_CB_no_peer")
710
+ label = label.replace("26-01_Adversarial_2M_PPO_CB", "PPO_CB")
711
+ return label
712
+
713
+ color_dict = {
714
+ "PPO_CB": "blue",
715
+ "PPO_CB_invisible_peer": "orange",
716
+ "PPO_CB_no_peer": "green",
717
+ }
718
+
719
+ elif load_pattern == "adversarial_stumps":
720
+
721
+
722
+ study_train = True
723
+ study_eval = False
724
+
725
+ plot_aggregated_test = False
726
+ plot_only_aggregated_test = False
727
+
728
+ # max_x_lim = 45
729
+
730
+ smooth_factor = 0
731
+
732
+ load_pattern = "_"
733
+
734
+ test_envs_to_plot = None
735
+ plot_path = "../case_studies_final_figures/adversarial_stumps"
736
+
737
+ require_patterns = [
738
+ "26-01_Adversarial_5M_Stumps_PPO_CB_hidden_npc",
739
+ "26-01_Adversarial_5M_Stumps_PPO_CB_asoc",
740
+ "26-01_Adversarial_5M_Stumps_PPO_CB",
741
+ ]
742
+
743
+ to_compare = [
744
+ ("26-01_Adversarial_5M_Stumps_PPO_CB", "26-01_Adversarial_5M_Stumps_PPO_CB_hidden_npc", "orange"),
745
+ ("26-01_Adversarial_5M_Stumps_PPO_CB", "26-01_Adversarial_5M_Stumps_PPO_CB_asoc", "green")
746
+ ]
747
+
748
+ def label_parser(label):
749
+ label = label.replace("26-01_Adversarial_5M_Stumps_PPO_CB_hidden_npc", "PPO_CB_invisible_peer")
750
+ label = label.replace("26-01_Adversarial_5M_Stumps_PPO_CB_asoc", "PPO_CB_no_peer")
751
+ label = label.replace("26-01_Adversarial_5M_Stumps_PPO_CB", "PPO_CB")
752
+ return label
753
+
754
+ color_dict = {
755
+ "PPO_CB": "blue",
756
+ "PPO_CB_invisible_peer": "orange",
757
+ "PPO_CB_no_peer": "green",
758
+ }
759
+
760
+ else:
761
+ plot_path = "plots/testplot"
762
+
763
+ require_patterns = [
764
+ "_",
765
+ # pointing
766
+ # "04-01_Pointing_CB_heldout_doors",
767
+ ]
768
+
769
+ if to_compare is None and len(require_patterns) == 2 and "_" not in require_patterns:
770
+ # if only two curves compare those two automatically
771
+ to_compare = [(require_patterns[0], require_patterns[1], "black")]
772
+
773
+
774
+
775
+ save = False
776
+ show_plot = True
777
+
778
+
779
+ # all of those
780
+ include_patterns = []
781
+ #include_patterns = ["rec_5"]
782
+
783
+ fontsize = 20
784
+ legend_fontsize = 20
785
+ linewidth = 5
786
+ # linewidth = 1
787
+
788
+ leg_args = {
789
+ 'fontsize': legend_fontsize
790
+ }
791
+
792
+ title_fontsize = int(fontsize*1.2)
793
+
794
+
795
+ storage_dir = "storage/"
796
+ if load_pattern.startswith(storage_dir):
797
+ load_pattern = load_pattern[len(storage_dir):]
798
+
799
+ if load_pattern.startswith("./storage/"):
800
+ load_pattern = load_pattern[len("./storage/"):]
801
+
802
+ get_datasets(storage_dir, str(load_pattern), load_subsample_step=load_subsample_step, ignore_patterns=ignore_patterns, require_patterns=require_patterns)
803
+
804
+ label_parser_dict = {
805
+ # "PPO_CB": "PPO_CB",
806
+ # "02-06_AppleStealing_experiments_cb_bonus_angle_occ_env_SocialAI-OthersPerceptionInferenceParamEnv-v1_exploration-bonus-type_cell": "NPC_visible",
807
+ }
808
+
809
+ env_type = str(load_pattern)
810
+
811
+ fig_type = "test"
812
+ try:
813
+ top_n = int(sys.argv[2])
814
+ except (IndexError, ValueError):
815
+ top_n = 8
816
+
817
+ to_remove = []
818
+
819
+ for tr_ in to_remove:
820
+ if tr_ in models_saves:
821
+ del models_saves[tr_]
822
+
823
+ print("Loaded:")
824
+ print("\n".join(list(models_saves.keys())))
825
+
826
+
827
+ if per_model_colors: # order runs for legend order as in per_models_colors, with corresponding colors
828
+ ordered_labels = OrderedDict()
829
+ for teacher_type in per_model_colors.keys():
830
+ for k,v in labels.items():
831
+ if teacher_type in k:
832
+ ordered_labels[k] = v
833
+ labels = ordered_labels
834
+ else:
835
+ print('not using per_model_color')
836
+ for k in models_saves.keys():
837
+ labels[k] = k
838
+
839
+ # Plot utils
840
+ def plot_with_shade(subplot_nb, ax, x, y, err, color, shade_color, label,
841
+ legend=False, leg_loc='best', title=None,
842
+ ylim=[0, 100], xlim=[0, 40], leg_args={}, leg_linewidth=13.0, linewidth=10.0, labelsize=20, fontsize=20, title_fontsize=30,
843
+ zorder=None, xlabel='Perf', ylabel='Env steps', linestyle="-", xnbins=3, ynbins=3):
844
+
845
+ #plt.rcParams.update({'font.size': 15})
846
+ ax.locator_params(axis='x', nbins=xnbins)
847
+ ax.locator_params(axis='y', nbins=ynbins)
848
+
849
+ ax.tick_params(axis='y', which='both', labelsize=labelsize)
850
+ ax.tick_params(axis='x', which='both', labelsize=labelsize*0.8)
851
+ # ax.tick_params(axis='both', which='both', labelsize="small")
852
+
853
+ # ax.scatter(x, y, color=color,linewidth=linewidth,zorder=zorder, linestyle=linestyle)
854
+ ax.plot(x, y, color=color, label=label, linewidth=linewidth, zorder=zorder, linestyle=linestyle)
855
+
856
+ if not np.array_equal(err, np.zeros_like(err)):
857
+ ax.fill_between(x, y-err, y+err, color=shade_color, alpha=0.2)
858
+
859
+ if legend:
860
+ leg = ax.legend(loc=leg_loc, **leg_args) # 34
861
+ for legobj in leg.legendHandles:
862
+ legobj.set_linewidth(leg_linewidth)
863
+
864
+ ax.set_xlabel(xlabel, fontsize=fontsize)
865
+ if subplot_nb == 0:
866
+ ax.set_ylabel(ylabel, fontsize=fontsize, labelpad=2)
867
+
868
+ ax.set_xlim(xmin=xlim[0], xmax=xlim[1])
869
+ ax.set_ylim(bottom=ylim[0], top=ylim[1])
870
+ if title:
871
+ ax.set_title(title, fontsize=title_fontsize)
872
+
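A minimal call sketch for plot_with_shade (hypothetical data; the band is mean +/- err and is skipped when err is all zeros, and the legend code assumes a Matplotlib version that still exposes leg.legendHandles):

    fig, ax_demo = plt.subplots()
    x = np.linspace(0, 40, 200)
    y = x / 40
    err = np.full_like(y, 0.05)
    plot_with_shade(0, ax_demo, x, y, err, "blue", "blue", "demo",
                    legend=True, xlim=[0, 40], ylim=[0, 1.1])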
873
+
874
+
875
+
876
+ # only one figure is drawn -> maybe we can add loops later
877
+ assert len(metrics) == 1
878
+
879
+ f, ax = plt.subplots(1, 1, figsize=(9.0, 9.0))
880
+
881
+ if len(metrics) == 1:
882
+ ax = [ax]
883
+
884
+ # max_y = -np.inf
885
+ min_y = np.inf
886
+
887
+ max_steps = 0
888
+ exclude_patterns = []
889
+
890
+ metric = metrics[0]
891
+
892
+ ylabel = {
893
+ "success_rate_mean": "Success rate (%)",
894
+ "exploration_bonus_mean": "Exploration bonus",
895
+ "NPC_intro": "Successful introduction (%)",
896
+ }.get(metric, metric)
897
+
898
+ # for metric_i, metric in enumerate(metrics):
899
+ default_colors = default_colors_.copy()
900
+
901
+ if study_train:
902
+ for model_i, model_id in enumerate(models_saves.keys()):
903
+
904
+ #excluding some experiments
905
+ if any([ex_pat in model_id for ex_pat in exclude_patterns]):
906
+ continue
907
+
908
+ if len(include_patterns) > 0:
909
+ if not any([in_pat in model_id for in_pat in include_patterns]):
910
+ continue
911
+
912
+ runs_data = models_saves[model_id]['data']
913
+ ys = []
914
+
915
+ if runs_data[0]['frames'][1] == 'frames':  # faulty logging can inject the CSV header into the data
916
+ runs_data[0]['frames'] = list(filter(('frames').__ne__, runs_data[0]['frames']))
917
+
918
+ if per_seed:
919
+ min_len = None
920
+
921
+ else:
922
+ # determine minimal run length across seeds
923
+ lens = [len(run['frames']) for run in runs_data if len(run['frames'])]
924
+ minimum = sorted(lens)[-min(top_n, len(lens))]
925
+ min_len = np.min([len(run['frames']) for run in runs_data if len(run['frames']) >= minimum])
926
+
927
+ # keep only top k
928
+ runs_data = [run for run in runs_data if len(run['frames']) >= minimum]
929
+
930
+ # min_len = np.min([len(run['frames']) for run in runs_data if len(run['frames']) > 10])
931
+
932
+ # compute env steps (x axis)
933
+ longest_id = np.argmax([len(rd['frames']) for rd in runs_data])
934
+ steps = np.array(runs_data[longest_id]['frames'], dtype=int) / steps_denom  # np.int was removed in NumPy 1.24
935
+ steps = steps[:min_len]
936
+
937
+ for run in runs_data:
938
+ if metric not in run:
939
+ raise ValueError(f"Metric {metric} not found. Possible metrics: {list(run.keys())}")
940
+
941
+ data = run[metric]
942
+
943
+ # checking for header
944
+ if data[1] == metric:
945
+ data = np.array(list(filter((metric).__ne__, data)), dtype=np.float16)
946
+
947
+ if per_seed:
948
+ ys.append(data)
949
+
950
+ else:
951
+ if len(data) >= min_len:
952
+ # discard extra
953
+ if len(data) > min_len:
954
+ print("run has too many {} datapoints ({}). Discarding {}".format(model_id, len(data),
955
+ len(data) - min_len))
956
+ data = data[0:min_len]
957
+ ys.append(data)
958
+ else:
959
+ raise ValueError("How can data be < min_len if it was capped above")
960
+
961
+ ys_same_len = ys
962
+
963
+ # computes stats
964
+ n_seeds = len(ys_same_len)
965
+
966
+ if per_seed:
967
+ sems = np.array(ys_same_len)
968
+ means = np.array(ys_same_len)
969
+ stds = np.zeros_like(means)
970
+ color = default_colors[model_i]
971
+
972
+ else:
973
+ sems = np.std(ys_same_len, axis=0)/np.sqrt(len(ys_same_len)) # sem
974
+ stds = np.std(ys_same_len, axis=0) # std
975
+ means = np.mean(ys_same_len, axis=0)
976
+ color = default_colors[model_i]
977
+
978
+ if metric == 'duration':
979
+ means = means / 3600
980
+ sems = sems / 3600
981
+ stds = stds / 3600
982
+
983
+ if per_seed:
984
+ # plot x y bounds
985
+ curr_max_steps = np.max(np.max(steps))
986
+
987
+ else:
988
+ # plot x y bounds
989
+ curr_max_steps = np.max(steps)
990
+
991
+ if curr_max_steps > max_steps:
992
+ max_steps = curr_max_steps
993
+
994
+ if subsample_step:
995
+ steps = steps[0::subsample_step]
996
+ means = means[0::subsample_step]
997
+ stds = stds[0::subsample_step]
998
+ sems = sems[0::subsample_step]
999
+ ys_same_len = [y[0::subsample_step] for y in ys_same_len]
1000
+
1001
+ # display seeds separately
1002
+ if per_seed:
1003
+ for s_i, seed_ys in enumerate(ys_same_len):
1004
+
1005
+ label = label_parser(model_id)
1006
+
1007
+ if study_eval:
1008
+ label = label + "_train_"
1009
+
1010
+ label = label + f"(s:{s_i})"
1011
+
1012
+ if label in color_dict:
1013
+ color = color_dict[label]
1014
+ else:
1015
+ color = default_colors[model_i*20+s_i]
1016
+
1017
+ curve_ID = f"{model_id}_{s_i}"
1018
+ assert np.array_equal(stds, np.zeros_like(stds))
1019
+
1020
+ if smooth_factor:
1021
+ seed_ys = smooth(seed_ys, smooth_factor)  # smooth the per-seed curve that is actually plotted below
1022
+
1023
+ to_plot_dict[curve_ID] = {
1024
+ "label": label,
1025
+ "steps": steps,
1026
+ "means": seed_ys,
1027
+ "stds": stds,
1028
+ "ys": ys_same_len,
1029
+ "color": color
1030
+ }
1031
+
1032
+ else:
1033
+ label = label_parser(model_id)
1034
+
1035
+ if study_eval:
1036
+ label = label+"(train)"
1037
+
1038
+ if label in color_dict:
1039
+ color = color_dict[label]
1040
+ else:
1041
+ color = default_colors[model_i]
1042
+
1043
+ if smooth_factor:
1044
+ means = smooth(means, smooth_factor)
1045
+ stds = smooth(stds, smooth_factor)
1046
+
1047
+ to_plot_dict[model_id] = {
1048
+ "label": label,
1049
+ "steps": steps,
1050
+ "means": means,
1051
+ "stds": stds,
1052
+ "sems": sems,
1053
+ "ys": ys_same_len,
1054
+ "color": color,
1055
+ }
1056
+
1057
+
1058
+ if study_eval:
1059
+ print("Evaluation")
1060
+ # evaluation sets
1061
+ number_of_eval_envs = max([len(v.keys()) for v in model_eval_data.values()], default=0)  # default guards against empty eval data
1062
+
1063
+ if plot_aggregated_test:
1064
+ number_of_eval_envs += 1
1065
+
1066
+ if number_of_eval_envs == 0:
1067
+ print("No eval envs")
1068
+ exit()
1069
+
1070
+ default_colors = default_colors_.copy()
1071
+
1072
+ test_summary_dict = defaultdict(dict)
1073
+ test_summary_dict_colors = defaultdict(dict)
1074
+
1075
+ for model_i, model_id in enumerate(model_eval_data.keys()):
1076
+ # excluding some experiments
1077
+ if any([ex_pat in model_id for ex_pat in exclude_patterns]):
1078
+ continue
1079
+ if len(include_patterns) > 0:
1080
+ if not any([in_pat in model_id for in_pat in include_patterns]):
1081
+ continue
1082
+
1083
+ # test envs
1084
+ test_envs = model_eval_data[model_id].items()
1085
+
1086
+ # filter unwanted eval envs
1087
+ if test_envs_to_plot is not None:
1088
+ test_envs = [(name, data) for name, data in test_envs if name in test_envs_to_plot]
1089
+
1090
+ # computes stats
1091
+ if sort_test:
1092
+ test_envs_sorted = list(sorted(test_envs, key=lambda kv: sort_test_set(kv[0])))
1093
+ else:
1094
+ test_envs_sorted = list(test_envs)
1095
+
1096
+ if plot_aggregated_test:
1097
+ agg_means = []
1098
+
1099
+ for env_i, (test_env, env_data) in enumerate(test_envs_sorted):
1100
+ ys_same_len = env_data["values"]
1101
+ steps = env_data["steps"].mean(0) / steps_denom
1102
+ n_seeds = len(ys_same_len)
1103
+
1104
+ if per_seed:
1105
+ sems = np.array(ys_same_len)
1106
+ stds = np.array(ys_same_len)
1107
+ means = np.array(ys_same_len)
1108
+ color = default_colors[model_i]
1109
+
1110
+ # plot x y bounds
1111
+ curr_max_steps = np.max(steps)
1112
+
1113
+ else:
1114
+ sems = np.std(ys_same_len, axis=0) / np.sqrt(len(ys_same_len)) # sem
1115
+ stds = np.std(ys_same_len, axis=0) # std
1116
+ means = np.mean(ys_same_len, axis=0)
1117
+ color = default_colors[model_i]
1118
+
1119
+ curr_max_steps = np.max(steps)
1120
+
1121
+ if plot_aggregated_test:
1122
+ agg_means.append(means)
1123
+
1124
+
1125
+ x_lim = max(steps[-1], x_lim)
1126
+ x_lim = min(max_x_lim, x_lim)
1127
+
1128
+ eval_metric_name = {
1129
+ "test_success_rates": "Success rate",
1130
+ 'exploration_bonus_mean': "Exploration bonus",
1131
+ }.get(eval_metric, eval_metric)
1132
+
1133
+ test_env_name = test_env.replace("Env", "").replace("Test", "")
1134
+
1135
+ env_types = ["InformationSeeking", "Collaboration", "PerspectiveTaking"]
1136
+ for env_type in env_types:
1137
+ if env_type in test_env_name:
1138
+ test_env_name = test_env_name.replace(env_type, "")
1139
+ test_env_name += f"\n({env_type})"
1140
+
1141
+ if per_seed:
1142
+ for s_i, seed_ys in enumerate(ys_same_len):
1143
+ label = label_parser(model_id) + f"_{test_env}" + f"(s:{s_i})"
1144
+
1145
+ if eval_smooth_factor:
1146
+ seed_ys = smooth(seed_ys, eval_smooth_factor)
1147
+
1148
+ curve_ID = f"{model_id}_{test_env}_{s_i}"
1149
+
1150
+ to_plot_dict[curve_ID] = {
1151
+ "label": label,
1152
+ "steps": steps,
1153
+ "means": seed_ys,
1154
+ "stds": np.zeros_like(seed_ys),
1155
+ "ys": ys_same_len,
1156
+ "color": color
1157
+ }
1158
+ else:
1159
+ if len(test_envs_sorted) > 1:
1160
+ label = label_parser(model_id) + f"_{test_env}"
1161
+ else:
1162
+ label = label_parser(model_id)
1163
+
1164
+ if study_train:
1165
+ label = label + "(test)"
1166
+
1167
+ if not plot_only_aggregated_test:
1168
+
1169
+ if label in color_dict:
1170
+ color = color_dict[label]
1171
+ else:
1172
+ color = default_colors[model_i*len(test_envs_sorted)+env_i]
1173
+
1174
+ if legend_show_n_seeds:
1175
+ label = label + "({})".format(n_seeds)
1176
+
1177
+ if eval_smooth_factor:
1178
+ means = smooth(means, eval_smooth_factor)
1179
+ stds = smooth(stds, eval_smooth_factor)
1180
+ sems = smooth(sems, eval_smooth_factor)
1181
+
1182
+ to_plot_dict[model_id+f"_{test_env}"] = {
1183
+ "label": label,
1184
+ "steps": steps,
1185
+ "means": means,
1186
+ "stds": stds,
1187
+ "sems": sems,
1188
+ "ys": ys_same_len,
1189
+ "color": color,
1190
+ }
1191
+
1192
+ if plot_aggregated_test:
1193
+
1194
+ ys_same_len = agg_means
1195
+ agg_means = np.array(agg_means)
1196
+ agg_mean = agg_means.mean(axis=0)
1197
+ agg_std = agg_means.std(axis=0) # std
1198
+ agg_sems = agg_means.std(axis=0) / np.sqrt(len(agg_means))  # sem across envs (was a bare ... placeholder that would crash below)
1199
+
1200
+ label = label_parser(model_id)
1201
+
1202
+ if study_train:
1203
+ label = label + "(train)"
1204
+
1205
+ if eval_smooth_factor:
1206
+ agg_mean = smooth(agg_mean, eval_smooth_factor)
1207
+ agg_std = smooth(agg_std, eval_smooth_factor)
1208
+ agg_sems = smooth(agg_sems, eval_smooth_factor)
1209
+
1210
+ if per_seed:
1211
+ print("Not smoothing aggregated curves in per-seed mode")
1212
+ for s_i, (seed_ys, seed_st) in enumerate(zip(agg_mean, agg_std)):
1213
+ seed_c = default_colors[model_i + s_i]
1214
+ label = str(s_i)
1215
+ curve_ID = f"{model_id}_agg_test_{s_i}"  # fresh ID; reusing the stale curve_ID overwrote the same entry
1216
+ to_plot_dict[curve_ID] = {
1217
+ "label": label,
1218
+ "steps": steps,
1219
+ "means": seed_ys,
1220
+ "stds": seed_st,
1221
+ "ys": ys_same_len,
1222
+ "color": seed_c  # use the per-seed color computed above
1223
+ }
1224
+ else:
1225
+
1226
+ if label in color_dict:
1227
+ color = color_dict[label]
1228
+
1229
+ else:
1230
+ color = default_colors[model_i]
1231
+
1232
+ to_plot_dict[model_id+"_agg_test"] = {
1233
+ "label": label,
1234
+ "steps": steps,
1235
+ "means": agg_mean,
1236
+ "stds": agg_std,
1237
+ "sems": agg_sems,
1238
+ "ys": ys_same_len,
1239
+ "color": color,
1240
+ }
1241
+
1242
+
1243
+ # should be labels
1244
+ to_scatter_dict = {}
1245
+
1246
+ if to_compare is not None:
1247
+ for comp_i, (a_model_id, b_model_id, color) in enumerate(to_compare):
1248
+
1249
+ a_data = to_plot_dict[a_model_id]["ys"]
1250
+ b_data = to_plot_dict[b_model_id]["ys"]
1251
+
1252
+ steps = to_plot_dict[a_model_id]["steps"]
1253
+
1254
+ if color == "auto_color":
1255
+ color = to_plot_dict[a_model_id]["color"]
1256
+
1257
+ if len(a_data[0]) != len(b_data[0]):
1258
+ # extract steps present in both
1259
+ a_steps = to_plot_dict[a_model_id]["steps"]
1260
+ b_steps = to_plot_dict[b_model_id]["steps"]
1261
+
1262
+ steps = list(set(a_steps) & set(b_steps))
1263
+
1264
+ # keep only the values for those steps
1265
+ mask_a = [(a_s in steps) for a_s in a_steps]
1266
+ a_data = np.array(a_data)[:, mask_a]
1267
+
1268
+ mask_b = [(b_s in steps) for b_s in b_steps]
1269
+ b_data = np.array(b_data)[:, mask_b]
1270
+
1271
+ p = stats.ttest_ind(
1272
+ a_data,
1273
+ b_data,
1274
+ equal_var=False
1275
+ ).pvalue
1276
+
1277
+ steps = [s for s, p_val in zip(steps, p) if p_val < test_p]  # keep only steps with a significant difference
1278
+
1279
+ ys = [1.02+0.02*comp_i]*len(steps)
1280
+
1281
+ to_scatter_dict[f"compare_{a_model_id}_{b_model_id}"] = {
1282
+ "label": "",
1283
+ "xs": steps,
1284
+ "ys": ys,
1285
+ "color": color,
1286
+ }
1287
+
1288
+ for scatter_i, (scatter_ID, scatter_id_data) in enumerate(to_scatter_dict.items()):
1289
+
1290
+ # unpack data
1291
+ label, xs, ys, color = (
1292
+ scatter_id_data["label"],
1293
+ scatter_id_data["xs"],
1294
+ scatter_id_data["ys"],
1295
+ scatter_id_data["color"],
1296
+ )
1297
+
1298
+ xlabel = "Env steps (1e6)"
1299
+
1300
+ plt.scatter(
1301
+ xs,
1302
+ ys,
1303
+ color=color,
1304
+ marker="x"
1305
+ )
1306
+
1307
+ summary_dict[label] = xs[-1]
1308
+ summary_dict_colors[label] = color
1309
+
1310
+ for curve_i, (curve_ID, model_id_data) in enumerate(to_plot_dict.items()):
1311
+
1312
+ # unpack data
1313
+ label, steps, means, stds, sems, ys, color = (
1314
+ model_id_data["label"],
1315
+ model_id_data["steps"],
1316
+ model_id_data["means"],
1317
+ model_id_data["stds"],
1318
+ model_id_data["sems"],
1319
+ model_id_data["ys"],
1320
+ model_id_data["color"]
1321
+ )
1322
+
1323
+ # if smooth_factor:
1324
+ # means = smooth(means, smooth_factor)
1325
+ # stds = smooth(stds, smooth_factor)
1326
+
1327
+ if legend_show_n_seeds:
1328
+ n_seeds = len(ys)
1329
+ label = label+"({})".format(n_seeds)
1330
+
1331
+
1332
+ x_lim = max(steps[-1], x_lim)
1333
+ x_lim = min(max_x_lim, x_lim)
1334
+
1335
+ xlabel = "Env steps (1e6)"
1336
+
1337
+
1338
+ plot_with_shade(
1339
+ 0, ax[0], steps, means, stds, color, color, label,
1340
+ # 0, ax[0], steps, means, sems, color, color, label,
1341
+ legend=draw_legend,
1342
+ xlim=[0, x_lim],
1343
+ ylim=[0, max_y],
1344
+ xlabel=xlabel,
1345
+ ylabel=ylabel,
1346
+ title=None,
1347
+ labelsize=fontsize,
1348
+ fontsize=fontsize,
1349
+ title_fontsize=title_fontsize,
1350
+ linewidth=linewidth,
1351
+ leg_linewidth=5,
1352
+ leg_args=leg_args,
1353
+ xnbins=xnbins,
1354
+ ynbins=ynbins,
1355
+ )
1356
+
1357
+ summary_dict[label] = means[-1]
1358
+ summary_dict_colors[label] = color
1359
+
1360
+ # plot static lines
1361
+ if static_lines:
1362
+ for label, (mean, std, color) in static_lines.items():
1363
+
1364
+ if label == "":
1365
+ label = None
1366
+
1367
+ plot_with_shade(
1368
+ 0, ax[0], steps, np.array([mean]*len(steps)), np.array([std]*len(steps)), color, color, label,
1369
+ legend=True,
1370
+ xlim=[0, x_lim],
1371
+ ylim=[0, 1.0],
1372
+ xlabel="Env steps (1e6)",
1373
+ ylabel=ylabel,
1374
+ linestyle=":",
1375
+ leg_args=leg_args,
1376
+ fontsize=fontsize,
1377
+ title_fontsize=title_fontsize,
1378
+ xnbins=xnbins,
1379
+ ynbins=ynbins,
1380
+ )
1381
+
1382
+
1383
+ if plot_path:
1384
+ f.savefig(plot_path+".png")
1385
+ f.savefig(plot_path+".svg")
1386
+ print(f"Plot saved to {plot_path}.[png/svg].")
1387
+
1388
+
1389
+ # Summary dict
1390
+ if len(summary_dict) == 0:
1391
+ raise ValueError(f"No experiments found for {load_pattern}.")
1392
+ else:
1393
+ # print summary
1394
+ best = max(summary_dict.values())
1395
+
1396
+ pc = 0.3
1397
+ n = int(len(summary_dict)*pc)
1398
+ print("top n: ", n)
1399
+
1400
+ top_pc = sorted(summary_dict.values())[-n:]
1401
+ bottom_pc = sorted(summary_dict.values())[:n]
1402
+
1403
+ print("legend:")
1404
+ cprint("\tbest", "green")
1405
+ cprint("\ttop {} %".format(int(pc * 100)), "blue")
1406
+ cprint("\tbottom {} %".format(int(pc * 100)), "red")
1407
+ print("\tothers")
1408
+ print()
1409
+
1410
+ for l, p in sorted(summary_dict.items(), key=lambda kv: kv[1]):
1411
+
1412
+ c = summary_dict_colors[l]
1413
+ if p == best:
1414
+ cprint("label: {} ({})".format(l, c), "green")
1415
+ cprint("\t {}:{}".format(metric, p), "green")
1416
+
1417
+ elif p in top_pc:
1418
+ cprint("label: {} ({})".format(l, c), "blue")
1419
+ cprint("\t {}:{}".format(metric, p), "blue")
1420
+
1421
+ elif p in bottom_pc:
1422
+ cprint("label: {} ({})".format(l, c), "red")
1423
+ cprint("\t {}:{}".format(metric, p), "red")
1424
+
1425
+ else:
1426
+ print("label: {} ({})".format(l, c))
1427
+ print("\t {}:{}".format(metric, p))
1428
+
1429
+
1430
+ if show_plot:
1431
+ plt.tight_layout()
1432
+ plt.subplots_adjust(hspace=1.5, wspace=0.5, left=0.1, right=0.9, bottom=0.1, top=0.85)
1433
+ plt.suptitle(super_title)
1434
+ plt.show()
1435
+ plt.close()
1436
+
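For reference, the plotting code above calls a `smooth` helper that is not part of this diff. A minimal sketch, assuming a simple moving average whose window is the `smooth_factor` / `eval_smooth_factor` used above (the actual implementation may differ):

import numpy as np

def smooth(values, window):
    # Assumed moving-average smoother; output length equals input length
    # so the smoothed series still aligns with the unmodified `steps` axis.
    values = np.asarray(values, dtype=np.float64)
    if not window or int(window) <= 1 or values.size == 0:
        return values
    window = int(window)
    kernel = np.ones(window) / window
    return np.convolve(values, kernel, mode="same")

As a side note on the `to_compare` block: it runs Welch's t-test (`stats.ttest_ind(..., equal_var=False)`) at every logged step and scatters the steps where p < `test_p` slightly above the curves.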
display_LLM_evaluations.py ADDED
@@ -0,0 +1,45 @@
1
+ import json
2
+ from pathlib import Path
3
+ # load a JSON file
4
+ def load_json(path):
5
+ with open(path) as f:
6
+ data = json.load(f)
7
+ return data
8
+
9
+ random_asocial = load_json(Path("llm_log/random_asocial_04_01_2023_14:28:53/evaluation_log.json"))
10
+ random_boxes = load_json(Path("llm_log/random_boxes_04_01_2023_14:32:17/evaluation_log.json"))
11
+
12
+ ada_asocial = load_json(Path("llm_log/ada_asocial_3_04_01_2023_14:53:16/evaluation_log.json"))
13
+ ada_boxes = load_json(Path("llm_log/ada_3st_boxes_04_01_2023_18:55:38/evaluation_log.json")) # no caretaker
14
+ ada_boxes_c = load_json(Path("llm_log/ada_3st_boxes_caretaker_04_01_2023_20:18:18/evaluation_log.json")) # caretaker
15
+
16
+ davinci_asocial = load_json(Path("llm_log/davinci_asocial_3st_04_01_2023_21:27:23/evaluation_log.json"))
17
+ davinci_boxes = load_json(Path("llm_log/davinci_3st_boxes_04_01_2023_20:37:28/evaluation_log.json"))
18
+ davinci_boxes_c = load_json(Path("llm_log/davinci_3st_boxes_caretaker_04_01_2023_21:17:44/evaluation_log.json"))
19
+
20
+ bloom_560_asocial = load_json(Path("llm_log/bloom_560m_asocial_3st_04_01_2023_14:59:44/evaluation_log.json"))
21
+ bloom_560_boxes = load_json(Path("llm_log/bloom_560_3st_boxes_04_01_2023_20:14:13/evaluation_log.json")) # no caretaker
22
+ bloom_560_boxes_c = load_json(Path("llm_log/bloom_560_3st_boxes_caretaker_04_01_2023_20:05:08/evaluation_log.json")) # caretaker
23
+
24
+
25
+ data = [
26
+ random_asocial,
27
+ random_boxes,
28
+
29
+ ada_asocial,
30
+ # ada_boxes,
31
+ ada_boxes_c,
32
+
33
+ davinci_asocial,
34
+ # davinci_boxes,
35
+ davinci_boxes_c,
36
+
37
+ bloom_560_asocial,
38
+ # bloom_560_boxes,
39
+ bloom_560_boxes_c,
40
+
41
+ ]
42
+
43
+ for d in data:
44
+ print(f'Model: {d["model"]} Env: {d["env_name"]} {"hist" if d["feed_full_ep"] else ""} ---> {d["mean_success_rate"]} ({len(d["success_rates"])})')
45
+
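For context, the loop above assumes every evaluation_log.json exposes at least the keys it accesses. An illustrative entry (key names are taken from the code above; the values here are hypothetical, not from a real run):

example_evaluation_log = {
    "model": "text-davinci-003",
    "env_name": "SocialAI-ColorBoxesLLMCSParamEnv-v1",
    "feed_full_ep": False,  # whether the full episode history was fed to the model
    "mean_success_rate": 0.4,
    "success_rates": [1, 0, 1, 0, 0, 1, 0, 1, 0, 0],  # one entry per evaluated episode
}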
draw_tree.py ADDED
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env python3
2
+ import sys
+ import gym  # explicit import so gym.Env / gym.make resolve; gym may also come in via the star imports below
3
+
4
+ from utils import *
5
+ from gym_minigrid.parametric_env import *
6
+
7
+ class DummyTreeParamEnv(gym.Env):
8
+ """
9
+ Dummy meta-environment used only to construct and draw a parameter tree (multi-task learning)
10
+ """
11
+
12
+ def __init__(
13
+ self,
14
+ ):
15
+
16
+ # construct the tree
17
+ self.parameter_tree = self.construct_tree()
18
+ self.parameter_tree.print_tree()
19
+
20
+ def draw_tree(self, ignore_labels=[], folded_nodes=[]):
21
+ self.parameter_tree.draw_tree("viz/param_tree_{}".format(self.spec.id), ignore_labels=ignore_labels, folded_nodes=folded_nodes)
22
+
23
+ def print_tree(self):
24
+ self.parameter_tree.print_tree()
25
+
26
+ def construct_tree(self):
27
+ tree = ParameterTree()
28
+
29
+ env_type_nd = tree.add_node("Env_type", type="param")
30
+
31
+ # Information seeking
32
+ inf_seeking_nd = tree.add_node("Information_seeking", parent=env_type_nd, type="value")
33
+
34
+ prag_fr_compl_nd = tree.add_node("Introductory_sequence", parent=inf_seeking_nd, type="param")
35
+ tree.add_node("Eye_contact", parent=prag_fr_compl_nd, type="value")
36
+
37
+ # scaffolding
38
+ scaffolding_nd = tree.add_node("Scaffolding", parent=inf_seeking_nd, type="param")
39
+ scaffolding_N_nd = tree.add_node("N", parent=scaffolding_nd, type="value")
40
+
41
+ cue_type_nd = tree.add_node("Cue_type", parent=scaffolding_N_nd, type="param")
42
+ # tree.add_node("Language_Color", parent=cue_type_nd, type="value")
43
+ # tree.add_node("Language_Feedback", parent=cue_type_nd, type="value")
44
+ tree.add_node("Pointing", parent=cue_type_nd, type="value")
45
+
46
+ # N_bo_nd = tree.add_node("N", parent=inf_seeking_nd, type="param")
47
+ # tree.add_node("2", parent=N_bo_nd, type="value")
48
+
49
+ problem_nd = tree.add_node("Problem", parent=inf_seeking_nd, type="param")
50
+ tree.add_node("Boxes", parent=problem_nd, type="value")
51
+ tree.add_node("Switches", parent=problem_nd, type="value")
52
+ tree.add_node("Marbles", parent=problem_nd, type="value")
53
+ tree.add_node("Generators", parent=problem_nd, type="value")
54
+ tree.add_node("Doors", parent=problem_nd, type="value")
55
+ tree.add_node("Levers", parent=problem_nd, type="value")
56
+
57
+ return tree
58
+
59
+
60
+
61
+ filename = sys.argv[1]
62
+
63
+ if len(sys.argv) > 2:
64
+ env_name = sys.argv[2]
65
+ env = gym.make(env_name)
66
+
67
+ else:
68
+ env = DummyTreeParamEnv()
69
+
70
+ # draw tree
71
+
72
+ folded_nodes = [
73
+ # "Information_Seeking",
74
+ # "Perspective_Inference",
75
+ ]
76
+
77
+
78
+ # selected_parameters_labels = {
79
+ # "Env_type": "Information_Seeking",
80
+ # "Distractor": "Yes",
81
+ # "Problem": "Boxes",
82
+ # }
83
+
84
+ env.parameter_tree.draw_tree(
85
+ filename=f"viz/{filename}",
86
+ ignore_labels=["Num_of_colors"],
87
+ # selected_parameters=selected_parameters_labels,
88
+ folded_nodes=folded_nodes,
89
+ label_parser={
90
+ "Scaffolding": "Help"
91
+ }
92
+ )
93
+
94
+ # for i in range(3):
95
+ # params = env.parameter_tree.sample_env_params()
96
+ # selected_parameters_labels = {k.label: v.label for k, v in params.items()}
97
+ #
98
+ # env.parameter_tree.draw_tree(
99
+ # filename=f"viz/{filename}_{i}",
100
+ # ignore_labels=["Num_of_colors"],
101
+ # selected_parameters=selected_parameters_labels,
102
+ # folded_nodes=folded_nodes,
103
+ # )
104
+ #
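Usage note: `python draw_tree.py <output_name>` draws the hard-coded dummy tree above, while `python draw_tree.py <output_name> <env_id>` instantiates a registered environment with gym.make and draws its parameter tree; the figure is written to `viz/<output_name>`. See draw_trees.sh below for concrete invocations.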
draw_trees.sh ADDED
@@ -0,0 +1,19 @@
1
+ ## Pointing
2
+ #python draw_tree.py cs_trees/pointing_tree_train SocialAI-EPointingHeldoutDoorsTrainInformationSeekingParamEnv-v1
3
+ #python draw_tree.py cs_trees/pointing_tree_test SocialAI-EPointingDoorsTestInformationSeekingParamEnv-v1
4
+ #
5
+ ## Role Reversal
6
+ #python draw_tree.py cs_trees/rr_tree_B_single SocialAI-MarblePassBCollaborationParamEnv-v1
7
+ #python draw_tree.py cs_trees/rr_tree_asoc_single SocialAI-AsocialMarbleCollaborationParamEnv-v1
8
+ #python draw_tree.py cs_trees/rr_tree_B_group SocialAI-RoleReversalGroupExperimentalCollaborationParamEnv-v1
9
+ #python draw_tree.py cs_trees/rr_tree_asoc_group SocialAI-RoleReversalGroupControlCollaborationParamEnv-v1
10
+ #python draw_tree.py cs_trees/rr_tree_A SocialAI-MarblePassACollaborationParamEnv-v1
11
+ #
12
+ ## Scaffolding
13
+ #python draw_tree.py cs_trees/scaf_tree_test SocialAI-AELangFeedbackTrainFormatsCSParamEnv-v1
14
+ #python draw_tree.py cs_trees/scaf_tree_4 SocialAI-AELangFeedbackTrainScaffoldingCSParamEnv-v1 --acl-type intro_seq
15
+ #python draw_tree.py cs_trees/scaf_tree_8 SocialAI-AELangFeedbackTrainScaffoldingCSParamEnv-v1 --acl-type intro_seq_seq
16
+
17
+ # LLMs
18
+ #python draw_tree.py cs_trees/llms_tree_asoc_apple SocialAI-AsocialBoxInformationSeekingParamEnv-v1
19
+ #python draw_tree.py cs_trees/llms_tree_color_box SocialAI-ColorBoxesLLMCSParamEnv-v1
dummy_run.sh ADDED
@@ -0,0 +1,109 @@
1
+ # test
2
+ #rm -rf storage/test && python -m scripts.train --frames 100000000 --model test --algo ppo --dialogue --save-interval 1 --log-interval 1 --test-interval 1 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-JAELangColorTrainInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name JALangColorTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type lang --exploration-bonus-params 10 50 --exploration-bonus-tanh 0.6 --test-seed 42 --seed 1234
3
+
4
+ # no test
5
+ #rm -rf storage/test && python -m scripts.train --frames 100000000 --model test --algo ppo --dialogue --save-interval 1 --log-interval 1 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-EPointingInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name PointingTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6
6
+
7
+
8
+ # dummy case studies
9
+ #python -m scripts.train --frames 100000000 --model dummy_cs_Pointing_CB --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-EPointingTrainInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name PointingTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6
10
+ #python -m scripts.train --frames 30000000 --model dummy_cs_NEW_Pointing_sm_CB --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-EPointingTrainInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name PointingTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 1 50 --exploration-bonus-tanh 0.6
11
+ #python -m scripts.train --frames 15000000 --model dummy_cs_NEW_Color_CBL --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-ELangColorTrainInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name LangColorTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type lang --exploration-bonus-params 10 50 --exploration-bonus-tanh 0.6
12
+ #python -m scripts.train --frames 10000000 --model dummy_cs_NEW_Feedback_CBL --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-ELangFeedbackTrainInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name LangFeedbackTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type lang --exploration-bonus-params 10 50 --exploration-bonus-tanh 0.6
13
+
14
+ # dummy JA
15
+ #python -m scripts.train --frames 100000000 --model dummy_cs_JA_Pointing_CB --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-JAEPointingTrainInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name JAPointingTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6
16
+ #python -m scripts.train --frames 100000000 --model dummy_cs_JA_Pointing_CB_sm --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-JAEPointingTrainInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name JAPointingTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 1 50 --exploration-bonus-tanh 0.6
17
+ #python -m scripts.train --frames 100000000 --model dummy_cs_JA_Color_CBL_new --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-JAELangColorTrainInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name JALangColorTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type lang --exploration-bonus-params 10 50 --exploration-bonus-tanh 0.6
18
+ #python -m scripts.train --frames 100000000 --model dummy_cs_JA_Feedback_CBL_new --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-JAELangFeedbackTrainInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name JALangFeedbackTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type lang --exploration-bonus-params 10 50 --exploration-bonus-tanh 0.6
19
+
20
+ # Marble Feedback rec quick test
21
+ #python -m scripts.train --frames 30000000 --model dummy_marbl_rec_test_rec_5 --algo ppo --dialogue --save-interval 5 --log-interval 5 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-NLangFeedbackMarbleTestFormatsCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name NFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --continue storage/dummy_marbl_rec_test_rec_5
22
+ #python -m scripts.train --frames 30000000 --model dummy_marbl_rec_test_rec_10 --algo ppo --dialogue --save-interval 5 --log-interval 5 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-NLangFeedbackMarbleTestFormatsCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 10 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name NFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64
23
+ #python -m scripts.train --frames 30000000 --model dummy_marbl_rec_test_rec_20 --algo ppo --dialogue --save-interval 5 --log-interval 5 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-NLangFeedbackMarbleTestFormatsCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 20 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name NFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64
24
+
25
+ # dummy Formats
26
+ # CB
27
+ #python -m scripts.train --frames 30000000 --model dummy_cs_jz_formats_N_rec_5 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-NLangFeedbackTrainFormatsCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name NFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64
28
+ #python -m scripts.train --frames 30000000 --model dummy_cs_jz_formats_N_rec_10 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-NLangFeedbackTrainFormatsCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 10 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name NFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64
29
+ ## CBL
30
+ #python -m scripts.train --frames 30000000 --model dummy_cs_formats_CBL_N_rec_5 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-NLangFeedbackTrainFormatsCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name NFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type lang --exploration-bonus-params 10 50 --exploration-bonus-tanh 0.6
31
+ #python -m scripts.train --frames 30000000 --model dummy_cs_formats_CBL_N_rec_10 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-NLangFeedbackTrainFormatsCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 10 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name NFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type lang --exploration-bonus-params 10 50 --exploration-bonus-tanh 0.6
32
+
33
+ # scaffolding
34
+ #python -m scripts.train --frames 30000000 --model dummy_cs_jz_scaf_AE --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-AELangFeedbackTrainScaffoldingCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name AEFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --acl --acl-thresholds 0.90 0.90 --acl-average-interval 500 --acl-minimum-episodes 1000
35
+ #python -m scripts.train --frames 30000000 --model dummy_cs_jz_scaf_E --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-ELangFeedbackTrainScaffoldingCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name EFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --acl --acl-thresholds 0.75 --acl-average-interval 500 --acl-minimum-episodes 1000
36
+ #python -m scripts.train --frames 30000000 --model dummy_cs_jz_scaf_A --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-ALangFeedbackTrainScaffoldingCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name AFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --acl --acl-thresholds 0.75 --acl-average-interval 500 --acl-minimum-episodes 1000
37
+
38
+ # test a100 vs v100
39
+ #python -m scripts.train --frames 100000 --model test_a100_rec_10 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-NLangFeedbackTrainFormatsCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 10 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name NFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64
40
+ #python -m scripts.train --frames 100000 --model test_a100_rec_5 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-NLangFeedbackTrainFormatsCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name NFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64
41
+
42
+ # case study - Generators heldoutgenerators
43
+ #python -m scripts.train --frames 30000000 --model dummy_cs_Feedback2_HGen_CBL --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-ELangFeedbackHeldoutGeneratorsTrainInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name LangFeedbackHGenTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type lang --exploration-bonus-params 10 50 --exploration-bonus-tanh 0.6
44
+ #python -m scripts.train --frames 30000000 --model dummy_cs_Feedback2_HMar_CBL --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-ELangFeedbackTrainInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name LangFeedbackTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type lang --exploration-bonus-params 10 50 --exploration-bonus-tanh 0.6
45
+
46
+ # old Emulation
47
+ #rm -rf storage/test_emulation_no_distr_cb && python -m scripts.train --frames 100000000 --model test_emulation_no_distr_cb --algo ppo --dialogue --save-interval 1 --log-interval 1 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-EEmulationNoDistrInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 10 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6
48
+ #rm -rf storage/test && python -m scripts.train --frames 100000000 --model test --algo ppo --dialogue --save-interval 1 --log-interval 1 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-EPointingInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name PointingTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6
49
+
50
+
51
+ # role reversal
52
+ #python -m scripts.train --frames 30000000 --model dummy_cs_RR_all_train --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-RoleReversalCollaborationParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name RoleReversalTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64
53
+
54
+ # single - experimental ( trained on marble pass B)
55
+ #python -m scripts.train --frames 10000000 --model dummy_cs_RR_single_marble_pass_B --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-MarblePassBCollaborationParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name RoleReversalTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64
56
+ # single - control ( trained on asocial marble)
57
+ #python -m scripts.train --frames 10000000 --model dummy_cs_RR_single_asoc_marble --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-AsocialMarbleInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name RoleReversalTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64
58
+
59
+ # RR CB
60
+
61
+ # single
62
+ # experimental ( trained on marble pass B)
63
+ #python -m scripts.train --frames 5000000 --model dummy_cs_RR_single_CB_marble_pass_B --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-MarblePassBCollaborationParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name RoleReversalTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6
64
+ # control ( trained on asocial marble)
65
+ #python -m scripts.train --frames 5000000 --model dummy_cs_RR_single_CB_asoc_marble --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-AsocialMarbleInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name RoleReversalTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6
66
+ # new env
67
+ #python -m scripts.train --frames 5000000 --model dummy_cs_RR_single_CB_asoc_marble_new --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-AsocialMarbleCollaborationParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name RoleReversalTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6
68
+
69
+ # RR evaluation training single
70
+ #python -m scripts.train --frames 1000000 --model dummy_cs_RR_ft_NEW_single_CB_marble_pass_B_exp_soc --algo ppo --dialogue --save-interval 1 --log-interval 1 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-MarblePassACollaborationParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name RoleReversalTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6 --finetune-train storage/dummy_cs_RR_single_CB_marble_pass_B
71
+ #python -m scripts.train --frames 1000000 --model dummy_cs_RR_ft_NEW_single_CB_marble_pass_B_contr_asoc --algo ppo --dialogue --save-interval 1 --log-interval 1 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-MarblePassACollaborationParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name RoleReversalTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6 --finetune-train storage/dummy_cs_RR_single_CB_asoc_marble_new
72
+
73
+ # group
74
+ # experimental ( trained on marble pass B)
75
+ #python -m scripts.train --frames 20000000 --model dummy_cs_RR_group_CB_marble_pass_B --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-RoleReversalGroupExperimentalCollaborationParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name RoleReversalTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6 --continue-train auto
76
+ # control ( trained on asocial marble)
77
+ #python -m scripts.train --frames 20000000 --model dummy_cs_RR_group_CB_asoc_marble --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-RoleReversalGroupControlCollaborationParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name RoleReversalTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6 --continue-train auto
78
+
79
+ # group-finetune
80
+ #python -m scripts.train --frames 500000 --model dummy_cs_RR_ft_NEW_group_CB_marble_pass_A_exp_soc --algo ppo --dialogue --save-interval 1 --log-interval 1 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-MarblePassACollaborationParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name RoleReversalTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6 --finetune-train storage/dummy_cs_RR_group_CB_marble_pass_B
81
+ #python -m scripts.train --frames 500000 --model dummy_cs_RR_ft_NEW_group_CB_marble_pass_A_contr_asoc --algo ppo --dialogue --save-interval 1 --log-interval 1 --test-interval 0 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-MarblePassACollaborationParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name RoleReversalTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --exploration-bonus --episodic-exploration-bonus --exploration-bonus-type cell --exploration-bonus-params 2 50 --exploration-bonus-tanh 0.6 --finetune-train storage/dummy_cs_RR_group_CB_asoc_marble
82
+
83
+ # 3 phase scaffolding
84
+ #python -m scripts.train --frames 30000000 --model dummy_cs_jz_scaf_Esc-AEsc-AEfull --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-AELangFeedbackTrainScaffoldingCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name AEFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --acl --acl-thresholds 0.90 0.90 --acl-average-interval 500 --acl-minimum-episodes 1000
85
+ #python -m scripts.train --frames 30000000 --model dummy_cs_jz_scaf_Esc-Efull-Acs_or_Efull-A_or_Efull-AEfull --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-AELangFeedbackTrainScaffoldingCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name AEFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --acl --acl-thresholds 0.90 0.90 --acl-average-interval 500 --acl-minimum-episodes 1000
86
+ #python -m scripts.train --frames 30000000 --model dummy_cs_jz_scaf_Esc-Efull-Acs_or_Efull-A_or_Efull-AEfull --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-AELangFeedbackTrainScaffoldingCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name AEFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --acl --acl-thresholds 0.90 0.90 --acl-average-interval 500 --acl-minimum-episodes 1000
87
+ # Ncs-Nfull-A_E_N_A_E_full-AEfull
88
+ #python -m scripts.train --frames 30000000 --model dummy_cs_jz_scaf_Ncs-Nfull-A_E_N_A_E_full-AEfull --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-AELangFeedbackTrainScaffoldingCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name AEFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --acl --acl-thresholds 0.90 0.90 0.90 0.90 --acl-average-interval 500 --acl-minimum-episodes 1000
89
+ #python -m scripts.train --frames 30000000 --model dummy_cs_jz_scaf_A_E_N_A_E_scaf_full-AEfull --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-AELangFeedbackTrainScaffoldingCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name AEFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --acl --acl-thresholds 0.90 0.90 0.90 0.90 --acl-average-interval 500 --acl-minimum-episodes 1000
90
+ #python -m scripts.train --frames 30000000 --model dummy_cs_jz_scaf_A_E_N_A_E_full-AEfull --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-AELangFeedbackTrainScaffoldingCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name AEFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --acl --acl-thresholds 0.90 0.90 0.90 0.90 --acl-average-interval 500 --acl-minimum-episodes 1000
91
+ #python -m scripts.train --frames 30000000 --model dummy_cs_jz_scaf_A_E_AE_scaf_full-AEfull --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-AELangFeedbackTrainScaffoldingCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name AEFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --acl --acl-thresholds 0.90 0.90 0.90 0.90 --acl-average-interval 500 --acl-minimum-episodes 1000
92
+
93
+ ## Emulation
94
+ # dummy emulation rec 10
95
+ #python -m scripts.train --frames 20000000 --model dummy_cs_emulation_no_distr_rec_10 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-EEmulationNoDistrInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 10 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --test-set-name NoDistrEmulationTestSet
96
+ #python -m scripts.train --frames 20000000 --model dummy_cs_emulation_distr_rec_10 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-EEmulationDistrInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 10 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --test-set-name DistrEmulationTestSet
97
+
98
+ # rec 5
99
+ #python -m scripts.train --frames 20000000 --model dummy_cs_emulation_no_distr_rec_5 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-EEmulationNoDistrInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --test-set-name NoDistrEmulationTestSet
100
+ #python -m scripts.train --frames 20000000 --model dummy_cs_emulation_distr_rec_5 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-EEmulationDistrInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --test-set-name DistrEmulationTestSet
101
+
102
+
103
+
104
+ #python -m scripts.train --frames 40000000 --model 07-12_dummy_cs_emulation_distr_rec_10/0 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-EEmulationDistrInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 10 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --test-set-name DistrEmulationTestSet --continue-train auto
105
+ #python -m scripts.train --frames 40000000 --model 07-12_dummy_cs_emulation_distr_rec_5/0 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-EEmulationDistrInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --test-set-name DistrEmulationTestSet --continue-train auto
106
+
107
+ #python -m scripts.train --frames 40000000 --model test_a100 --algo ppo --dialogue --save-interval 10 --log-interval 10 --test-interval 100 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-EEmulationDistrInformationSeekingParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --test-set-name DistrEmulationTestSet
108
+
109
+ python -m scripts.train --frames 30000000 --model test_scaff --algo ppo --dialogue --save-interval 100 --log-interval 100 --test-interval 1000 --frames-per-proc 40 --multi-modal-babyai11-agent --env SocialAI-AELangFeedbackTrainScaffoldingCSParamEnv-v1 --clipped-rewards --batch-size 640 --clip-eps 0.2 --recurrence 5 --max-grad-norm 0.5 --epochs 4 --optim-eps 1e-05 --lr 1e-4 --entropy-coef 0.00001 --test-set-name AEFormatsTestSet --env-args see_through_walls False --arch bow_endpool_res --bAI-lang-model attgru --memory-dim 2048 --procs 64 --acl --acl-type intro_seq --acl-thresholds 0.90 0.90 0.90 0.90 --acl-average-interval 500 --acl-minimum-episodes 1000 --seed 1
eval_LLMs.sh ADDED
@@ -0,0 +1,42 @@
1
+ # AsocialBox (6 in-context examples)
2
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model text-ada-001 --env-args size 7 --skip-check --env-name SocialAI-AsocialBoxInformationSeekingParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_asocialbox_SocialAI-AsocialBoxInformationSeekingParamEnv-v1_2023_07_19_19_28_48/episodes.pkl
3
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model gpt-3.5-turbo-0613 --env-args size 7 --skip-check --env-name SocialAI-AsocialBoxInformationSeekingParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_asocialbox_SocialAI-AsocialBoxInformationSeekingParamEnv-v1_2023_07_19_19_28_48/episodes.pkl
4
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model gpt-3.5-turbo-instruct-0914 --env-args size 7 --env-name SocialAI-AsocialBoxInformationSeekingParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_asocialbox_SocialAI-AsocialBoxInformationSeekingParamEnv-v1_2023_07_19_19_28_48/episodes.pkl
5
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model gpt-4-0613 --env-args size 7 --skip-check --env-name SocialAI-AsocialBoxInformationSeekingParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_asocialbox_SocialAI-AsocialBoxInformationSeekingParamEnv-v1_2023_07_19_19_28_48/episodes.pkl
6
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model text-davinci-003 --env-args size 7 --skip-check --env-name SocialAI-AsocialBoxInformationSeekingParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_asocialbox_SocialAI-AsocialBoxInformationSeekingParamEnv-v1_2023_07_19_19_28_48/episodes.pkl
7
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model bloom_560m --env-args size 7 --skip-check --env-name SocialAI-AsocialBoxInformationSeekingParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_asocialbox_SocialAI-AsocialBoxInformationSeekingParamEnv-v1_2023_07_19_19_28_48/episodes.pkl
8
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model random --env-args size 7 --skip-check --env-name SocialAI-AsocialBoxInformationSeekingParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_asocialbox_SocialAI-AsocialBoxInformationSeekingParamEnv-v1_2023_07_19_19_28_48/episodes.pkl
9
+
10
+
11
+ ### ColorBoxes
12
+
13
+ # 10 episodes
14
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model text-ada-001 --env-args size 7 --skip-check --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
15
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model gpt-3.5-turbo-0613 --env-args size 7 --skip-check --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
16
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model gpt-4-0613 --env-args size 7 --skip-check --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
17
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model text-davinci-003 --env-args size 7 --skip-check --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
18
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model bloom_560m --env-args size 7 --skip-check --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
19
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model random --env-args size 7 --skip-check --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
20
+
21
+ # 20 episodes
22
+ #python -m scripts.LLM_test --episodes 20 --max-steps 15 --model gpt-4-0613 --env-args size 7 --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
23
+ #python -m scripts.LLM_test --episodes 20 --max-steps 15 --model gpt-3.5-turbo-0613 --env-args size 7 --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
24
+ #python -m scripts.LLM_test --episodes 20 --max-steps 15 --model gpt-3.5-turbo-instruct-0914 --env-args size 7 --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
25
+ #python -m scripts.LLM_test --episodes 20 --max-steps 15 --model text-ada-001 --env-args size 7 --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
26
+ #python -m scripts.LLM_test --episodes 20 --max-steps 15 --model text-davinci-003 --env-args size 7 --skip-check --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
27
+ #python -m scripts.LLM_test --episodes 20 --max-steps 15 --model random --env-args size 7 --skip-check --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
28
+ #python -m scripts.LLM_test --episodes 20 --max-steps 15 --model bloom_560m --env-args size 7 --skip-check --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl
29
+
30
+ ### ColorBoxes generalization
31
+ # 10 episodes generalization
32
+ #python -m scripts.LLM_test --episodes 10 --max-steps 15 --model gpt-4-0613 --env-args size 7 --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_SocialAI-ColorLLMCSParamEnv-v1_2023_09_18_17_24_24/episodes.pkl
33
+
34
+ # 20 episodes generalization
35
+ #python -m scripts.LLM_test --episodes 20 --max-steps 15 --model gpt-4-0613 --env-args size 7 --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_SocialAI-ColorLLMCSParamEnv-v1_2023_09_18_17_24_24/episodes.pkl
36
+
37
+
38
+ python -m scripts.LLM_test --episodes 20 --max-steps 15 --model random --env-args size 7 --skip-check --env-name SocialAI-ColorBoxesLLMCSParamEnv-v1 --in-context-path llm_data/in_context_examples/in_context_SocialAI-ColorLLMCSParamEnv-v1_2023_09_18_17_24_24/episodes.pkl
39
+
40
+
41
+
42
+
gpuh.py ADDED
@@ -0,0 +1,99 @@
1
+
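+ # Back-of-the-envelope GPU-hour estimate for a training campaign.
+ # Each *_conf value below contributes one (weighted) configuration; the total is
+ # configurations * seeds * gpus_per_seed * frames / (fps * 60 * 60).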
2
+
3
+ point_conf = 0.3
4
+ feedb_conf = 0.3
5
+ color_conf = 0.3
6
+
7
+ ja_point_conf = 0.3
8
+ ja_feedb_conf = 0.3
9
+ ja_color_conf = 0.3
10
+
11
+ emul_conf = 2
12
+
13
+ rri_conf = 0.2*4
14
+
15
+ op_conf = 0.5 * 3 # hidden, normal, expert
16
+
17
+ form_conf = 0.2*8
18
+ scaf_conf = 0.2*4
19
+
20
+ configurations = point_conf + feedb_conf + color_conf + ja_point_conf + ja_feedb_conf + ja_color_conf + emul_conf + rri_conf + op_conf + form_conf + scaf_conf
21
+ # configurations = 1
22
+
23
+ #
24
+ configurations = 0.3 + 0.3 + 0.3 + 8*0.3
25
+
26
+ # configurations = 3*0.2 + 0.5 + 0.04*2 + 0.5*2
27
+
28
+ # num_of_trains = 3 + 3 + 2 + 4 + 3 + 8 + 4
29
+ # print("num_of_trains:", num_of_trains)
30
+
31
+ configurations = 0.01 * 6
32
+
33
+ print(f"Number of trains: {configurations}")
34
+
35
+ frames = 100_000_000
36
+ # frames = 75_000_000
37
+ # frames = 50_000_000
38
+
39
+ seeds = 8
40
+ # seeds = 4
41
+ print(f"Number of seeds: {seeds}")
42
+
43
+ # ## one GPU
44
+ # fps = 300
45
+ fps = 580 # ssh jz
46
+ # fps = 500 # ssh pf
47
+
48
+ gpus_per_seed = 1
49
+ print(f"\n{gpus_per_seed} GPU")
50
+
51
+ seed_frames = frames
52
+ one_seed_time = 1_000_000 / (fps * 60 * 60)
53
+ print("train time (1M frames): {}h - {:d}d {:.0f}h".format(
54
+ one_seed_time,
55
+ int(one_seed_time // 24), one_seed_time % 24)
56
+ )
57
+
58
+ total_gpuh = configurations*seeds*gpus_per_seed*frames/(fps*60*60)
59
+ print("total gpu hours 1 gpups:", total_gpuh)
60
+
61
+ # ## half a GPU
62
+ #
63
+ # fps = 275
64
+ # fps = 370 # ssh jz
65
+ # # fps = 300 # ssh pf
66
+ # gpus_per_seed = 0.5
67
+ #
68
+ # print(f"\n{gpus_per_seed} GPU")
69
+ # one_seed = frames/(fps*60*60)
70
+ # print("train time: {}h - {:d}d {:.0f}h".format(one_seed, int(one_seed // 24), one_seed % 24))
71
+ #
72
+ # total_gpuh = configurations*seeds*gpus_per_seed*frames/(fps*60*60)
73
+ # print("total gpu hours 0.5 gpups:", total_gpuh)
74
+ #
75
+ # # ## 1/3 of a GPU
76
+ # fps = 250 # ssh jz 1/3
77
+ # # fps = 250 # ssh 1/3 pf
78
+ #
79
+ # gpus_per_seed = 0.33
80
+ # print(f"\n{gpus_per_seed} GPU")
81
+ #
82
+ # one_seed = frames/(fps*60*60)
83
+ # print("train time: {}h - {:d}d {:.0f}h".format(one_seed, int(one_seed // 24), one_seed % 24))
84
+ #
85
+ # total_gpuh = configurations*seeds*gpus_per_seed*frames/(fps*60*60)
86
+ # print("total gpu hours 0.33 gpups:", total_gpuh)
87
+ #
88
+ #
89
+ # # ## 1/4 of gpu
90
+ # # fps = 190 # ssh 1/4 pf
91
+ # #
92
+ # # gpus_per_seed = 0.25
93
+ # # print(f"\n{gpus_per_seed} GPU")
94
+ # #
95
+ # # one_seed = frames/(fps*60*60)
96
+ # # print("train time: {}h - {:d}d {:.0f}h".format(one_seed, int(one_seed // 24), one_seed % 24))
97
+ # #
98
+ # # total_gpuh = configurations*seeds*gpus_per_seed*frames/(fps*60*60)
99
+ # # print("total gpu hours 0.25 gpups:", total_gpuh)
gym-minigrid/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ *.pyc
2
+ *__pycache__
3
+ *egg-info
4
+ trained_models
5
+
6
+ # PyPI
7
+ build/*
8
+ dist/*
9
+ .idea/
gym-minigrid/.travis.yml ADDED
@@ -0,0 +1,10 @@
1
+ language: python
2
+ python:
3
+ - "3.5"
4
+
5
+ # command to install dependencies
6
+ install:
7
+ - pip3 install -e .
8
+
9
+ # command to run tests
10
+ script: ./run_tests.py
gym-minigrid/LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2019 Maxime Chevalier-Boisvert
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
gym-minigrid/README.md ADDED
@@ -0,0 +1,511 @@
1
+ # Minimalistic Gridworld Environment (MiniGrid)
2
+
3
+ [![Build Status](https://travis-ci.org/maximecb/gym-minigrid.svg?branch=master)](https://travis-ci.org/maximecb/gym-minigrid)
4
+
5
+ There are other gridworld Gym environments out there, but this one is
6
+ designed to be particularly simple, lightweight and fast. The code has very few
7
+ dependencies, making it less likely to break or fail to install. It loads no
8
+ external sprites/textures, and it can run at up to 5000 FPS on a Core i7
9
+ laptop, which means you can run your experiments faster. A known-working RL
10
+ implementation can be found [in this repository](https://github.com/lcswillems/torch-rl).
11
+
12
+ Requirements:
13
+ - Python 3.5+
14
+ - OpenAI Gym
15
+ - NumPy
16
+ - Matplotlib (optional, only needed for display)
17
+
18
+ Please use this bibtex if you want to cite this repository in your publications:
19
+
20
+ ```
21
+ @misc{gym_minigrid,
22
+ author = {Chevalier-Boisvert, Maxime and Willems, Lucas and Pal, Suman},
23
+ title = {Minimalistic Gridworld Environment for OpenAI Gym},
24
+ year = {2018},
25
+ publisher = {GitHub},
26
+ journal = {GitHub repository},
27
+ howpublished = {\url{https://github.com/maximecb/gym-minigrid}},
28
+ }
29
+ ```
30
+
31
+ List of publications & submissions using MiniGrid or BabyAI (please open a pull request to add missing entries):
32
+ - [Prioritized Level Replay](https://arxiv.org/pdf/2010.03934.pdf) (FAIR, October 2020)
33
+ - [Learning with AMIGO: Adversarially Motivated Intrinsic Goals](https://arxiv.org/pdf/2006.12122.pdf) (MIT, FAIR, June 2020)
34
+ - [RIDE: Rewarding Impact-Driven Exploration for Procedurally-Generated Environments](https://openreview.net/forum?id=rkg-TJBFPB) (FAIR, ICLR 2020)
35
+ - [Learning to Request Guidance in Emergent Communication](https://arxiv.org/pdf/1912.05525.pdf) (University of Amsterdam, Dec 2019)
36
+ - [Working Memory Graphs](https://arxiv.org/abs/1911.07141) (MSR, Nov 2019)
37
+ - [Fast Task-Adaptation for Tasks Labeled Using
38
+ Natural Language in Reinforcement Learning](https://arxiv.org/pdf/1910.04040.pdf) (Oct 2019, University of Antwerp)
39
+ - [Generalization in Reinforcement Learning with Selective Noise Injection and Information Bottleneck
40
+ ](https://arxiv.org/abs/1910.12911) (MSR, NeurIPS, Oct 2019)
41
+ - [Recurrent Independent Mechanisms](https://arxiv.org/pdf/1909.10893.pdf) (Mila, Sept 2019)
42
+ - [Learning Effective Subgoals with Multi-Task Hierarchical Reinforcement Learning](http://surl.tirl.info/proceedings/SURL-2019_paper_10.pdf) (Tsinghua University, August 2019)
43
+ - [Mastering emergent language: learning to guide in simulated navigation](https://arxiv.org/abs/1908.05135) (University of Amsterdam, Aug 2019)
44
+ - [Transfer Learning by Modeling a Distribution over Policies](https://arxiv.org/abs/1906.03574) (Mila, June 2019)
45
+ - [Reinforcement Learning with Competitive Ensembles
46
+ of Information-Constrained Primitives](https://arxiv.org/abs/1906.10667) (Mila, June 2019)
47
+ - [Learning distant cause and effect using only local and immediate credit assignment](https://arxiv.org/abs/1905.11589) (Incubator 491, May 2019)
48
+ - [Practical Open-Loop Optimistic Planning](https://arxiv.org/abs/1904.04700) (INRIA, April 2019)
49
+ - [Learning World Graphs to Accelerate Hierarchical Reinforcement Learning](https://arxiv.org/abs/1907.00664) (Salesforce Research, 2019)
50
+ - [Variational State Encoding as Intrinsic Motivation in Reinforcement Learning](https://mila.quebec/wp-content/uploads/2019/05/WebPage.pdf) (Mila, TARL 2019)
51
+ - [Unsupervised Discovery of Decision States Through Intrinsic Control](https://tarl2019.github.io/assets/papers/modhe2019unsupervised.pdf) (Georgia Tech, TARL 2019)
52
+ - [Modeling the Long Term Future in Model-Based Reinforcement Learning](https://openreview.net/forum?id=SkgQBn0cF7) (Mila, ICLR 2019)
53
+ - [Unifying Ensemble Methods for Q-learning via Social Choice Theory](https://arxiv.org/pdf/1902.10646.pdf) (Max Planck Institute, Feb 2019)
54
+ - [Planning Beyond The Sensing Horizon Using a Learned Context](https://personalrobotics.cs.washington.edu/workshops/mlmp2018/assets/docs/18_CameraReadySubmission.pdf) (MLMP@IROS, 2018)
55
+ - [Guiding Policies with Language via Meta-Learning](https://arxiv.org/abs/1811.07882) (UC Berkeley, Nov 2018)
56
+ - [On the Complexity of Exploration in Goal-Driven Navigation](https://arxiv.org/abs/1811.06889) (CMU, NeurIPS, Nov 2018)
57
+ - [Transfer and Exploration via the Information Bottleneck](https://openreview.net/forum?id=rJg8yhAqKm) (Mila, Nov 2018)
58
+ - [Creating safer reward functions for reinforcement learning agents in the gridworld](https://gupea.ub.gu.se/bitstream/2077/62445/1/gupea_2077_62445_1.pdf) (University of Gothenburg, 2018)
59
+ - [BabyAI: First Steps Towards Grounded Language Learning With a Human In the Loop](https://arxiv.org/abs/1810.08272) (Mila, ICLR, Oct 2018)
60
+
61
+ This environment has been built as part of work done at [Mila](https://mila.quebec). The Dynamic obstacles environment has been added as part of work done at [IAS in TU Darmstadt](https://www.ias.informatik.tu-darmstadt.de/) and the University of Genoa for mobile robot navigation with dynamic obstacles.
62
+
63
+ ## Installation
64
+
65
+ There is now a [pip package](https://pypi.org/project/gym-minigrid/) available, which is updated periodically:
66
+
67
+ ```
68
+ pip3 install gym-minigrid
69
+ ```
70
+
71
+ Alternatively, to get the latest version of MiniGrid, you can clone this repository and install the dependencies with `pip3`:
72
+
73
+ ```
74
+ git clone https://github.com/maximecb/gym-minigrid.git
75
+ cd gym-minigrid
76
+ pip3 install -e .
77
+ ```
78
+
79
+ ## Basic Usage
80
+
81
+ There is a UI application which allows you to manually control the agent with the arrow keys:
82
+
83
+ ```
84
+ ./manual_control.py
85
+ ```
86
+
87
+ The environment being run can be selected with the `--env` option, eg:
88
+
89
+ ```
90
+ ./manual_control.py --env MiniGrid-Empty-8x8-v0
91
+ ```
92
+
93
+ ## Reinforcement Learning
94
+
95
+ If you want to train an agent with reinforcement learning, I recommend using the code found in the [torch-rl](https://github.com/lcswillems/torch-rl) repository. This code has been tested and is known to work with this environment. The default hyper-parameters are also known to converge.
96
+
97
+ A sample training command is:
98
+
99
+ ```
100
+ cd torch-rl
101
+ python3 -m scripts.train --env MiniGrid-Empty-8x8-v0 --algo ppo
102
+ ```
103
+
104
+ ## Wrappers
105
+
106
+ MiniGrid is built to support tasks involving natural language and sparse rewards.
107
+ The observations are dictionaries, with an 'image' field containing a partially
108
+ observable view of the environment, a 'mission' field containing a textual string
+ describing the objective the agent should reach to get a reward, and a 'direction'
110
+ field which can be used as an optional compass. Using dictionaries makes it
111
+ easy for you to add additional information to observations
112
+ if you need to, without having to encode everything into a single tensor.
113
+
114
+ There are a variety of wrappers available in [gym_minigrid/wrappers.py](/gym_minigrid/wrappers.py) for changing the observation format. If your RL code expects one single tensor for observations, take a look at
115
+ `FlatObsWrapper`. There is also an `ImgObsWrapper` that gets rid of the 'mission' field in observations,
116
+ leaving only the image field tensor.
117
+
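+ As a minimal sketch (assuming the `MiniGrid-Empty-8x8-v0` registration used
+ elsewhere in this README), flattening observations into a single tensor:
+
+ ```
+ import gym
+ from gym_minigrid.wrappers import FlatObsWrapper
+
+ env = gym.make('MiniGrid-Empty-8x8-v0')
+ env = FlatObsWrapper(env)  # encode image + mission string into one flat array
+ obs = env.reset()          # obs is now a single numpy array
+ ```
+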
118
+ Please note that the default observation format is a partially observable view of the environment using a
119
+ compact and efficient encoding, with 3 input values per visible grid cell, 7x7x3 values total.
120
+ These values are **not pixels**. If you want to obtain an array of RGB pixels as observations instead,
121
+ use the `RGBImgPartialObsWrapper`. You can use it as follows:
122
+
123
+ ```
124
+ from gym_minigrid.wrappers import *
125
+ env = gym.make('MiniGrid-Empty-8x8-v0')
126
+ env = RGBImgPartialObsWrapper(env) # Get pixel observations
127
+ env = ImgObsWrapper(env) # Get rid of the 'mission' field
128
+ obs = env.reset() # This now produces an RGB tensor only
129
+ ```
130
+
131
+ ## Design
132
+
133
+ Structure of the world:
134
+ - The world is an NxM grid of tiles
135
+ - Each tile in the grid world contains zero or one object
136
+ - Cells that do not contain an object have the value `None`
137
+ - Each object has an associated discrete color (string)
138
+ - Each object has an associated type (string)
139
+ - Provided object types are: wall, floor, lava, door, key, ball, box and goal
140
+ - The agent can pick up and carry exactly one object (eg: ball or key)
141
+ - To open a locked door, the agent has to be carrying a key matching the door's color
142
+
143
+ Actions in the basic environment:
144
+ - Turn left
145
+ - Turn right
146
+ - Move forward
147
+ - Pick up an object
148
+ - Drop the object being carried
149
+ - Toggle (open doors, interact with objects)
150
+ - Done (task completed, optional)
151
+
152
+ Default tile/observation encoding:
153
+ - Each tile is encoded as a 3 dimensional tuple: (OBJECT_IDX, COLOR_IDX, STATE)
154
+ - OBJECT_TO_IDX and COLOR_TO_IDX mapping can be found in [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)
155
+ - e.g. door STATE -> 0: open, 1: closed, 2: locked
156
+
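+ As a minimal sketch (assuming the default 7x7x3 partial view described in the
+ Wrappers section, and the `IDX_TO_OBJECT` reverse mapping that accompanies
+ `OBJECT_TO_IDX` in [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py)),
+ decoding the tile directly in front of the agent:
+
+ ```
+ import gym
+ import gym_minigrid  # registers the environments
+ from gym_minigrid.minigrid import IDX_TO_OBJECT
+
+ env = gym.make('MiniGrid-Empty-8x8-v0')
+ obs = env.reset()
+ # In its own 7x7 view the agent sits at (3, 6) facing up,
+ # so the cell directly in front of it is (3, 5).
+ obj_idx, color_idx, state = obs['image'][3, 5]
+ print(IDX_TO_OBJECT[obj_idx])  # e.g. 'empty' or 'wall'
+ ```
+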
157
+ By default, sparse rewards are given for reaching a green goal tile. A
158
+ reward of 1 is given for success, and zero for failure. There is also an
159
+ environment-specific time step limit for completing the task.
160
+ You can define your own reward function by creating a class derived
161
+ from `MiniGridEnv`. Extending the environment with new object types or new actions
162
+ should be very easy. If you wish to do this, you should take a look at the
163
+ [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py) source file.
164
+
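+ As a rough sketch (assuming the `_reward` hook and an `EmptyEnv` class behind
+ the registered Empty configurations), a subclass with a custom reward could
+ look like:
+
+ ```
+ from gym_minigrid.envs import EmptyEnv
+
+ class FlatRewardEmptyEnv(EmptyEnv):
+     def _reward(self):
+         # constant success reward instead of the default step-discounted one
+         return 1.0
+
+ env = FlatRewardEmptyEnv(size=8)
+ ```
+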
165
+ ## Included Environments
166
+
167
+ The environments listed below are implemented in the [gym_minigrid/envs](/gym_minigrid/envs) directory.
168
+ Each environment provides one or more configurations registered with OpenAI gym. Each environment
169
+ is also programmatically tunable in terms of size/complexity, which is useful for curriculum learning
170
+ or to fine-tune difficulty.
171
+
172
+ ### Empty environment
173
+
174
+ Registered configurations:
175
+ - `MiniGrid-Empty-5x5-v0`
176
+ - `MiniGrid-Empty-Random-5x5-v0`
177
+ - `MiniGrid-Empty-6x6-v0`
178
+ - `MiniGrid-Empty-Random-6x6-v0`
179
+ - `MiniGrid-Empty-8x8-v0`
180
+ - `MiniGrid-Empty-16x16-v0`
181
+
182
+ <p align="center">
183
+ <img src="/figures/empty-env.png" width=250>
184
+ </p>
185
+
186
+ This environment is an empty room, and the goal of the agent is to reach the
187
+ green goal square, which provides a sparse reward. A small penalty,
188
+ proportional to the number of steps taken, is subtracted from the reward. This environment is
189
+ useful, with small rooms, to validate that your RL algorithm works correctly,
190
+ and with large rooms to experiment with sparse rewards and exploration.
191
+ The random variants of the environment have the agent starting at a random
192
+ position for each episode, while the regular variants have the agent always
193
+ starting in the corner opposite to the goal.
194
+
195
+ ### Four rooms environment
196
+
197
+ Registered configurations:
198
+ - `MiniGrid-FourRooms-v0`
199
+
200
+ <p align="center">
201
+ <img src="/figures/four-rooms-env.png" width=380>
202
+ </p>
203
+
204
+ Classic four room reinforcement learning environment. The agent must navigate
205
+ in a maze composed of four rooms interconnected by 4 gaps in the walls. To
206
+ obtain a reward, the agent must reach the green goal square. Both the agent
207
+ and the goal square are randomly placed in any of the four rooms.
208
+
209
+ ### Door & key environment
210
+
211
+ Registered configurations:
212
+ - `MiniGrid-DoorKey-5x5-v0`
213
+ - `MiniGrid-DoorKey-6x6-v0`
214
+ - `MiniGrid-DoorKey-8x8-v0`
215
+ - `MiniGrid-DoorKey-16x16-v0`
216
+
217
+ <p align="center">
218
+ <img src="/figures/door-key-env.png">
219
+ </p>
220
+
221
+ This environment has a key that the agent must pick up in order to unlock
222
+ a door and then get to the green goal square. Because of the sparse reward,
223
+ this environment is difficult to solve using classical RL algorithms. It is
224
+ useful to experiment with curiosity or curriculum learning.
225
+
226
+ ### Multi-room environment
227
+
228
+ Registered configurations:
229
+ - `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
230
+ - `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
231
+ - `MiniGrid-MultiRoom-N6-v0` (six rooms)
232
+
233
+ <p align="center">
234
+ <img src="/figures/multi-room.gif" width=416 height=424>
235
+ </p>
236
+
237
+ This environment has a series of connected rooms with doors that must be
238
+ opened in order to get to the next room. The final room has the green goal
239
+ square the agent must get to. This environment is extremely difficult to
240
+ solve using RL alone. However, by gradually increasing the number of
241
+ rooms and building a curriculum, the environment can be solved.
242
+
243
+ ### Fetch environment
244
+
245
+ Registered configurations:
246
+ - `MiniGrid-Fetch-5x5-N2-v0`
247
+ - `MiniGrid-Fetch-6x6-N2-v0`
248
+ - `MiniGrid-Fetch-8x8-N3-v0`
249
+
250
+ <p align="center">
251
+ <img src="/figures/fetch-env.png" width=450>
252
+ </p>
253
+
254
+ This environment has multiple objects of assorted types and colors. The
255
+ agent receives a textual string as part of its observation telling it
256
+ which object to pick up. Picking up the wrong object produces a negative
257
+ reward.
258
+
259
+ ### Go-to-door environment
260
+
261
+ Registered configurations:
262
+ - `MiniGrid-GoToDoor-5x5-v0`
263
+ - `MiniGrid-GoToDoor-6x6-v0`
264
+ - `MiniGrid-GoToDoor-8x8-v0`
265
+
266
+ <p align="center">
267
+ <img src="/figures/gotodoor-6x6.png" width=400>
268
+ </p>
269
+
270
+ This environment is a room with four doors, one on each wall. The agent
271
+ receives a textual (mission) string as input, telling it which door to go to,
272
+ (eg: "go to the red door"). It receives a positive reward for performing the
273
+ `done` action next to the correct door, as indicated in the mission string.
274
+
275
+ ### Put-near environment
276
+
277
+ Registered configurations:
278
+ - `MiniGrid-PutNear-6x6-N2-v0`
279
+ - `MiniGrid-PutNear-8x8-N3-v0`
280
+
281
+ The agent is instructed through a textual string to pick up an object and
282
+ place it next to another object. This environment is easy to solve with two
283
+ objects, but difficult to solve with more, as it involves both textual
284
+ understanding and spatial reasoning involving multiple objects.
285
+
286
+ ### Red and blue doors environment
287
+
288
+ Registered configurations:
289
+ - `MiniGrid-RedBlueDoors-6x6-v0`
290
+ - `MiniGrid-RedBlueDoors-8x8-v0`
291
+
292
+ The purpose of this environment is to test memory.
293
+ The agent is randomly placed within a room with one red and one blue door
294
+ facing opposite directions. The agent has to open the red door and then open
295
+ the blue door, in that order. The agent, when facing one door, cannot see
296
+ the door behind it. Hence, the agent needs to remember whether or not it has
297
+ previously opened the other door in order to reliably succeed at completing
298
+ the task.
299
+
300
+ ### Memory environment
301
+
302
+ Registered configurations:
303
+ - `MiniGrid-MemoryS17Random-v0`
304
+ - `MiniGrid-MemoryS13Random-v0`
305
+ - `MiniGrid-MemoryS13-v0`
306
+ - `MiniGrid-MemoryS11-v0`
307
+ - `MiniGrid-MemoryS9-v0`
308
+ - `MiniGrid-MemoryS7-v0`
309
+
310
+ This environment is a memory test. The agent starts in a small room
311
+ where it sees an object. It then has to go through a narrow hallway
312
+ which ends in a split. At each end of the split there is an object,
313
+ one of which is the same as the object in the starting room. The
314
+ agent has to remember the initial object, and go to the matching
315
+ object at the split.
316
+
317
+ ### Locked room environment
318
+
319
+ Registered configurations:
320
+ - `MiniGrid-LockedRoom-v0`
321
+
322
+ The environment has six rooms, one of which is locked. The agent receives
323
+ a textual mission string as input, telling it which room to go to in order
324
+ to get the key that opens the locked room. It then has to go into the locked
325
+ room in order to reach the final goal. This environment is extremely difficult
326
+ to solve with vanilla reinforcement learning alone.
327
+
328
+ ### Key corridor environment
329
+
330
+ Registered configurations:
331
+ - `MiniGrid-KeyCorridorS3R1-v0`
332
+ - `MiniGrid-KeyCorridorS3R2-v0`
333
+ - `MiniGrid-KeyCorridorS3R3-v0`
334
+ - `MiniGrid-KeyCorridorS4R3-v0`
335
+ - `MiniGrid-KeyCorridorS5R3-v0`
336
+ - `MiniGrid-KeyCorridorS6R3-v0`
337
+
338
+ <p align="center">
339
+ <img src="figures/KeyCorridorS3R1.png" width="250">
340
+ <img src="figures/KeyCorridorS3R2.png" width="250">
341
+ <img src="figures/KeyCorridorS3R3.png" width="250">
342
+ <img src="figures/KeyCorridorS4R3.png" width="250">
343
+ <img src="figures/KeyCorridorS5R3.png" width="250">
344
+ <img src="figures/KeyCorridorS6R3.png" width="250">
345
+ </p>
346
+
347
+ This environment is similar to the locked room environment, but there are
348
+ multiple registered environment configurations of increasing size,
349
+ making it easier to use curriculum learning to train an agent to solve it.
350
+ The agent has to pick up an object which is behind a locked door. The key is
351
+ hidden in another room, and the agent has to explore the environment to find
352
+ it. The mission string does not give the agent any clues as to where the
353
+ key is placed. This environment can be solved without relying on language.
354
+
355
+ ### Unlock environment
356
+
357
+ Registered configurations:
358
+ - `MiniGrid-Unlock-v0`
359
+
360
+ <p align="center">
361
+ <img src="figures/Unlock.png" width="200">
362
+ </p>
363
+
364
+ The agent has to open a locked door. This environment can be solved without
365
+ relying on language.
366
+
367
+ ### Unlock pickup environment
368
+
369
+ Registered configurations:
370
+ - `MiniGrid-UnlockPickup-v0`
371
+
372
+ <p align="center">
373
+ <img src="figures/UnlockPickup.png" width="250">
374
+ </p>
375
+
376
+ The agent has to pick up a box which is placed in another room, behind a
377
+ locked door. This environment can be solved without relying on language.
378
+
379
+ ### Blocked unlock pickup environment
380
+
381
+ Registered configurations:
382
+ - `MiniGrid-BlockedUnlockPickup-v0`
383
+
384
+ <p align="center">
385
+ <img src="figures/BlockedUnlockPickup.png" width="250">
386
+ </p>
387
+
388
+ The agent has to pick up a box which is placed in another room, behind a
389
+ locked door. The door is also blocked by a ball which the agent has to move
390
+ before it can unlock the door. Hence, the agent has to learn to move the ball,
391
+ pick up the key, open the door and pick up the object in the other room.
392
+ This environment can be solved without relying on language.
393
+
394
+ ## Obstructed maze environment
395
+
396
+ Registered configurations:
397
+ - `MiniGrid-ObstructedMaze-1Dl-v0`
398
+ - `MiniGrid-ObstructedMaze-1Dlh-v0`
399
+ - `MiniGrid-ObstructedMaze-1Dlhb-v0`
400
+ - `MiniGrid-ObstructedMaze-2Dl-v0`
401
+ - `MiniGrid-ObstructedMaze-2Dlh-v0`
402
+ - `MiniGrid-ObstructedMaze-2Dlhb-v0`
403
+ - `MiniGrid-ObstructedMaze-1Q-v0`
404
+ - `MiniGrid-ObstructedMaze-2Q-v0`
405
+ - `MiniGrid-ObstructedMaze-Full-v0`
406
+
407
+ <p align="center">
408
+ <img src="figures/ObstructedMaze-1Dl.png" width="250">
409
+ <img src="figures/ObstructedMaze-1Dlh.png" width="250">
410
+ <img src="figures/ObstructedMaze-1Dlhb.png" width="250">
411
+ <img src="figures/ObstructedMaze-2Dl.png" width="100">
412
+ <img src="figures/ObstructedMaze-2Dlh.png" width="100">
413
+ <img src="figures/ObstructedMaze-2Dlhb.png" width="100">
414
+ <img src="figures/ObstructedMaze-1Q.png" width="250">
415
+ <img src="figures/ObstructedMaze-2Q.png" width="250">
416
+ <img src="figures/ObstructedMaze-4Q.png" width="250">
417
+ </p>
418
+
419
+ The agent has to pick up a box which is placed in a corner of a 3x3 maze.
420
+ The doors are locked, the keys are hidden in boxes and doors are obstructed
421
+ by balls. This environment can be solved without relying on language.
422
+
423
+ ## Distributional shift environment
424
+
425
+ Registered configurations:
426
+ - `MiniGrid-DistShift1-v0`
427
+ - `MiniGrid-DistShift2-v0`
428
+
429
+ This environment is based on one of the DeepMind [AI safety gridworlds](https://github.com/deepmind/ai-safety-gridworlds).
430
+ The agent starts in the top-left corner and must reach the goal which is in the top-right corner, but has to avoid stepping
431
+ into lava on its way. The aim of this environment is to test an agent's ability to generalize. There are two slightly
432
+ different variants of the environment, so that the agent can be trained on one variant and tested on the other.
433
+
434
+ <p align="center">
435
+ <img src="figures/DistShift1.png" width="200">
436
+ <img src="figures/DistShift2.png" width="200">
437
+ </p>
438
+
439
+ ## Lava gap environment
440
+
441
+ Registered configurations:
442
+ - `MiniGrid-LavaGapS5-v0`
443
+ - `MiniGrid-LavaGapS6-v0`
444
+ - `MiniGrid-LavaGapS7-v0`
445
+
446
+ <p align="center">
447
+ <img src="figures/LavaGapS6.png" width="200">
448
+ </p>
449
+
450
+ The agent has to reach the green goal square at the opposite corner of the room,
451
+ and must pass through a narrow gap in a vertical strip of deadly lava. Touching
452
+ the lava terminates the episode with a zero reward. This environment is useful
453
+ for studying safety and safe exploration.
454
+
455
+ ## Lava crossing environment
456
+
457
+ Registered configurations:
458
+ - `MiniGrid-LavaCrossingS9N1-v0`
459
+ - `MiniGrid-LavaCrossingS9N2-v0`
460
+ - `MiniGrid-LavaCrossingS9N3-v0`
461
+ - `MiniGrid-LavaCrossingS11N5-v0`
462
+
463
+ <p align="center">
464
+ <img src="figures/LavaCrossingS9N1.png" width="200">
465
+ <img src="figures/LavaCrossingS9N2.png" width="200">
466
+ <img src="figures/LavaCrossingS9N3.png" width="200">
467
+ <img src="figures/LavaCrossingS11N5.png" width="250">
468
+ </p>
469
+
470
+ The agent has to reach the green goal square on the other corner of the room
471
+ while avoiding rivers of deadly lava which terminate the episode in failure.
472
+ Each lava stream runs across the room either horizontally or vertically, and
473
+ has a single crossing point which can be safely used; luckily, a path to the
474
+ goal is guaranteed to exist. This environment is useful for studying safety and
475
+ safe exploration.
476
+
477
+ ## Simple crossing environment
478
+
479
+ Registered configurations:
480
+ - `MiniGrid-SimpleCrossingS9N1-v0`
481
+ - `MiniGrid-SimpleCrossingS9N2-v0`
482
+ - `MiniGrid-SimpleCrossingS9N3-v0`
483
+ - `MiniGrid-SimpleCrossingS11N5-v0`
484
+
485
+ <p align="center">
486
+ <img src="figures/SimpleCrossingS9N1.png" width="200">
487
+ <img src="figures/SimpleCrossingS9N2.png" width="200">
488
+ <img src="figures/SimpleCrossingS9N3.png" width="200">
489
+ <img src="figures/SimpleCrossingS11N5.png" width="250">
490
+ </p>
491
+
492
+ Similar to the `LavaCrossing` environment, the agent has to reach the green
493
+ goal square on the other corner of the room; however, lava is replaced by
494
+ walls. This MDP is therefore much easier and may be useful for quickly
495
+ testing your algorithms.
496
+
497
+ ### Dynamic obstacles environment
498
+
499
+ Registered configurations:
500
+ - `MiniGrid-Dynamic-Obstacles-5x5-v0`
501
+ - `MiniGrid-Dynamic-Obstacles-Random-5x5-v0`
502
+ - `MiniGrid-Dynamic-Obstacles-6x6-v0`
503
+ - `MiniGrid-Dynamic-Obstacles-Random-6x6-v0`
504
+ - `MiniGrid-Dynamic-Obstacles-8x8-v0`
505
+ - `MiniGrid-Dynamic-Obstacles-16x16-v0`
506
+
507
+ <p align="center">
508
+ <img src="/figures/dynamic_obstacles.gif">
509
+ </p>
510
+
511
+ This environment is an empty room with moving obstacles. The goal of the agent is to reach the green goal square without colliding with any obstacle. A large penalty is subtracted if the agent collides with an obstacle and the episode finishes. This environment is useful to test Dynamic Obstacle Avoidance for mobile robots with Reinforcement Learning in Partial Observability.
gym-minigrid/benchmark.py ADDED
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import time
4
+ import argparse
5
+ import gym_minigrid
6
+ import gym
7
+ from gym_minigrid.wrappers import *
8
+
9
+ parser = argparse.ArgumentParser()
10
+ parser.add_argument(
11
+ "--env-name",
12
+ dest="env_name",
13
+ help="gym environment to load",
14
+ default='MiniGrid-LavaGapS7-v0'
15
+ )
16
+ parser.add_argument("--num_resets", default=200)
17
+ parser.add_argument("--num_frames", default=5000)
18
+ args = parser.parse_args()
19
+
20
+ env = gym.make(args.env_name)
21
+
22
+ # Benchmark env.reset
23
+ t0 = time.time()
24
+ for i in range(args.num_resets):
25
+ env.reset()
26
+ t1 = time.time()
27
+ dt = t1 - t0
28
+ reset_time = (1000 * dt) / args.num_resets
29
+
30
+ # Benchmark rendering
31
+ t0 = time.time()
32
+ for i in range(args.num_frames):
33
+ env.render('rgb_array')
34
+ t1 = time.time()
35
+ dt = t1 - t0
36
+ frames_per_sec = args.num_frames / dt
37
+
38
+ # Create an environment with an RGB agent observation
39
+ env = gym.make(args.env_name)
40
+ env = RGBImgPartialObsWrapper(env)
41
+ env = ImgObsWrapper(env)
42
+
43
+ # Benchmark rendering
44
+ t0 = time.time()
45
+ for i in range(args.num_frames):
46
+ obs, reward, done, info = env.step(0)
47
+ t1 = time.time()
48
+ dt = t1 - t0
49
+ agent_view_fps = args.num_frames / dt
50
+
51
+ print('Env reset time: {:.1f} ms'.format(reset_time))
52
+ print('Rendering FPS : {:.0f}'.format(frames_per_sec))
53
+ print('Agent view FPS: {:.0f}'.format(agent_view_fps))
gym-minigrid/gym_minigrid/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ # Import the envs module so that envs register themselves
2
+ import gym_minigrid.envs
3
+ import gym_minigrid.social_ai_envs
4
+
5
+ # Import the wrappers module so it's accessible when installing with pip
6
+ import gym_minigrid.wrappers
gym-minigrid/gym_minigrid/backup_envs/bobo.py ADDED
@@ -0,0 +1,301 @@
1
+ import numpy as np
2
+
3
+ from gym_minigrid.minigrid import *
4
+ from gym_minigrid.register import register
5
+ import time
6
+ from collections import deque
7
+
8
+
9
+ class Peer(NPC):
10
+ """
11
+ A peer NPC that (when knowledgeable) navigates to the door and exits the room
12
+ """
13
+
14
+ def __init__(self, color, name, env, knowledgeable=False):
15
+ super().__init__(color)
16
+ self.name = name
17
+ self.npc_dir = 1 # NPC initially looks downward
18
+ self.npc_type = 0
19
+ self.env = env
20
+ self.knowledgeable = knowledgeable
21
+ self.npc_actions = []
22
+ self.dancing_step_idx = 0
23
+ self.actions = MiniGridEnv.Actions
24
+ self.add_npc_direction = True
25
+ self.available_moves = [self.rotate_left, self.rotate_right, self.go_forward, self.toggle_action]
26
+ self.exited = False
27
+
28
+ def step(self):
29
+ if self.exited:
30
+ return
31
+
32
+ if all(np.array(self.cur_pos) == np.array(self.env.door_pos)):
33
+ # todo: disappear
34
+ # todo: close door
35
+ self.env.grid.set(*self.cur_pos, self.env.door) # restore the door object on this cell as the peer disappears
36
+ self.cur_pos = np.array([np.nan, np.nan])
37
+
38
+ self.env.door.toggle(self.env, self.cur_pos) # close the door behind the exiting peer
39
+
40
+ self.exited = True
41
+
42
+ elif self.knowledgeable:
43
+
44
+ if all(self.front_pos == self.env.door_pos):
45
+ # in front of door
46
+ if self.env.door.is_open:
47
+ self.go_forward()
48
+ else:
49
+ self.toggle_action()
50
+
51
+ else:
52
+ if (self.cur_pos[0] == self.env.door_pos[0]) or (self.cur_pos[1] == self.env.door_pos[1]):
53
+ # is either in the correct row or in the correct column
54
+ next_wanted_position = self.env.door_pos
55
+ else:
56
+ # choose the midpoint
57
+ for cand_x, cand_y in [
58
+ (self.cur_pos[0], self.env.door_pos[1]),
59
+ (self.env.door_pos[0], self.cur_pos[1])
60
+ ]:
61
+ if (
62
+ cand_x > 0 and cand_x < self.env.wall_x
63
+ ) and (
64
+ cand_y > 0 and cand_y < self.env.wall_y
65
+ ):
66
+ next_wanted_position = (cand_x, cand_y)
67
+
68
+ if self.cur_pos[1] == next_wanted_position[1]:
69
+ # same y
70
+ if self.cur_pos[0] < next_wanted_position[0]:
71
+ wanted_dir = 0
72
+ else:
73
+ wanted_dir = 2
74
+ if self.npc_dir == wanted_dir:
75
+ self.go_forward()
76
+
77
+ else:
78
+ self.rotate_left()
79
+
80
+ elif self.cur_pos[0] == next_wanted_position[0]:
81
+ # same x
82
+ if self.cur_pos[1] < next_wanted_position[1]:
83
+ wanted_dir = 1
84
+ else:
85
+ wanted_dir = 3
86
+
87
+
88
+ if self.npc_dir == wanted_dir:
89
+ self.go_forward()
90
+
91
+ else:
92
+ self.rotate_left()
93
+ else:
94
+ raise ValueError("Something is wrong.")
95
+
96
+ else:
97
+ self.env._rand_elem(self.available_moves)()
98
+
99
+
100
+ class BoboGrammar(object):
101
+
102
+ templates = ["Move your", "Shake your"]
103
+ things = ["body", "head"]
104
+
105
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
106
+
107
+ @classmethod
108
+ def construct_utterance(cls, action):
109
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
110
+
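+ # e.g. BoboGrammar.construct_utterance([0, 1]) -> "Move your head "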
111
+
112
+ class BoboEnv(MultiModalMiniGridEnv):
113
+ """
114
+ Environment in which the agent is instructed to go to a given object
115
+ named using an English text string
116
+ """
117
+
118
+ def __init__(
119
+ self,
120
+ size=5,
121
+ diminished_reward=True,
122
+ step_penalty=False,
123
+ knowledgeable=False,
124
+ ):
125
+ assert size >= 5
126
+ self.empty_symbol = "NA \n"
127
+ self.diminished_reward = diminished_reward
128
+ self.step_penalty = step_penalty
129
+ self.knowledgeable = knowledgeable
130
+
131
+ super().__init__(
132
+ grid_size=size,
133
+ max_steps=5*size**2,
134
+ # Set this to True for maximum speed
135
+ see_through_walls=True,
136
+ actions=MiniGridEnv.Actions,
137
+ action_space=spaces.MultiDiscrete([
138
+ len(MiniGridEnv.Actions),
139
+ *BoboGrammar.grammar_action_space.nvec
140
+ ]),
141
+ add_npc_direction=True
142
+ )
143
+
144
+ print({
145
+ "size": size,
146
+ "diminished_reward": diminished_reward,
147
+ "step_penalty": step_penalty,
148
+ })
149
+
150
+ def _gen_grid(self, width, height):
151
+ # Create the grid
152
+ self.grid = Grid(width, height, nb_obj_dims=4)
153
+
154
+ # Randomly vary the room width and height
155
+ width = self._rand_int(5, width+1)
156
+ height = self._rand_int(5, height+1)
157
+
158
+ self.wall_x = width - 1
159
+ self.wall_y = height - 1
160
+
161
+ # Generate the surrounding walls
162
+ self.grid.wall_rect(0, 0, width, height)
163
+
164
+ door_color = self._rand_elem(COLOR_NAMES)
165
+
166
+ wall_for_door = self._rand_int(0, 4)
167
+
168
+ if wall_for_door < 2:
169
+ w = self._rand_int(1, width-1)
170
+ h = height-1 if wall_for_door == 0 else 0
171
+ else:
172
+ w = width-1 if wall_for_door == 3 else 0
173
+ h = self._rand_int(1, height-1)
174
+
175
+ self.door_pos = (w, h)
176
+ self.door = Door(door_color)
177
+ self.grid.set(*self.door_pos, self.door)
178
+
179
+ # Set a randomly coloured Dancer NPC
180
+ color = self._rand_elem(COLOR_NAMES)
181
+ self.peer = Peer(color, "Jim", self, knowledgeable=self.knowledgeable)
182
+
183
+ # Place it on the middle left side of the room
184
+ peer_pos = np.array((self._rand_int(1, width - 1), self._rand_int(1, height - 1)))
185
+
186
+ self.grid.set(*peer_pos, self.peer)
187
+ self.peer.init_pos = peer_pos
188
+ self.peer.cur_pos = peer_pos
189
+
190
+ # Randomize the agent's start position and orientation
191
+ self.place_agent(size=(width, height))
192
+
193
+ # Generate the mission string
194
+ self.mission = 'watch dancer and repeat his moves afterwards'
195
+
196
+ # Dummy beginning string
197
+ self.beginning_string = "This is what you hear. \n"
198
+ self.utterance = self.beginning_string
199
+
200
+ # utterance appended at the end of each step
201
+ self.utterance_history = ""
202
+
203
+ # used for rendering
204
+ self.conversation = self.utterance
205
+
206
+ def step(self, action):
207
+ p_action = action[0]
208
+ utterance_action = action[1:]
209
+
210
+ obs, reward, done, info = super().step(p_action)
211
+
212
+ if np.isnan(p_action):
213
+ pass
214
+
215
+ if p_action == self.actions.done:
216
+ done = True
217
+
218
+ self.peer.step()
219
+
220
+ if all(self.agent_pos == self.door_pos):
221
+ reward = self._reward()
222
+ done = True
223
+
224
+ # discount
225
+ if self.step_penalty:
226
+ reward = reward - 0.01
227
+
228
+ # fill observation with text
229
+ self.append_existing_utterance_to_history()
230
+ obs = self.add_utterance_to_observation(obs)
231
+ self.reset_utterance()
232
+ return obs, reward, done, info
233
+
234
+ def _reward(self):
235
+ if self.diminished_reward:
236
+ return super()._reward()
237
+ else:
238
+ return 1.0
239
+
240
+ def render(self, *args, **kwargs):
241
+ obs = super().render(*args, **kwargs)
242
+ print("conversation:\n", self.conversation)
243
+ print("utterance_history:\n", self.utterance_history)
244
+ self.window.set_caption(self.conversation, [self.peer.name])
245
+ return obs
246
+
247
+
248
+ class Bobo8x8Env(BoboEnv):
249
+ def __init__(self):
250
+ super().__init__(size=8)
251
+
252
+
253
+ class Bobo6x6Env(BoboEnv):
254
+ def __init__(self):
255
+ super().__init__(size=6)
256
+
257
+
258
+ # knowledgeable
259
+ class BoboKnowledgeableEnv(BoboEnv):
260
+ def __init__(self):
261
+ super().__init__(size=5, knowledgeable=True)
262
+
263
+ class BoboKnowledgeable6x6Env(BoboEnv):
264
+ def __init__(self):
265
+ super().__init__(size=6, knowledgeable=True)
266
+
267
+ class BoboKnowledgeable8x8Env(BoboEnv):
268
+ def __init__(self):
269
+ super().__init__(size=8, knowledgeable=True)
270
+
271
+
272
+
273
+ register(
274
+ id='MiniGrid-Bobo-5x5-v0',
275
+ entry_point='gym_minigrid.envs:BoboEnv'
276
+ )
277
+
278
+ register(
279
+ id='MiniGrid-Bobo-6x6-v0',
280
+ entry_point='gym_minigrid.envs:Bobo6x6Env'
281
+ )
282
+
283
+ register(
284
+ id='MiniGrid-Bobo-8x8-v0',
285
+ entry_point='gym_minigrid.envs:Bobo8x8Env'
286
+ )
287
+
288
+ register(
289
+ id='MiniGrid-BoboKnowledgeable-5x5-v0',
290
+ entry_point='gym_minigrid.envs:BoboKnowledgeableEnv'
291
+ )
292
+
293
+ register(
294
+ id='MiniGrid-BoboKnowledgeable-6x6-v0',
295
+ entry_point='gym_minigrid.envs:BoboKnowledgeable6x6Env'
296
+ )
297
+
298
+ register(
299
+ id='MiniGrid-BoboKnowledgeable-8x8-v0',
300
+ entry_point='gym_minigrid.envs:BoboKnowledgeable8x8Env'
301
+ )
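
A minimal usage sketch for the Bobo environments registered above (an illustration, not part of the diff: it assumes this gym_minigrid fork is installed, that this backup module is importable as the entry points expect, and that Bobo uses the same three-slot MultiDiscrete action layout as the sibling envs below, where NaN utterance slots mean silence):

import gym
import numpy as np
import gym_minigrid  # importing registers the MiniGrid-Bobo-* ids

env = gym.make('MiniGrid-Bobo-8x8-v0')
obs = env.reset()
# action = [physical_action, template_idx, thing_idx]
obs, reward, done, info = env.step([int(env.actions.forward), np.nan, np.nan])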
gym-minigrid/gym_minigrid/backup_envs/cointhief.py ADDED
@@ -0,0 +1,431 @@
1
+ from gym_minigrid.minigrid import *
2
+ from gym_minigrid.register import register
3
+ import time
4
+ from collections import deque
5
+
6
+
7
+ class Thief(NPC):
8
+ """
9
+ A thief NPC that freezes the agent and demands its coins
10
+ The NPC counts the coins it can see and expects at least that many in return
11
+ """
12
+
13
+ def __init__(self, color, name, env, thief_pos, hidden_npc=False, tag_visible_coins=False, view_size=5, look_around=True):
14
+ super().__init__(color)
15
+ self.name = name
16
+ self.npc_type = 0
17
+ self.env = env
18
+ self.cur_pos = thief_pos
19
+ self.init_pos = thief_pos
20
+ self.view_size = view_size
21
+ self.npc_dir = self._look_at_agent() # Thief stares at its victim
22
+ self.init_dir = self.npc_dir
23
+ self.actions = self.env.possible_actions
24
+ self.tag_visible_coins = tag_visible_coins
25
+
26
+ self.nb_given_coins = None
27
+ self.look_around = look_around
28
+ if self.look_around: # randomly select in which direction NPC is looking around
29
+ if np.random.random() > 0.5: # will look left
30
+ self.look = self.rotate_left
31
+ self.look_back = self.rotate_right
32
+ else: # will look right
33
+ self.look = self.rotate_right
34
+ self.look_back = self.rotate_left
35
+
36
+ self.nb_seen_coins = self._count_coins() # This is the minimum number of coins the Thief demands
37
+ self.add_npc_direction = True
38
+ self.nb_steps = 0
39
+ self.hidden_npc = hidden_npc
40
+
41
+ def step(self, agent_action, agent_utterance):
42
+ agent_disobeyed = False
43
+ agent_gave_coins = False
44
+ utterance = None
45
+
46
+ if self.nb_steps == 0:
47
+ utterance = "Freeze! Give me all the coins you have!"
48
+
49
+ if self.nb_steps >= 0 and self.look_around:
50
+ if self.npc_dir == self.init_dir: # start to look around
51
+ self.look()
52
+ else: # resume looking at the agent
53
+ self.look_back()
54
+
55
+ if agent_utterance is not None:
56
+ self.nb_given_coins = int(agent_utterance[-2])
57
+
58
+ if self.nb_given_coins >= self.nb_seen_coins:
59
+ agent_gave_coins = True
60
+ else: # the agent did not give enough coins
61
+ agent_disobeyed = True
62
+
63
+ # The thief forbids the agent to move, apart from looking around (rotating)
64
+ if not (np.isnan(agent_action) or agent_action == self.actions.left or agent_action == self.actions.right):
65
+ agent_disobeyed = True
66
+
67
+ self.nb_steps += 1
68
+ return agent_disobeyed, agent_gave_coins, utterance
69
+
70
+ def get_status_str(self):
71
+ return "thief sees: {} \n agent gives: {}".format(self.nb_seen_coins, self.nb_given_coins)
72
+
73
+ def _count_coins(self):
74
+ # get seen coins
75
+ coins_pos = self.get_pos_visible_coins()
76
+
77
+ if self.look_around:
78
+ self.look()
79
+ # add coins visible from this new direction
80
+ coins_pos += self.get_pos_visible_coins()
81
+ # remove coins that we already saw
82
+ if len(coins_pos) > 0:
83
+ coins_pos = np.unique(coins_pos, axis=0).tolist()
84
+ self.look_back()
85
+
86
+ return len(coins_pos)
87
+
88
+ def _look_at_agent(self):
89
+ npc_dir = None
90
+ ax, ay = self.env.agent_pos
91
+ tx, ty = self.cur_pos
92
+ delta_x, delta_y = ax - tx, ay - ty
93
+ if delta_x == 1:
94
+ npc_dir = 0
95
+ elif delta_x == -1:
96
+ npc_dir = 2
97
+ elif delta_y == 1:
98
+ npc_dir = 1
99
+ elif delta_y == -1:
100
+ npc_dir = 3
101
+ else:
102
+ raise NotImplementedError
103
+
104
+ return npc_dir
105
+
106
+ def gen_npc_obs_grid(self):
107
+ """
108
+ Generate the sub-grid observed by the npc.
109
+ This method also outputs a visibility mask telling us which grid
110
+ cells the npc can actually see.
111
+ """
112
+ view_size = self.view_size
113
+
114
+ topX, topY, botX, botY = self.env.get_view_exts(dir=self.npc_dir, view_size=view_size, pos=self.cur_pos)
115
+
116
+ grid = self.env.grid.slice(topX, topY, view_size, view_size)
117
+
118
+ for i in range(self.npc_dir + 1):
119
+ grid = grid.rotate_left()
120
+
121
+ # Process occluders and visibility
122
+ # Note that this incurs some performance cost
123
+ if not self.env.see_through_walls:
124
+ vis_mask = grid.process_vis(agent_pos=(view_size // 2, view_size - 1))
125
+ else:
126
+ vis_mask = np.ones(shape=(grid.width, grid.height), dtype=bool)
127
+
128
+ # Make it so the agent sees what it's carrying
129
+ # We do this by placing the carried object at the agent's position
130
+ # in the agent's partially observable view
131
+ # agent_pos = grid.width // 2, grid.height - 1
132
+ # if self.carrying:
133
+ # grid.set(*agent_pos, self.carrying)
134
+ # else:
135
+ # grid.set(*agent_pos, None)
136
+
137
+ return grid, vis_mask
138
+
139
+ def get_pos_visible_coins(self):
140
+ """
141
+ Generate the npc's view (partially observable, low-resolution encoding)
142
+ and return the positions of the visible coins
143
+ """
144
+
145
+ grid, vis_mask = self.gen_npc_obs_grid()
146
+
147
+ coins_pos = []
148
+
149
+ for obj in grid.grid:
150
+ if isinstance(obj, Ball):
151
+ coins_pos.append(obj.cur_pos)
152
+ if self.tag_visible_coins:
153
+ obj.tag()
154
+
155
+ return coins_pos
156
+
157
+ def can_overlap(self):
158
+ # If the NPC is hidden, the agent can overlap with it
159
+ return self.hidden_npc
160
+
161
+
162
+ class CoinThiefGrammar(object):
163
+
164
+ templates = ["Here is"]
165
+ things = ["0","1","2","3","4","5","6"]
166
+
167
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
168
+
169
+ @classmethod
170
+ def construct_utterance(cls, action):
171
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
172
+
173
+ @classmethod
174
+ def random_utterance(cls):
175
+ return np.random.choice(cls.templates) + " " + np.random.choice(cls.things) + " "
176
+
177
+
178
+ class ThiefActions(IntEnum):
179
+ # Turn left, turn right, move forward
180
+ left = 0
181
+ right = 1
182
+ forward = 2
183
+
184
+
185
+ class CoinThiefEnv(MultiModalMiniGridEnv):
186
+ """
187
+ Environment in which a thief NPC freezes the agent and demands its coins;
188
+ the agent succeeds by announcing exactly the number of coins the thief has seen
189
+ """
190
+
191
+ def __init__(
192
+ self,
193
+ size=5,
194
+ hear_yourself=False,
195
+ diminished_reward=True,
196
+ step_penalty=False,
197
+ hidden_npc=False,
198
+ max_steps=20,
199
+ full_obs=False,
200
+ few_actions=False,
201
+ tag_visible_coins=False,
202
+ nb_coins=6,
203
+ npc_view_size=5,
204
+ npc_look_around=True
205
+
206
+ ):
207
+ assert size >= 5
208
+ self.empty_symbol = "NA \n"
209
+ self.hear_yourself = hear_yourself
210
+ self.diminished_reward = diminished_reward
211
+ self.step_penalty = step_penalty
212
+ self.hidden_npc = hidden_npc
213
+ self.few_actions = few_actions
214
+ self.possible_actions = ThiefActions if self.few_actions else MiniGridEnv.Actions
215
+ self.nb_coins = nb_coins
216
+ self.tag_visible_coins = tag_visible_coins
217
+ self.npc_view_size = npc_view_size
218
+ self.npc_look_around = npc_look_around
219
+ if max_steps is None:
220
+ max_steps = 5*size**2
221
+
222
+ super().__init__(
223
+ grid_size=size,
224
+ max_steps=max_steps,
225
+ # Set this to True for maximum speed
226
+ see_through_walls=True,
227
+ full_obs=full_obs,
228
+ actions=MiniGridEnv.Actions,
229
+ action_space=spaces.MultiDiscrete([
230
+ len(self.possible_actions),
231
+ *CoinThiefGrammar.grammar_action_space.nvec
232
+ ]),
233
+ add_npc_direction=True
234
+ )
235
+
236
+ print({
237
+ "size": size,
238
+ "hear_yourself": hear_yourself,
239
+ "diminished_reward": diminished_reward,
240
+ "step_penalty": step_penalty,
241
+ })
242
+
243
+ def _gen_grid(self, width, height):
244
+ # Create the grid
245
+ self.grid = Grid(width, height, nb_obj_dims=4)
246
+
247
+ # Randomly vary the room width and height
248
+ # width = self._rand_int(5, width+1)
249
+ # height = self._rand_int(5, height+1)
250
+
251
+ # Generate the surrounding walls
252
+ self.grid.wall_rect(0, 0, width, height)
253
+
257
+ # Randomize the agent's start position and orientation
258
+ self.place_agent(size=(width, height))
259
+
260
+ # Get possible near-agent positions, and place thief in one of them
261
+ ax, ay = self.agent_pos
262
+ near_agent_pos = [[ax, ay + 1], [ax, ay - 1], [ax - 1, ay], [ax + 1, ay]]
263
+ # get empty cells positions
264
+ available_pos = []
265
+ for p in near_agent_pos:
266
+ if self.grid.get(*p) is None:
267
+ available_pos.append(p)
268
+ thief_pos = self._rand_elem(available_pos)
269
+
270
+ # Add randomly placed coins
271
+ # Types and colors of objects we can generate
272
+ types = ['ball']
273
+ objs = []
274
+ objPos = []
275
+
276
+ # Until we have generated all the objects
277
+ while len(objs) < self.nb_coins:
278
+ objType = self._rand_elem(types)
279
+ objColor = 'yellow'
280
+
281
+ if objType == 'ball':
282
+ obj = Ball(objColor)
283
+ else:
284
+ raise NotImplementedError
285
+
286
+ pos = self.place_obj(obj, reject_fn=lambda env,pos: pos.tolist() == thief_pos)
287
+ objs.append((objType, objColor))
288
+ objPos.append(pos)
289
+
290
+ # Set a randomly coloured Thief NPC next to the agent
291
+ color = self._rand_elem(COLOR_NAMES)
292
+
293
+ self.thief = Thief(color, "Eve", self, thief_pos,
294
+ hidden_npc=self.hidden_npc,
295
+ tag_visible_coins=self.tag_visible_coins,
296
+ view_size=self.npc_view_size,
297
+ look_around=self.npc_look_around)
298
+
299
+ self.grid.set(*thief_pos, self.thief)
300
+
301
+ # Generate the mission string
302
+ self.mission = 'save as many coins as possible'
303
+
304
+ # Dummy beginning string
305
+ self.beginning_string = "This is what you hear. \n"
306
+ self.utterance = self.beginning_string
307
+
308
+ # utterance appended at the end of each step
309
+ self.utterance_history = ""
310
+
311
+ # used for rendering
312
+ self.conversation = self.utterance
313
+ self.outcome_info = None
314
+
315
+ def step(self, action):
316
+ p_action = action[0] if np.isnan(action[0]) else int(action[0])
317
+ if len(action) == 1: # agent cannot speak
318
+ utterance_action = [np.nan, np.nan]
319
+ else:
320
+ utterance_action = action[1:]
321
+
322
+ obs, reward, done, info = super().step(p_action)
323
+
324
+ # both utterance components must be NaN, or neither
325
+ assert len(set(np.isnan(utterance_action))) == 1
326
+ speak_flag = not all(np.isnan(utterance_action))
327
+
328
+ if speak_flag:
329
+ utterance = CoinThiefGrammar.construct_utterance(utterance_action)
330
+ self.conversation += "{}: {} \n".format("Agent", utterance)
331
+
332
+ # Don't let the agent open any doors
333
+ if not self.few_actions and p_action == self.actions.toggle:
334
+ done = True
335
+
336
+ if not self.few_actions and p_action == self.actions.done:
337
+ done = True
338
+
339
+ # npc's turn
340
+ agent_disobeyed, agent_gave_coins, npc_utterance = self.thief.step(p_action, utterance if speak_flag else None)
341
+
342
+ if self.hidden_npc:
343
+ npc_utterance = None
344
+
345
+ if npc_utterance:
346
+ self.utterance += "{} \n".format(npc_utterance)
347
+ self.conversation += "{}: {} \n".format(self.thief.name, npc_utterance)
348
+
349
+ if agent_disobeyed:
350
+ done = True
351
+
352
+ if agent_gave_coins:
353
+ done = True
354
+ if self.thief.nb_seen_coins == self.thief.nb_given_coins:
355
+ reward = self._reward()
356
+ self.outcome_info = "SUCCESS: agent got {} reward \n".format(np.round(reward,1))
357
+
358
+ if done and reward == 0:
359
+ self.outcome_info = "FAILURE: agent got {} reward \n".format(reward)
360
+
361
+ # discount
362
+ if self.step_penalty:
363
+ reward = reward - 0.01
364
+
365
+ if self.hidden_npc:
366
+ # remove npc from agent view
367
+ npc_obs_idx = np.argwhere(obs['image'] == 11)
368
+ if npc_obs_idx.size != 0: # agent sees npc
369
+ obs['image'][npc_obs_idx[0][0], npc_obs_idx[0][1], :] = [1, 0, 0, 0]
370
+
371
+ # fill observation with text
372
+ self.append_existing_utterance_to_history()
373
+ obs = self.add_utterance_to_observation(obs)
374
+ self.reset_utterance()
375
+
376
+ return obs, reward, done, info
377
+
378
+ def _reward(self):
379
+ if self.diminished_reward:
380
+ return super()._reward()
381
+ else:
382
+ return 1.0
383
+
384
+ def render(self, *args, **kwargs):
385
+ obs = super().render(*args, **kwargs)
386
+
387
+ print("conversation:\n", self.conversation)
388
+ print("utterance_history:\n", self.utterance_history)
389
+
390
+ self.window.clear_text() # erase previous text
391
+
392
+ self.window.set_caption(self.conversation) # overwrites super class caption
393
+ self.window.ax.set_title(self.thief.get_status_str(), loc="left")
394
+ if self.outcome_info:
395
+ color = None
396
+ if "SUCCESS" in self.outcome_info:
397
+ color = "lime"
398
+ elif "FAILURE" in self.outcome_info:
399
+ color = "red"
400
+ self.window.add_text(*(0.01, 0.85, self.outcome_info),
401
+ **{'fontsize':15, 'color':color, 'weight':"bold"})
402
+
403
+ self.window.show_img(obs) # re-draw image to add changes to window
404
+
405
+ return obs
406
+
407
+
408
+ class CoinThief8x8Env(CoinThiefEnv):
409
+ def __init__(self, **kwargs):
410
+ super().__init__(size=8, **kwargs)
411
+
412
+
413
+ class CoinThief6x6Env(CoinThiefEnv):
414
+ def __init__(self, **kwargs):
415
+ super().__init__(size=6, **kwargs)
416
+
417
+
418
+ register(
419
+ id='MiniGrid-CoinThief-5x5-v0',
420
+ entry_point='gym_minigrid.envs:CoinThiefEnv'
421
+ )
422
+
423
+ register(
424
+ id='MiniGrid-CoinThief-6x6-v0',
425
+ entry_point='gym_minigrid.envs:CoinThief6x6Env'
426
+ )
427
+
428
+ register(
429
+ id='MiniGrid-CoinThief-8x8-v0',
430
+ entry_point='gym_minigrid.envs:CoinThief8x8Env'
431
+ )
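
To make the coin-handover mechanic concrete, here is a small sketch of how the grammar and the thief's parsing fit together (assuming CoinThiefGrammar is importable from gym_minigrid.envs, as the register calls above expect; the single-character parsing only works because things is limited to "0"-"6"):

from gym_minigrid.envs import CoinThiefGrammar

# utterance action = [template_idx, thing_idx]
utt = CoinThiefGrammar.construct_utterance([0, 3])
print(utt)                   # "Here is 3 "
coins_given = int(utt[-2])   # Thief.step recovers the count the same way
assert coins_given == 3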
gym-minigrid/gym_minigrid/backup_envs/dancewithonenpc.py ADDED
@@ -0,0 +1,344 @@
1
+ from gym_minigrid.minigrid import *
2
+ from gym_minigrid.register import register
3
+
4
+ import time
5
+ from collections import deque
6
+
7
+
8
+ class Dancer(NPC):
9
+ """
10
+ A dancing NPC that the agent has to copy
11
+ NPC executes a sequence of movement and utterances
12
+ """
13
+
14
+ def __init__(self, color, name, env, dancing_pattern=None,
15
+ dance_len=3, p_sing=.5, hidden_npc=False, sing_only=False):
16
+ super().__init__(color)
17
+ self.name = name
18
+ self.npc_dir = 1 # NPC initially looks downward
19
+ self.npc_type = 0
20
+ self.env = env
21
+ self.actions = self.env.possible_actions
22
+ self.p_sing = p_sing
23
+ self.sing_only = sing_only
24
+ if self.sing_only:
25
+ p_sing = 1  # in sing-only mode, every dance step includes an utterance
26
+ self.dancing_pattern = dancing_pattern if dancing_pattern else self._gen_dancing_pattern(dance_len, p_sing)
27
+ self.agent_actions = deque(maxlen=len(self.dancing_pattern))
28
+ self.movement_id_to_fun = {self.actions.left: self.rotate_left,
29
+ self.actions.right: self.rotate_right,
30
+ self.actions.forward: self.go_forward}
31
+ # for visualization only
32
+ self.movement_id_to_str = {a: a.name for a in self.actions}
33
+ self.movement_id_to_str[None] = "None"
40
+ self.dancing_step_idx = 0
41
+ self.done_dancing = False
42
+ self.add_npc_direction = True
43
+ self.nb_steps = 0
44
+ self.hidden_npc = hidden_npc
45
+
46
+ def step(self, agent_action, agent_utterance):
47
+ agent_matched_moves = False
48
+ utterance = None
49
+
50
+ if self.nb_steps == 0:
51
+ utterance = "Look at me!"
52
+ if self.nb_steps >= 2: # Wait a couple steps before dancing
53
+ if not self.done_dancing:
54
+ if self.dancing_step_idx == len(self.dancing_pattern):
55
+ self.done_dancing = True
56
+ utterance = "Now repeat my moves!"
57
+ else:
58
+ # NPC moves and speaks according to dance step
59
+ move_id, utterance = self.dancing_pattern[self.dancing_step_idx]
60
+ self.movement_id_to_fun[move_id]()
61
+
62
+ self.dancing_step_idx += 1
63
+ else: # record agent dancing pattern
64
+ self.agent_actions.append((agent_action, agent_utterance))
65
+
66
+ if not self.sing_only and list(self.agent_actions) == list(self.dancing_pattern):
67
+ agent_matched_moves = True
68
+ if self.sing_only: # only compare utterances
69
+ if [x[1] for x in self.agent_actions] == [x[1] for x in self.dancing_pattern]:
70
+ agent_matched_moves = True
71
+
72
+ self.nb_steps += 1
73
+ return agent_matched_moves, utterance
74
+
75
+ def get_status_str(self):
76
+ readable_dancing_pattern = [(self.movement_id_to_str[dp[0]], dp[1]) for dp in self.dancing_pattern]
77
+ readable_agent_actions = [(self.movement_id_to_str[aa[0]], aa[1]) for aa in self.agent_actions]
78
+ return "dance: {} \n agent: {}".format(readable_dancing_pattern, readable_agent_actions)
79
+
80
+ def _gen_dancing_pattern(self, dance_len, p_sing):
81
+ available_moves = [self.actions.left, self.actions.right, self.actions.forward]
82
+ dance_pattern = []
83
+ for _ in range(dance_len):
84
+ move = self.env._rand_elem(available_moves)
85
+ sing = None
86
+ if np.random.random() < p_sing:
87
+ sing = DanceWithOneNPCGrammar.random_utterance()
88
+ dance_pattern.append((move, sing))
89
+ return dance_pattern
90
+
91
+ def can_overlap(self):
92
+ # If the NPC is hidden, the agent can overlap with it
93
+ return self.hidden_npc
94
+
95
+
96
+
97
+ class DanceWithOneNPCGrammar(object):
98
+
99
+ templates = ["Move your", "Shake your"]
100
+ things = ["body", "head"]
101
+
102
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
103
+
104
+ @classmethod
105
+ def construct_utterance(cls, action):
106
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
107
+
108
+ @classmethod
109
+ def random_utterance(cls):
110
+ return np.random.choice(cls.templates) + " " + np.random.choice(cls.things) + " "
111
+
112
+
113
+
114
+ class DanceActions(IntEnum):
115
+ # Turn left, turn right, move forward
116
+ left = 0
117
+ right = 1
118
+ forward = 2
119
+
120
+
121
+ class DanceWithOneNPCEnv(MultiModalMiniGridEnv):
122
+ """
123
+ Environment in which the agent must watch a dancing NPC and then
124
+ repeat the NPC's sequence of moves (and utterances) to earn a reward
125
+ """
126
+
127
+ def __init__(
128
+ self,
129
+ size=5,
130
+ hear_yourself=False,
131
+ diminished_reward=True,
132
+ step_penalty=False,
133
+ dance_len=3,
134
+ hidden_npc=False,
135
+ p_sing=.5,
136
+ max_steps=20,
137
+ full_obs=False,
138
+ few_actions=False,
139
+ sing_only=False
140
+
141
+ ):
142
+ assert size >= 5
143
+ self.empty_symbol = "NA \n"
144
+ self.hear_yourself = hear_yourself
145
+ self.diminished_reward = diminished_reward
146
+ self.step_penalty = step_penalty
147
+ self.dance_len = dance_len
148
+ self.hidden_npc = hidden_npc
149
+ self.p_sing = p_sing
150
+ self.few_actions = few_actions
151
+ self.possible_actions = DanceActions if self.few_actions else MiniGridEnv.Actions
152
+ self.sing_only = sing_only
153
+ if max_steps is None:
154
+ max_steps = 5*size**2
155
+
156
+ super().__init__(
157
+ grid_size=size,
158
+ max_steps=max_steps,
159
+ # Set this to True for maximum speed
160
+ see_through_walls=True,
161
+ full_obs=full_obs,
162
+ actions=MiniGridEnv.Actions,
163
+ action_space=spaces.MultiDiscrete([
164
+ len(self.possible_actions),
165
+ *DanceWithOneNPCGrammar.grammar_action_space.nvec
166
+ ]),
167
+ add_npc_direction=True
168
+ )
169
+
170
+ print({
171
+ "size": size,
172
+ "hear_yourself": hear_yourself,
173
+ "diminished_reward": diminished_reward,
174
+ "step_penalty": step_penalty,
175
+ })
176
+
177
+ def _gen_grid(self, width, height):
178
+ # Create the grid
179
+ self.grid = Grid(width, height, nb_obj_dims=4)
180
+
181
+ # Randomly vary the room width and height
182
+ width = self._rand_int(5, width+1)
183
+ height = self._rand_int(5, height+1)
184
+
185
+ # Generate the surrounding walls
186
+ self.grid.wall_rect(0, 0, width, height)
187
+
191
+
192
+ # Set a randomly coloured Dancer NPC
193
+ color = self._rand_elem(COLOR_NAMES)
194
+ self.dancer = Dancer(color, "Ren", self, dance_len=self.dance_len,
195
+ p_sing=self.p_sing, hidden_npc=self.hidden_npc, sing_only=self.sing_only)
196
+
197
+ # Place it on the middle left side of the room
198
+ left_pos = (int((width / 2) - 1), int(height / 2))
199
+ #right_pos = [(width / 2) + 1, height / 2]
200
+
201
+ self.grid.set(*left_pos, self.dancer)
202
+ self.dancer.init_pos = left_pos
203
+ self.dancer.cur_pos = left_pos
204
+
205
+ # Place it randomly left or right
206
+ #self.place_obj(self.dancer,
207
+ # size=(width, height))
208
+
209
+ # Randomize the agent's start position and orientation
210
+ self.place_agent(size=(width, height))
211
+
212
+ # Generate the mission string
213
+ self.mission = 'watch dancer and repeat his moves afterwards'
214
+
215
+ # Dummy beginning string
216
+ self.beginning_string = "This is what you hear. \n"
217
+ self.utterance = self.beginning_string
218
+
219
+ # utterance appended at the end of each step
220
+ self.utterance_history = ""
221
+
222
+ # used for rendering
223
+ self.conversation = self.utterance
224
+ self.outcome_info = None
225
+
226
+ def step(self, action):
227
+ p_action = action[0] if np.isnan(action[0]) else int(action[0])
228
+ if len(action) == 1: # agent cannot speak
229
+ assert self.p_sing == 0, "Non-speaking agent used in a dance env that requires speaking"
230
+ utterance_action = [np.nan, np.nan]
231
+ else:
232
+ utterance_action = action[1:]
233
+
234
+ obs, reward, done, info = super().step(p_action)
235
+
236
+ if np.isnan(p_action):
237
+ pass  # NaN physical action: no movement this step
238
+
239
+
240
+ # assert all nan or neither nan
241
+ assert len(set(np.isnan(utterance_action))) == 1
242
+ speak_flag = not all(np.isnan(utterance_action))
243
+
244
+ if speak_flag:
245
+ utterance = DanceWithOneNPCGrammar.construct_utterance(utterance_action)
246
+ self.conversation += "{}: {} \n".format("Agent", utterance)
247
+
248
+ # Don't let the agent open any of the doors
249
+ if not self.few_actions and p_action == self.actions.toggle:
250
+ done = True
251
+
252
+ if not self.few_actions and p_action == self.actions.done:
253
+ done = True
254
+
255
+ # npc's turn
256
+ agent_matched_moves, npc_utterance = self.dancer.step(p_action if not np.isnan(p_action) else None,
257
+ utterance if speak_flag else None)
258
+ if self.hidden_npc:
259
+ npc_utterance = None
260
+ if npc_utterance:
261
+ self.utterance += "{} \n".format(npc_utterance)
262
+ self.conversation += "{}: {} \n".format(self.dancer.name, npc_utterance)
263
+ if agent_matched_moves:
264
+ reward = self._reward()
265
+ self.outcome_info = "SUCCESS: agent got {} reward \n".format(np.round(reward, 1))
266
+ done = True
267
+
268
+ # discount
269
+ if self.step_penalty:
270
+ reward = reward - 0.01
271
+
272
+ if self.hidden_npc:
273
+ # remove npc from agent view
274
+ npc_obs_idx = np.argwhere(obs['image'] == 11)
275
+ if npc_obs_idx.size != 0: # agent sees npc
276
+ obs['image'][npc_obs_idx[0][0], npc_obs_idx[0][1], :] = [1, 0, 0, 0]
277
+
278
+ if done and reward == 0:
279
+ self.outcome_info = "FAILURE: agent got {} reward \n".format(reward)
280
+
281
+ # fill observation with text
282
+ self.append_existing_utterance_to_history()
283
+ obs = self.add_utterance_to_observation(obs)
284
+ self.reset_utterance()
285
+
286
+ return obs, reward, done, info
287
+
288
+
289
+ def _reward(self):
290
+ if self.diminished_reward:
291
+ return super()._reward()
292
+ else:
293
+ return 1.0
294
+
295
+ def render(self, *args, **kwargs):
296
+ obs = super().render(*args, **kwargs)
297
+
298
+ print("conversation:\n", self.conversation)
299
+ print("utterance_history:\n", self.utterance_history)
300
+
301
+ self.window.clear_text() # erase previous text
302
+
303
+ self.window.set_caption(self.conversation) # overwrites super class caption
304
+ self.window.ax.set_title(self.dancer.get_status_str(), loc="left", fontsize=10)
305
+ if self.outcome_info:
306
+ color = None
307
+ if "SUCCESS" in self.outcome_info:
308
+ color = "lime"
309
+ elif "FAILURE" in self.outcome_info:
310
+ color = "red"
311
+ self.window.add_text(*(0.01, 0.85, self.outcome_info),
312
+ **{'fontsize':15, 'color':color, 'weight':"bold"})
313
+
314
+ self.window.show_img(obs) # re-draw image to add changes to window
315
+
316
+ return obs
317
+
318
+
319
+
320
+
321
+ class DanceWithOneNPC8x8Env(DanceWithOneNPCEnv):
322
+ def __init__(self, **kwargs):
323
+ super().__init__(size=8, **kwargs)
324
+
325
+ class DanceWithOneNPC6x6Env(DanceWithOneNPCEnv):
326
+ def __init__(self, **kwargs):
327
+ super().__init__(size=6, **kwargs)
328
+
329
+
330
+
331
+ register(
332
+ id='MiniGrid-DanceWithOneNPC-5x5-v0',
333
+ entry_point='gym_minigrid.envs:DanceWithOneNPCEnv'
334
+ )
335
+
336
+ register(
337
+ id='MiniGrid-DanceWithOneNPC-6x6-v0',
338
+ entry_point='gym_minigrid.envs:DanceWithOneNPC6x6Env'
339
+ )
340
+
341
+ register(
342
+ id='MiniGrid-DanceWithOneNPC-8x8-v0',
343
+ entry_point='gym_minigrid.envs:DanceWithOneNPC8x8Env'
344
+ )
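
A sketch of how a full agent action decomposes in this environment, following the speak_flag logic in step above (hypothetical values; NaN in both utterance slots means the agent stays silent, and the import path assumes the module ships under gym_minigrid.envs):

import numpy as np
from gym_minigrid.envs import DanceWithOneNPCGrammar

speaking_action = [2, 0, 1]            # forward + "Move your head"
silent_action = [2, np.nan, np.nan]    # forward, no utterance
print(DanceWithOneNPCGrammar.construct_utterance(speaking_action[1:]))  # "Move your head "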
gym-minigrid/gym_minigrid/backup_envs/diverseexit.py ADDED
@@ -0,0 +1,584 @@
1
+ import numpy as np
2
+
3
+ from gym_minigrid.minigrid import *
4
+ from gym_minigrid.register import register
5
+
6
+ import time
7
+ from collections import deque
8
+
9
+ class TeacherPeer(NPC):
10
+ """
11
+ A dancing NPC that the agent has to copy
12
+ """
13
+
14
+ def __init__(self, color, name, env, npc_type=0, knowledgeable=False, easier=False, idl=False):
15
+ super().__init__(color)
16
+ self.name = name
17
+ self.npc_dir = 1 # NPC initially looks downward
18
+ self.npc_type = npc_type
19
+ self.env = env
20
+ self.knowledgeable = knowledgeable
21
+ self.npc_actions = []
22
+ self.dancing_step_idx = 0
23
+ self.actions = MiniGridEnv.Actions
24
+ self.add_npc_direction = True
25
+ self.available_moves = [self.rotate_left, self.rotate_right, self.go_forward, self.toggle_action]
26
+ self.was_introduced_to = False
27
+ self.easier = easier
28
+ assert not self.easier
29
+ self.idl = idl
30
+
31
+ self.must_eye_contact = True if (self.npc_type // 3) % 2 == 0 else False
32
+ self.wanted_intro_utterances = [
33
+ EasyTeachingGamesGrammar.construct_utterance([2, 2]),
34
+ EasyTeachingGamesGrammar.construct_utterance([0, 1])
35
+ ]
36
+ self.wanted_intro_utterance = self.wanted_intro_utterances[0] if (self.npc_type // 3) // 2 == 0 else self.wanted_intro_utterances[1]
37
+ if self.npc_type % 3 == 0:
38
+ # must be far, must not poke
39
+ self.must_be_poked = False
40
+ self.must_be_close = False
41
+
42
+ elif self.npc_type % 3 == 1:
43
+ # must be close, must not poke
44
+ self.must_be_poked = False
45
+ self.must_be_close = True
46
+
47
+ elif self.npc_type % 3 == 2:
48
+ # must be close, must poke
49
+ self.must_be_poked = True
50
+ self.must_be_close = True
51
+
52
+ else:
53
+ raise ValueError("npc tyep {} unknown". format(self.npc_type))
54
+
55
+ # print("Peer type: ", self.npc_type)
56
+ # print("Peer conf: ", self.wanted_intro_utterance, self.must_eye_contact, self.must_be_close, self.must_be_poked)
57
+
58
+
59
+ if self.must_be_poked and not self.must_be_close:
60
+ raise ValueError("Must be poked means it must be close also.")
61
+
62
+ self.poked = False
63
+
64
+ self.exited = False
65
+ self.joint_attention_achieved = False
66
+
67
+ def toggle(self, env, pos):
68
+ """Method to trigger/toggle an action this object performs"""
69
+ self.poked = True
70
+ return True
71
+
72
+ def is_introduction_state_ok(self):
73
+ if (self.must_be_poked and self.introduction_state["poked"]) or (
74
+ not self.must_be_poked and not self.introduction_state["poked"]):
75
+ if (self.must_be_close and self.introduction_state["close"]) or (
76
+ not self.must_be_close and not self.introduction_state["close"]):
77
+ if (self.must_eye_contact and self.introduction_state["eye_contact"]) or (
78
+ not self.must_eye_contact and not self.introduction_state["eye_contact"]
79
+ ):
80
+ if self.introduction_state["intro_utterance"] == self.wanted_intro_utterance:
81
+ return True
82
+
83
+ return False
84
+
85
+ def can_overlap(self):
86
+ # If the NPC is hidden, the agent can overlap with it
87
+ return self.env.hidden_npc
88
+
89
+ def encode(self, nb_dims=3):
90
+ if self.env.hidden_npc:
91
+ if nb_dims == 3:
92
+ return (1, 0, 0)
93
+ elif nb_dims == 4:
94
+ return (1, 0, 0, 0)
95
+ else:
96
+ return super().encode(nb_dims=nb_dims)
97
+
98
+ def step(self, agent_utterance):
99
+ super().step()
100
+
101
+ if self.knowledgeable:
102
+ if self.easier:
103
+ raise DeprecationWarning()
104
+ # wanted_dir = self.compute_wanted_dir(self.env.agent_pos)
105
+ # action = self.compute_turn_action(wanted_dir)
106
+ # action()
107
+ # if not self.was_introduced_to and (agent_utterance in self.wanted_intro_utterances):
108
+ # self.was_introduced_to = True
109
+ # self.introduction_state = {
110
+ # "poked": self.poked,
111
+ # "close": self.is_near_agent(),
112
+ # "eye_contact": self.is_eye_contact(),
113
+ # "correct_intro_utterance": agent_utterance == self.wanted_intro_utterance
114
+ # }
115
+ # if self.is_introduction_state_ok():
116
+ # utterance = "Go to the {} door \n".format(self.env.target_color)
117
+ # return utterance
118
+
119
+ else:
120
+ wanted_dir = self.compute_wanted_dir(self.env.agent_pos)
121
+ action = self.compute_turn_action(wanted_dir)
122
+ action()
123
+ if not self.was_introduced_to and (agent_utterance in self.wanted_intro_utterances):
124
+ self.was_introduced_to = True
125
+ self.introduction_state = {
126
+ "poked": self.poked,
127
+ "close": self.is_near_agent(),
128
+ "eye_contact": self.is_eye_contact(),
129
+ "intro_utterance": agent_utterance,
130
+ }
131
+ if not self.is_introduction_state_ok():
132
+ if self.idl:
133
+ if self.env.hidden_npc:
134
+ return None
135
+ else:
136
+ return "I don't like that \n"
137
+ else:
138
+ return None
139
+
140
+ if self.is_eye_contact() and self.was_introduced_to:
141
+
142
+ if self.is_introduction_state_ok():
143
+ utterance = "Go to the {} door \n".format(self.env.target_color)
144
+ if self.env.hidden_npc:
145
+ return None
146
+ else:
147
+ return utterance
148
+ else:
149
+ # no utterance
150
+ return None
151
+
152
+ else:
153
+ self.env._rand_elem(self.available_moves)()
154
+ return None
155
+
156
+
157
+ def render(self, img):
158
+ c = COLORS[self.color]
159
+
160
+ npc_shapes = []
161
+ # Draw eyes
162
+
163
+ if self.npc_type % 3 == 0:
164
+ npc_shapes.append(point_in_circle(cx=0.70, cy=0.50, r=0.10))
165
+ npc_shapes.append(point_in_circle(cx=0.30, cy=0.50, r=0.10))
166
+ # Draw mouth
167
+ npc_shapes.append(point_in_rect(0.20, 0.80, 0.72, 0.81))
168
+ # Draw top hat
169
+ npc_shapes.append(point_in_rect(0.30, 0.70, 0.05, 0.28))
170
+
171
+ elif self.npc_type % 3 == 1:
172
+ npc_shapes.append(point_in_circle(cx=0.70, cy=0.50, r=0.10))
173
+ npc_shapes.append(point_in_circle(cx=0.30, cy=0.50, r=0.10))
174
+ # Draw mouth
175
+ npc_shapes.append(point_in_rect(0.20, 0.80, 0.72, 0.81))
176
+ # Draw bottom hat
177
+ npc_shapes.append(point_in_triangle((0.15, 0.28),
178
+ (0.85, 0.28),
179
+ (0.50, 0.05)))
180
+ elif self.npc_type % 3 == 2:
181
+ npc_shapes.append(point_in_circle(cx=0.70, cy=0.50, r=0.10))
182
+ npc_shapes.append(point_in_circle(cx=0.30, cy=0.50, r=0.10))
183
+ # Draw mouth
184
+ npc_shapes.append(point_in_rect(0.20, 0.80, 0.72, 0.81))
185
+ # Draw bottom hat
186
+ npc_shapes.append(point_in_triangle((0.15, 0.28),
187
+ (0.85, 0.28),
188
+ (0.50, 0.05)))
189
+ # Draw top hat
190
+ npc_shapes.append(point_in_rect(0.30, 0.70, 0.05, 0.28))
191
+
192
+
193
+ # todo: move this to super function
194
+ # todo: super.render should be able to take the npc_shapes and then rotate them
195
+
196
+ if hasattr(self, "npc_dir"):
197
+ # Pre-rotation to ensure npc_dir = 1 means NPC looks downwards
198
+ npc_shapes = [rotate_fn(v, cx=0.5, cy=0.5, theta=-1 * (math.pi / 2)) for v in npc_shapes]
199
+ # Rotate npc based on its direction
200
+ npc_shapes = [rotate_fn(v, cx=0.5, cy=0.5, theta=(math.pi / 2) * self.npc_dir) for v in npc_shapes]
201
+
202
+ # Draw shapes
203
+ for v in npc_shapes:
204
+ fill_coords(img, v, c)
205
+
206
+ # class EasyTeachingGamesSmallGrammar(object):
207
+ #
208
+ # templates = ["Where is", "Open", "What is"]
209
+ # things = ["sesame", "the exit", "the password"]
210
+ #
211
+ # grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
212
+ #
213
+ # @classmethod
214
+ # def construct_utterance(cls, action):
215
+ # if all(np.isnan(action)):
216
+ # return ""
217
+ # return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
218
+
219
+
220
+ class EasyTeachingGamesGrammar(object):
221
+
222
+ templates = ["Where is", "Open", "Which is", "How are"]
223
+ things = [
224
+ "sesame", "the exit", "the correct door", "you", "the ceiling", "the window", "the entrance", "the closet",
225
+ "the drawer", "the fridge", "the floor", "the lamp", "the trash can", "the chair", "the bed", "the sofa"
226
+ ]
227
+
228
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
229
+
230
+ @classmethod
231
+ def construct_utterance(cls, action):
232
+ if all(np.isnan(action)):
233
+ return ""
234
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
235
+
236
+
237
+ class EasyTeachingGamesEnv(MultiModalMiniGridEnv):
238
+ """
239
+ Environment in which the agent must introduce itself to a teacher peer in the
241
+ way that peer prefers; the peer then reveals which door leads out of the room
241
+ """
242
+
243
+ def __init__(
244
+ self,
245
+ size=5,
246
+ diminished_reward=True,
247
+ step_penalty=False,
248
+ knowledgeable=False,
249
+ hard_password=False,
250
+ max_steps=50,
251
+ n_switches=3,
252
+ peer_type=None,
253
+ no_turn_off=False,
254
+ easier=False,
255
+ idl=False,
256
+ hidden_npc = False,
257
+ ):
258
+ assert size >= 5
259
+ self.empty_symbol = "NA \n"
260
+ self.diminished_reward = diminished_reward
261
+ self.step_penalty = step_penalty
262
+ self.knowledgeable = knowledgeable
263
+ self.hard_password = hard_password
264
+ self.n_switches = n_switches
265
+ self.peer_type = peer_type
266
+ self.no_turn_off = no_turn_off
267
+ self.easier = easier
268
+ self.idl = idl
269
+ self.hidden_npc = hidden_npc
270
+
271
+ super().__init__(
272
+ grid_size=size,
273
+ max_steps=max_steps,
274
+ # Set this to True for maximum speed
275
+ see_through_walls=True,
276
+ actions=MiniGridEnv.Actions,
277
+ action_space=spaces.MultiDiscrete([
278
+ len(MiniGridEnv.Actions),
279
+ *EasyTeachingGamesGrammar.grammar_action_space.nvec
280
+ ]),
281
+ add_npc_direction=True
282
+ )
283
+
284
+ print({
285
+ "size": size,
286
+ "diminished_reward": diminished_reward,
287
+ "step_penalty": step_penalty,
288
+ })
289
+
290
+
291
+ def _gen_grid(self, width, height):
292
+ # Create the grid
293
+ self.grid = Grid(width, height, nb_obj_dims=4)
294
+
295
+ # Randomly vary the room width and height
296
+ width = self._rand_int(5, width+1)
297
+ height = self._rand_int(5, height+1)
298
+
299
+ self.wall_x = width - 1
300
+ self.wall_y = height - 1
301
+
302
+ # Generate the surrounding walls
303
+ self.grid.wall_rect(0, 0, width, height)
304
+
305
+ self.door_pos = []
306
+ self.door_front_pos = [] # Remembers positions in front of the doors, to avoid placing the peer there
307
+
308
+ self.door_pos.append((self._rand_int(2, width-2), 0))
309
+ self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1]+1))
310
+
311
+ self.door_pos.append((self._rand_int(2, width-2), height-1))
312
+ self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1] - 1))
313
+
314
+ self.door_pos.append((0, self._rand_int(2, height-2)))
315
+ self.door_front_pos.append((self.door_pos[-1][0] + 1, self.door_pos[-1][1]))
316
+
317
+ self.door_pos.append((width-1, self._rand_int(2, height-2)))
318
+ self.door_front_pos.append((self.door_pos[-1][0] - 1, self.door_pos[-1][1]))
319
+
320
+ # Generate the door colors
321
+ self.door_colors = []
322
+ while len(self.door_colors) < len(self.door_pos):
323
+ color = self._rand_elem(COLOR_NAMES)
324
+ if color in self.door_colors:
325
+ continue
326
+ self.door_colors.append(color)
327
+
328
+ # Place the doors in the grid
329
+ for idx, pos in enumerate(self.door_pos):
330
+ color = self.door_colors[idx]
331
+ self.grid.set(*pos, Door(color))
332
+
333
+ # Select a random target door
334
+ self.doorIdx = self._rand_int(0, len(self.door_pos))
335
+ self.target_pos = self.door_pos[self.doorIdx]
336
+ self.target_color = self.door_colors[self.doorIdx]
337
+
338
+ # Set a randomly coloured TeacherPeer NPC
339
+ color = self._rand_elem(COLOR_NAMES)
340
+
341
+ if self.peer_type is None:
342
+ self.current_peer_type = self._rand_int(0, 12)
343
+ else:
344
+ self.current_peer_type = self.peer_type
345
+
346
+ self.peer = TeacherPeer(
347
+ color,
348
+ ["Bobby", "Robby", "Toby"][self.current_peer_type % 3],
349
+ self,
350
+ knowledgeable=self.knowledgeable,
351
+ npc_type=self.current_peer_type,
352
+ easier=self.easier,
353
+ idl=self.idl
354
+ )
355
+
356
+ # height - 2 so it is not in the way, standing in front of the buttons
357
+ while True:
358
+ peer_pos = np.array((self._rand_int(1, width - 1), self._rand_int(1, height - 2)))
359
+
360
+ if (
361
+ # not in front of any door
362
+ not tuple(peer_pos) in self.door_front_pos
363
+ ) and (
364
+ # a must-not-be-close npc cannot sit in the middle of a 5x5 env
365
+ not (not self.peer.must_be_close and (width == 5 and height == 5) and all(peer_pos == (2, 2)))
366
+ ):
367
+ break
368
+
369
+ self.grid.set(*peer_pos, self.peer)
370
+ self.peer.init_pos = peer_pos
371
+ self.peer.cur_pos = peer_pos
372
+
373
+ # Randomize the agent's start position and orientation
374
+ self.place_agent(size=(width, height))
375
+
376
+ # Generate the mission string
377
+ self.mission = 'exit the room'
378
+
379
+ # Dummy beginning string
380
+ self.beginning_string = "This is what you hear. \n"
381
+ self.utterance = self.beginning_string
382
+
383
+ # utterance appended at the end of each step
384
+ self.utterance_history = ""
385
+
386
+ # used for rendering
387
+ self.conversation = self.utterance
388
+ self.outcome_info = None
389
+
390
+
391
+ def step(self, action):
392
+ p_action = action[0]
393
+ utterance_action = action[1:]
394
+
395
+ obs, reward, done, info = super().step(p_action)
396
+
397
+ if p_action == self.actions.done:
398
+ done = True
399
+
400
+ peer_utterance = EasyTeachingGamesGrammar.construct_utterance(utterance_action)
401
+ peer_reply = self.peer.step(peer_utterance)
402
+
403
+ if peer_reply is not None:
404
+ self.utterance += "{}: {} \n".format(self.peer.name, peer_reply)
405
+ self.conversation += "{}: {} \n".format(self.peer.name, peer_reply)
406
+
407
+ if all(self.agent_pos == self.target_pos):
408
+ done = True
409
+ reward = self._reward()
410
+
411
+ elif tuple(self.agent_pos) in self.door_pos:
412
+ done = True
413
+
414
+ # discount
415
+ if self.step_penalty:
416
+ reward = reward - 0.01
417
+
418
+ if self.hidden_npc:
419
+ # all npc are hidden
420
+ assert np.argwhere(obs['image'][:,:,0] == OBJECT_TO_IDX['npc']).size == 0
421
+ assert "{}:".format(self.peer.name) not in self.utterance
422
+
423
+ # fill observation with text
424
+ self.append_existing_utterance_to_history()
425
+ obs = self.add_utterance_to_observation(obs)
426
+ self.reset_utterance()
427
+
428
+ if done:
429
+ if reward > 0:
430
+ self.outcome_info = "SUCCESS: agent got {} reward \n".format(np.round(reward, 1))
431
+ else:
432
+ self.outcome_info = "FAILURE: agent got {} reward \n".format(reward)
433
+
434
+ return obs, reward, done, info
435
+
436
+ def _reward(self):
437
+ if self.diminished_reward:
438
+ return super()._reward()
439
+ else:
440
+ return 1.0
441
+
442
+ def render(self, *args, **kwargs):
443
+ obs = super().render(*args, **kwargs)
444
+ self.window.clear_text() # erase previous text
445
+
446
+ self.window.set_caption(self.conversation, self.peer.name)
447
+
448
+ self.window.ax.set_title("correct door: {}".format(self.target_color), loc="left", fontsize=10)
449
+ if self.outcome_info:
450
+ color = None
451
+ if "SUCCESS" in self.outcome_info:
452
+ color = "lime"
453
+ elif "FAILURE" in self.outcome_info:
454
+ color = "red"
455
+ self.window.add_text(*(0.01, 0.85, self.outcome_info),
456
+ **{'fontsize':15, 'color':color, 'weight':"bold"})
457
+
458
+ self.window.show_img(obs) # re-draw image to add changes to window
459
+ return obs
460
+
461
+
462
+ # # must be far, must not poke
463
+ # class EasyTeachingGames8x8Env(EasyTeachingGamesEnv):
464
+ # def __init__(self):
465
+ # super().__init__(size=8, knowledgeable=True, max_steps=50, peer_type=0)
466
+ #
467
+ # # must be close, must not poke
468
+ # class EasyTeachingGamesClose8x8Env(EasyTeachingGamesEnv):
469
+ # def __init__(self):
470
+ # super().__init__(size=8, knowledgeable=True, max_steps=50, peer_type=1)
471
+ #
472
+ # # must be close, must poke
473
+ # class EasyTeachingGamesPoke8x8Env(EasyTeachingGamesEnv):
474
+ # def __init__(self):
475
+ # super().__init__(size=8, knowledgeable=True, max_steps=50, peer_type=2)
476
+ #
477
+ # # 100 multi
478
+ # class EasyTeachingGamesMulti8x8Env(EasyTeachingGamesEnv):
479
+ # def __init__(self):
480
+ # super().__init__(size=8, knowledgeable=True, max_steps=50, peer_type=None)
481
+ #
482
+ #
483
+ #
484
+ # # speaking 50 steps
485
+ # register(
486
+ # id='MiniGrid-EasyTeachingGames-8x8-v0',
487
+ # entry_point='gym_minigrid.envs:EasyTeachingGames8x8Env'
488
+ # )
489
+ #
490
+ # # demonstrating 50 steps
491
+ # register(
492
+ # id='MiniGrid-EasyTeachingGamesPoke-8x8-v0',
493
+ # entry_point='gym_minigrid.envs:EasyTeachingGamesPoke8x8Env'
494
+ # )
495
+ #
496
+ # # demonstrating 50 steps
497
+ # register(
498
+ # id='MiniGrid-EasyTeachingGamesClose-8x8-v0',
499
+ # entry_point='gym_minigrid.envs:EasyTeachingGamesClose8x8Env'
500
+ # )
501
+ #
502
+ # # speaking 50 steps
503
+ # register(
504
+ # id='MiniGrid-EasyTeachingGamesMulti-8x8-v0',
505
+ # entry_point='gym_minigrid.envs:EasyTeachingGamesMulti8x8Env'
506
+ # )
507
+
508
+ # # must be far, must not poke
509
+ # class EasierTeachingGames8x8Env(EasyTeachingGamesEnv):
510
+ # def __init__(self):
511
+ # super().__init__(size=8, knowledgeable=True, max_steps=50, peer_type=0, easier=True)
512
+ #
513
+ # # must be close, must not poke
514
+ # class EasierTeachingGamesClose8x8Env(EasyTeachingGamesEnv):
515
+ # def __init__(self):
516
+ # super().__init__(size=8, knowledgeable=True, max_steps=50, peer_type=1, easier=True)
517
+ #
518
+ # # must be close, must poke
519
+ # class EasierTeachingGamesPoke8x8Env(EasyTeachingGamesEnv):
520
+ # def __init__(self):
521
+ # super().__init__(size=8, knowledgeable=True, max_steps=50, peer_type=2, easier=True)
522
+ #
523
+ # # 100 multi
524
+ # class EasierTeachingGamesMulti8x8Env(EasyTeachingGamesEnv):
525
+ # def __init__(self):
526
+ # super().__init__(size=8, knowledgeable=True, max_steps=50, peer_type=None, easier=True)
527
+ #
528
+ # # Multi Many
529
+ # class ManyTeachingGamesMulti8x8Env(EasyTeachingGamesEnv):
530
+ # def __init__(self):
531
+ # super().__init__(size=8, knowledgeable=True, max_steps=50, peer_type=None, easier=False, many=True)
532
+ #
533
+ # class ManyTeachingGamesMultiIDL8x8Env(EasyTeachingGamesEnv):
534
+ # def __init__(self):
535
+ # super().__init__(size=8, knowledgeable=True, max_steps=50, peer_type=None, easier=False, many=True, idl=True)
536
+
537
+
538
+ # # speaking 50 steps
539
+ # register(
540
+ # id='MiniGrid-EasierTeachingGames-8x8-v0',
541
+ # entry_point='gym_minigrid.envs:EasierTeachingGames8x8Env'
542
+ # )
543
+ #
544
+ # # demonstrating 50 steps
545
+ # register(
546
+ # id='MiniGrid-EasierTeachingGamesPoke-8x8-v0',
547
+ # entry_point='gym_minigrid.envs:EasierTeachingGamesPoke8x8Env'
548
+ # )
549
+ #
550
+ # # demonstrating 50 steps
551
+ # register(
552
+ # id='MiniGrid-EasierTeachingGamesClose-8x8-v0',
553
+ # entry_point='gym_minigrid.envs:EasierTeachingGamesClose8x8Env'
554
+ # )
555
+ #
556
+ # # speaking 50 steps
557
+ # register(
558
+ # id='MiniGrid-EasierTeachingGamesMulti-8x8-v0',
559
+ # entry_point='gym_minigrid.envs:EasierTeachingGamesMulti8x8Env'
560
+ # )
561
+ #
562
+ # # speaking 50 steps
563
+ # register(
564
+ # id='MiniGrid-ManyTeachingGamesMulti-8x8-v0',
565
+ # entry_point='gym_minigrid.envs:ManyTeachingGamesMulti8x8Env'
566
+ # )
567
+ #
568
+ # # speaking 50 steps
569
+ # register(
570
+ # id='MiniGrid-ManyTeachingGamesMultiIDL-8x8-v0',
571
+ # entry_point='gym_minigrid.envs:ManyTeachingGamesMultiIDL8x8Env'
572
+ # )
573
+
574
+ # Multi Many
575
+ class DiverseExit8x8Env(EasyTeachingGamesEnv):
576
+ def __init__(self, **kwargs):
577
+ super().__init__(size=8, knowledgeable=True, max_steps=50, peer_type=None, easier=False, **kwargs)
578
+
579
+ # speaking 50 steps
580
+ register(
581
+ id='MiniGrid-DiverseExit-8x8-v0',
582
+ entry_point='gym_minigrid.envs:DiverseExit8x8Env'
583
+ )
584
+
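
For reference, the npc_type arithmetic spread through TeacherPeer.__init__ above decodes as follows (a standalone sketch; decode_peer_type is a hypothetical helper that mirrors that arithmetic for the twelve peer types drawn by _rand_int(0, 12)):

def decode_peer_type(npc_type):
    # mirrors the checks in TeacherPeer.__init__
    must_be_poked = npc_type % 3 == 2
    must_be_close = npc_type % 3 in (1, 2)
    must_eye_contact = (npc_type // 3) % 2 == 0
    wanted_utterance_idx = (npc_type // 3) // 2  # index into wanted_intro_utterances
    return must_be_poked, must_be_close, must_eye_contact, wanted_utterance_idx

for t in range(12):
    print(t, decode_peer_type(t))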
gym-minigrid/gym_minigrid/backup_envs/exiter.py ADDED
@@ -0,0 +1,347 @@
1
+ import numpy as np
2
+
3
+ from gym_minigrid.minigrid import *
4
+ from gym_minigrid.register import register
5
+
6
+ import time
7
+ from collections import deque
8
+
9
+
10
+ class Peer(NPC):
11
+ """
12
+ A peer NPC that unlocks the door nearest the agent by toggling the matching switch after making eye contact
13
+ """
14
+
15
+ def __init__(self, color, name, env, random_actions=False):
16
+ super().__init__(color)
17
+ self.name = name
18
+ self.npc_dir = 1 # NPC initially looks downward
19
+ self.npc_type = 0
20
+ self.env = env
21
+ self.npc_actions = []
22
+ self.dancing_step_idx = 0
23
+ self.actions = MiniGridEnv.Actions
24
+ self.add_npc_direction = True
25
+ self.available_moves = [self.rotate_left, self.rotate_right, self.go_forward, self.toggle_action]
26
+ self.random_actions = random_actions
27
+ self.joint_attention_achieved = False
28
+
29
+ def can_overlap(self):
30
+ # If the NPC is hidden, the agent can overlap with it
31
+ return self.env.hidden_npc
32
+
33
+ def encode(self, nb_dims=3):
34
+ if self.env.hidden_npc:
35
+ if nb_dims == 3:
36
+ return (1, 0, 0)
37
+ elif nb_dims == 4:
38
+ return (1, 0, 0, 0)
39
+ else:
40
+ return super().encode(nb_dims=nb_dims)
41
+
42
+ def step(self):
43
+ super().step()
44
+ if self.random_actions:
45
+ if type(self.env.grid.get(*self.front_pos)) == Lava:
46
+ # can't walk into lava
47
+ act = self.env._rand_elem([
48
+ m for m in self.available_moves if m != self.go_forward
49
+ ])
50
+ elif type(self.env.grid.get(*self.front_pos)) == Switch:
51
+ # can't toggle switches
52
+ act = self.env._rand_elem([
53
+ m for m in self.available_moves if m != self.toggle_action
54
+ ])
55
+ else:
56
+ act = self.env._rand_elem(self.available_moves)
57
+
58
+ act()
59
+
60
+ else:
61
+ distances = np.abs(self.env.agent_pos - self.env.door_pos).sum(-1)
62
+
63
+ door_id = np.argmin(distances)
64
+ wanted_switch_pos = self.env.switches_pos[door_id]
65
+ sw = self.env.switches[door_id]
66
+
67
+ distance_to_switch = np.abs(wanted_switch_pos - self.cur_pos ).sum(-1)
68
+
69
+ # corresponding switch
70
+ if all(self.front_pos == wanted_switch_pos) and self.joint_attention_achieved:
71
+ # the agent is in front of the door, looking at it
72
+ if tuple(self.env.front_pos) == tuple(self.env.door_pos[door_id]):
73
+ if not sw.is_on:
74
+ self.toggle_action()
75
+
76
+ elif distance_to_switch == 1:
77
+ if not self.joint_attention_achieved:
78
+ # look at the agent
79
+ wanted_dir = self.compute_wanted_dir(self.env.agent_pos)
80
+ else:
81
+ # turns to the switch
82
+ wanted_dir = self.compute_wanted_dir(wanted_switch_pos)
83
+
84
+ action = self.compute_turn_action(wanted_dir)
85
+ action()
86
+ if self.is_eye_contact():
87
+ self.joint_attention_achieved = True
88
+
89
+
90
+ else:
91
+ act = self.path_to_pos(wanted_switch_pos)
92
+ act()
93
+
94
+ # not really important as the NPC doesn't speak
95
+ if self.env.hidden_npc:
96
+ return None
97
+
98
+
99
+
100
+ class ExiterGrammar(object):
101
+
102
+ templates = ["Move your", "Shake your"]
103
+ things = ["body", "head"]
104
+
105
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
106
+
107
+ @classmethod
108
+ def construct_utterance(cls, action):
109
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
110
+
111
+
112
+ class ExiterEnv(MultiModalMiniGridEnv):
113
+ """
114
+ Environment in which the agent must exit through one of two locked doors;
115
+ a peer NPC unlocks the door nearest the agent by toggling the matching switch
116
+ """
117
+
118
+ def __init__(
119
+ self,
120
+ size=5,
121
+ diminished_reward=True,
122
+ step_penalty=False,
123
+ knowledgeable=False,
124
+ ablation=False,
125
+ max_steps=20,
126
+ hidden_npc=False,
127
+ ):
128
+ assert size >= 5
129
+ self.empty_symbol = "NA \n"
130
+ self.diminished_reward = diminished_reward
131
+ self.step_penalty = step_penalty
132
+ self.knowledgeable = knowledgeable
133
+ self.ablation = ablation
134
+ self.hidden_npc = hidden_npc
135
+
136
+ super().__init__(
137
+ grid_size=size,
138
+ max_steps=max_steps,
139
+ # Set this to True for maximum speed
140
+ see_through_walls=True,
141
+ actions=MiniGridEnv.Actions,
142
+ action_space=spaces.MultiDiscrete([
143
+ len(MiniGridEnv.Actions),
144
+ *ExiterGrammar.grammar_action_space.nvec
145
+ ]),
146
+ add_npc_direction=True
147
+ )
148
+
149
+ print({
150
+ "size": size,
151
+ "diminished_reward": diminished_reward,
152
+ "step_penalty": step_penalty,
153
+ })
154
+
155
+ def _gen_grid(self, width, height):
156
+ # Create the grid
157
+ self.grid = Grid(width, height, nb_obj_dims=4)
158
+
159
+ # Randomly vary the room width and height
160
+ width = self._rand_int(5, width+1)
161
+ height = self._rand_int(5, height+1)
162
+
163
+ self.wall_x = width-1
164
+ self.wall_y = height-1
165
+
166
+ # Generate the surrounding walls
167
+ self.grid.wall_rect(0, 0, width, height)
168
+
169
+ # add lava
170
+ self.grid.vert_wall(width//2, 1, height - 2, Lava)
171
+
172
+ # door top
173
+ door_color_top = self._rand_elem(COLOR_NAMES)
174
+ self.door_pos_top = (width-1, 1)
175
+ self.door_top = Door(door_color_top, is_locked=False if self.ablation else True)
176
+ self.grid.set(*self.door_pos_top, self.door_top)
177
+
178
+ # switch top
179
+ self.switch_pos_top = (0, 1)
180
+ self.switch_top = Switch(door_color_top, lockable_object=self.door_top, locker_switch=True)
181
+ self.grid.set(*self.switch_pos_top, self.switch_top)
182
+
183
+ # door bottom
184
+ door_color_bottom = self._rand_elem(COLOR_NAMES)
185
+ self.door_pos_bottom = (width-1, height-2)
186
+ self.door_bottom = Door(door_color_bottom, is_locked=False if self.ablation else True)
187
+ self.grid.set(*self.door_pos_bottom, self.door_bottom)
188
+
189
+ # switch bottom
190
+ self.switch_pos_bottom = (0, height-2)
191
+ self.switch_bottom = Switch(door_color_bottom, lockable_object=self.door_bottom, locker_switch=True)
192
+ self.grid.set(*self.switch_pos_bottom, self.switch_bottom)
193
+
194
+ self.switches = [self.switch_top, self.switch_bottom]
195
+ self.switches_pos = [self.switch_pos_top, self.switch_pos_bottom]
196
+ self.door = [self.door_top, self.door_bottom]
197
+ self.door_pos = [self.door_pos_top, self.door_pos_bottom]
198
+
199
+ # Set a randomly coloured Peer NPC
200
+ color = self._rand_elem(COLOR_NAMES)
201
+ self.peer = Peer(color, "Jill", self, random_actions=self.ablation)
202
+
203
+ # Place it somewhere in the left half of the room
204
+ peer_pos = np.array((self._rand_int(1, width//2), self._rand_int(1, height - 1)))
205
+
206
+ self.grid.set(*peer_pos, self.peer)
207
+ self.peer.init_pos = peer_pos
208
+ self.peer.cur_pos = peer_pos
209
+
210
+ # Randomize the agent's start position and orientation
211
+ agent = self.place_agent(top=(width // 2, 0), size=(width // 2, height))
212
+
213
+ # Generate the mission string
214
+ self.mission = 'exit the room'
215
+
216
+ # Dummy beginning string
217
+ self.beginning_string = "This is what you hear. \n"
218
+ self.utterance = self.beginning_string
219
+
220
+ # utterance appended at the end of each step
221
+ self.utterance_history = ""
222
+
223
+ # used for rendering
224
+ self.conversation = self.utterance
225
+ self.outcome_info = None
226
+
227
+ def step(self, action):
228
+ p_action = action[0]
229
+ utterance_action = action[1:]
230
+
231
+ obs, reward, done, info = super().step(p_action)
232
+ self.peer.step()
233
+
234
+ if np.isnan(p_action):
235
+ pass  # NaN physical action: no movement this step
236
+
237
+ if p_action == self.actions.done:
238
+ done = True
239
+
240
+ elif all([self.switch_top.is_on, self.switch_bottom.is_on]):
241
+ # if both switches are on: no reward is given and the episode ends
242
+ done = True
243
+
244
+ elif tuple(self.agent_pos) in [self.door_pos_top, self.door_pos_bottom]:
245
+ # agent has exited
246
+ reward = self._reward()
247
+ done = True
248
+
249
+ # discount
250
+ if self.step_penalty:
251
+ reward = reward - 0.01
252
+
253
+ if self.hidden_npc:
254
+ # all NPCs are hidden
255
+ assert np.argwhere(obs['image'][:,:,0] == OBJECT_TO_IDX['npc']).size == 0
256
+ assert "{}:".format(self.peer.name) not in self.utterance
257
+
258
+ # fill observation with text
259
+ self.append_existing_utterance_to_history()
260
+ obs = self.add_utterance_to_observation(obs)
261
+ self.reset_utterance()
262
+
263
+ if done:
264
+ if reward > 0:
265
+ self.outcome_info = "SUCCESS: agent got {} reward \n".format(np.round(reward, 1))
266
+ else:
267
+ self.outcome_info = "FAILURE: agent got {} reward \n".format(reward)
268
+
269
+ return obs, reward, done, info
270
+
271
+ def _reward(self):
272
+ if self.diminished_reward:
273
+ return super()._reward()
274
+ else:
275
+ return 1.0
276
+
277
+ def render(self, *args, **kwargs):
278
+ obs = super().render(*args, **kwargs)
279
+ self.window.clear_text() # erase previous text
280
+
281
+ # self.window.set_caption(self.conversation, [self.peer.name])
282
+ # self.window.ax.set_title("correct door: {}".format(self.true_guide.target_color), loc="left", fontsize=10)
283
+ if self.outcome_info:
284
+ color = None
285
+ if "SUCCESS" in self.outcome_info:
286
+ color = "lime"
287
+ elif "FAILURE" in self.outcome_info:
288
+ color = "red"
289
+ self.window.add_text(0.01, 0.85, self.outcome_info,
290
+ fontsize=15, color=color, weight="bold")
291
+
292
+ self.window.show_img(obs) # re-draw image to add changes to window
293
+ return obs
294
+
295
+
296
+ class Exiter8x8Env(ExiterEnv):
297
+ def __init__(self, **kwargs):
298
+ super().__init__(size=8, max_steps=20, **kwargs)
299
+
300
+
301
+ class Exiter6x6Env(ExiterEnv):
302
+ def __init__(self):
303
+ super().__init__(size=6, max_steps=20)
304
+
305
+ class AblationExiterEnv(ExiterEnv):
306
+ def __init__(self):
307
+ super().__init__(size=5, ablation=True, max_steps=20)
308
+
309
+ class AblationExiter8x8Env(ExiterEnv):
310
+ def __init__(self, **kwargs):
311
+ super().__init__(size=8, ablation=True, max_steps=20, **kwargs)
312
+
313
+
314
+ class AblationExiter6x6Env(ExiterEnv):
315
+ def __init__(self):
316
+ super().__init__(size=6, ablation=True, max_steps=20)
317
+
318
+
319
+
320
+ register(
321
+ id='MiniGrid-Exiter-5x5-v0',
322
+ entry_point='gym_minigrid.envs:ExiterEnv'
323
+ )
324
+
325
+ register(
326
+ id='MiniGrid-Exiter-6x6-v0',
327
+ entry_point='gym_minigrid.envs:Exiter6x6Env'
328
+ )
329
+
330
+ register(
331
+ id='MiniGrid-Exiter-8x8-v0',
332
+ entry_point='gym_minigrid.envs:Exiter8x8Env'
333
+ )
334
+ register(
335
+ id='MiniGrid-AblationExiter-5x5-v0',
336
+ entry_point='gym_minigrid.envs:AblationExiterEnv'
337
+ )
338
+
339
+ register(
340
+ id='MiniGrid-AblationExiter-6x6-v0',
341
+ entry_point='gym_minigrid.envs:AblationExiter6x6Env'
342
+ )
343
+
344
+ register(
345
+ id='MiniGrid-AblationExiter-8x8-v0',
346
+ entry_point='gym_minigrid.envs:AblationExiter8x8Env'
347
+ )
gym-minigrid/gym_minigrid/backup_envs/gotodoorpolite.py ADDED
@@ -0,0 +1,292 @@
1
+ from gym_minigrid.minigrid import *
2
+ from gym_minigrid.register import register
3
+
4
+
5
+ class Guide(NPC):
6
+ """
7
+ A simple NPC that wants the agent to go to an object (randomly chosen from the object_pos list)
8
+ """
9
+
10
+ def __init__(self, color, name, env):
11
+ super().__init__(color)
12
+ self.name = name
13
+ self.env = env
14
+ self.introduced = False
15
+
16
+ # Select a random target object as mission
17
+ obj_idx = self.env._rand_int(0, len(self.env.door_pos))
18
+ self.target_pos = self.env.door_pos[obj_idx]
19
+ self.target_color = self.env.door_colors[obj_idx]
20
+
21
+ def listen(self, utterance):
22
+ if utterance == PoliteGrammar.construct_utterance([0, 2]):
23
+ self.introduced = True
24
+ return "I am good. Thank you."
25
+ elif utterance == PoliteGrammar.construct_utterance([1, 1]):
26
+ if self.introduced:
27
+ return self.env.mission
28
+
29
+ return None
30
+
31
+ # def is_near_agent(self):
32
+ # ax, ay = self.env.agent_pos
33
+ # wx, wy = self.cur_pos
34
+ # if (ax == wx and abs(ay - wy) == 1) or (ay == wy and abs(ax - wx) == 1):
35
+ # return True
36
+ # return False
37
+
38
+
39
+ class PoliteGrammar(object):
40
+
41
+ templates = ["How are", "Where is", "Open"]
42
+ things = ["sesame", "the exit", 'you']
43
+
44
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
45
+
46
+ @classmethod
47
+ def construct_utterance(cls, action):
48
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
49
+
50
+
51
+ class GoToDoorPoliteEnv(MultiModalMiniGridEnv):
52
+ """
53
+ Environment in which the agent is instructed to go to a given object
54
+ named using an English text string
55
+ """
56
+
57
+ def __init__(
58
+ self,
59
+ size=5,
60
+ hear_yourself=False,
61
+ diminished_reward=True,
62
+ step_penalty=False,
63
+ max_steps=100,
64
+ ):
65
+ assert size >= 5
66
+
67
+ super().__init__(
68
+ grid_size=size,
69
+ max_steps=max_steps,
70
+ # Set this to True for maximum speed
71
+ see_through_walls=True,
72
+ actions=MiniGridEnv.Actions,
73
+ action_space=spaces.MultiDiscrete([
74
+ len(MiniGridEnv.Actions),
75
+ *PoliteGrammar.grammar_action_space.nvec
76
+ ])
77
+ )
78
+ self.hear_yourself = hear_yourself
79
+ self.diminished_reward = diminished_reward
80
+ self.step_penalty = step_penalty
81
+
82
+ self.empty_symbol = "NA \n"
83
+
84
+ print({
85
+ "size": size,
86
+ "hear_yourself": hear_yourself,
87
+ "diminished_reward": diminished_reward,
88
+ "step_penalty": step_penalty,
89
+ })
90
+
91
+
92
+ def _gen_grid(self, width, height):
93
+ # Create the grid
94
+ self.grid = Grid(width, height)
95
+
96
+ # Randomly vary the room width and height
97
+ width = self._rand_int(5, width+1)
98
+ height = self._rand_int(5, height+1)
99
+
100
+ # Generate the surrounding walls
101
+ self.grid.wall_rect(0, 0, width, height)
102
+
103
+ # Generate the surrounding walls
104
+ self.grid.wall_rect(0, 0, width, height)
105
+
106
+ # Generate the 4 doors at random positions
107
+ self.door_pos = []
108
+ self.door_front_pos = [] # Remembers positions in front of the doors, to avoid placing the wizard there
109
+
110
+ self.door_pos.append((self._rand_int(2, width-2), 0))
111
+ self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1]+1))
112
+
113
+ self.door_pos.append((self._rand_int(2, width-2), height-1))
114
+ self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1] - 1))
115
+
116
+ self.door_pos.append((0, self._rand_int(2, height-2)))
117
+ self.door_front_pos.append((self.door_pos[-1][0] + 1, self.door_pos[-1][1]))
118
+
119
+ self.door_pos.append((width-1, self._rand_int(2, height-2)))
120
+ self.door_front_pos.append((self.door_pos[-1][0] - 1, self.door_pos[-1][1]))
121
+
122
+ # Generate the door colors
123
+ self.door_colors = []
124
+ while len(self.door_colors) < len(self.door_pos):
125
+ color = self._rand_elem(COLOR_NAMES)
126
+ if color in self.door_colors:
127
+ continue
128
+ self.door_colors.append(color)
129
+
130
+ # Place the doors in the grid
131
+ for idx, pos in enumerate(self.door_pos):
132
+ color = self.door_colors[idx]
133
+ self.grid.set(*pos, Door(color))
134
+
135
+ # Set a randomly coloured NPC at a random position
136
+ color = self._rand_elem(COLOR_NAMES)
137
+ self.wizard = Guide(color, "Gandalf", self)
138
+
139
+ # Place it randomly, omitting front of door positions
140
+ self.place_obj(self.wizard,
141
+ size=(width, height),
142
+ reject_fn=lambda _, p: tuple(p) in self.door_front_pos)
143
+
144
+ # Randomize the agent start position and orientation
145
+ self.place_agent(size=(width, height))
146
+
147
+ # Select a random target door
148
+ self.doorIdx = self._rand_int(0, len(self.door_pos))
149
+ self.target_pos = self.door_pos[self.doorIdx]
150
+ self.target_color = self.door_colors[self.doorIdx]
151
+
152
+ # Generate the mission string
153
+ self.mission = 'go to the %s door' % self.target_color
154
+
155
+ # Dummy beginning string
156
+ self.beginning_string = "This is what you hear. \n"
157
+ self.utterance = self.beginning_string
158
+
159
+ # utterance appended at the end of each step
160
+ self.utterance_history = ""
161
+
162
+ def step(self, action):
163
+ p_action = action[0]
164
+ utterance_action = action[1:]
165
+
166
+ assert len(set(np.isnan(utterance_action))) == 1 # all nan or neither nan
167
+
168
+ speak_flag = not all(np.isnan(utterance_action))
169
+
170
+ obs, reward, done, info = super().step(p_action)
171
+
172
+ if speak_flag:
173
+ agent_utterance = PoliteGrammar.construct_utterance(utterance_action)
174
+ if self.hear_yourself:
175
+ self.utterance += "YOU: {} \n".format(agent_utterance)
176
+
177
+ # check if near wizard
178
+ if self.wizard.is_near_agent():
179
+ reply = self.wizard.listen(agent_utterance)
180
+
181
+ if reply:
182
+ self.utterance += "{}: {} \n".format(self.wizard.name, reply)
183
+
184
+ # Don't let the agent open any of the doors
185
+ if p_action == self.actions.toggle:
186
+ done = True
187
+
188
+ if p_action == self.actions.done:
189
+ ax, ay = self.agent_pos
190
+ tx, ty = self.target_pos
191
+
192
+ if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
193
+ reward = self._reward()
194
+ done = True
195
+
196
+ # discount
197
+ if self.step_penalty:
198
+ reward = reward - 0.01
199
+
200
+ # fill observation with text
201
+ self.append_existing_utterance_to_history()
202
+ obs = self.add_utterance_to_observation(obs)
203
+ self.reset_utterance()
204
+
205
+ return obs, reward, done, info
206
+
207
+ def _reward(self):
208
+ if self.diminished_reward:
209
+ return super()._reward()
210
+ else:
211
+ return 1.0
212
+
213
+ def render(self, *args, **kwargs):
214
+ obs = super().render(*args, **kwargs)
215
+ self.window.set_caption(self.utterance_history, [
216
+ "Gandalf:",
217
+ "Jack:",
218
+ "John:",
219
+ "Where is the exit",
220
+ "Open sesame",
221
+ ])
222
+ return obs
223
+
224
+
225
+ class GoToDoorPoliteTesting(GoToDoorPoliteEnv):
226
+ def __init__(self):
227
+ super().__init__(
228
+ size=5,
229
+ hear_yourself=False,
230
+ diminished_reward=False,
231
+ step_penalty=True,
232
+ max_steps=100
233
+ )
234
+
235
+ class GoToDoorPolite8x8Env(GoToDoorPoliteEnv):
236
+ def __init__(self):
237
+ super().__init__(size=8, max_steps=100)
238
+
239
+
240
+ class GoToDoorPolite6x6Env(GoToDoorPoliteEnv):
241
+ def __init__(self):
242
+ super().__init__(size=6, max_steps=100)
243
+
244
+
245
+ # hear yourself
246
+ class GoToDoorPoliteHY8x8Env(GoToDoorPoliteEnv):
247
+ def __init__(self):
248
+ super().__init__(size=8, hear_yourself=True, max_steps=100)
249
+
250
+
251
+ class GoToDoorPoliteHY6x6Env(GoToDoorPoliteEnv):
252
+ def __init__(self):
253
+ super().__init__(size=6, hear_yourself=True, max_steps=100)
254
+
255
+
256
+ class GoToDoorPoliteHY5x5Env(GoToDoorPoliteEnv):
257
+ def __init__(self):
258
+ super().__init__(size=5, hear_yourself=True, max_steps=100)
259
+
260
+ register(
261
+ id='MiniGrid-GoToDoorPolite-Testing-v0',
262
+ entry_point='gym_minigrid.envs:GoToDoorPoliteTesting'
263
+ )
264
+
265
+ register(
266
+ id='MiniGrid-GoToDoorPolite-5x5-v0',
267
+ entry_point='gym_minigrid.envs:GoToDoorPoliteEnv'
268
+ )
269
+
270
+ register(
271
+ id='MiniGrid-GoToDoorPolite-6x6-v0',
272
+ entry_point='gym_minigrid.envs:GoToDoorPolite6x6Env'
273
+ )
274
+
275
+ register(
276
+ id='MiniGrid-GoToDoorPolite-8x8-v0',
277
+ entry_point='gym_minigrid.envs:GoToDoorPolite8x8Env'
278
+ )
279
+ register(
280
+ id='MiniGrid-GoToDoorPoliteHY-5x5-v0',
281
+ entry_point='gym_minigrid.envs:GoToDoorPoliteHY5x5Env'
282
+ )
283
+
284
+ register(
285
+ id='MiniGrid-GoToDoorPoliteHY-6x6-v0',
286
+ entry_point='gym_minigrid.envs:GoToDoorPoliteHY6x6Env'
287
+ )
288
+
289
+ register(
290
+ id='MiniGrid-GoToDoorPoliteHY-8x8-v0',
291
+ entry_point='gym_minigrid.envs:GoToDoorPoliteHY8x8Env'
292
+ )
gym-minigrid/gym_minigrid/backup_envs/gotodoorsesame.py ADDED
@@ -0,0 +1,165 @@
1
+ from gym_minigrid.minigrid import *
2
+ from gym_minigrid.register import register
3
+
4
+
5
+ class SesameGrammar(object):
6
+
7
+ templates = ["Open", "Who is", "Where is"]
8
+ things = ["the exit", "sesame", "the chest", "him", "that"]
9
+
10
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
11
+
12
+ @classmethod
13
+ def construct_utterance(cls, action):
14
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + "."
15
+
16
+
17
+ class GoToDoorSesameEnv(MultiModalMiniGridEnv):
18
+ """
19
+ Environment in which the agent is instructed to go to a given object
20
+ named using an English text string
21
+ """
22
+
23
+ def __init__(
24
+ self,
25
+ size=5
26
+ ):
27
+ assert size >= 5
28
+
29
+ super().__init__(
30
+ grid_size=size,
31
+ max_steps=5*size**2,
32
+ # Set this to True for maximum speed
33
+ see_through_walls=True,
34
+ actions=MiniGridEnv.Actions,
35
+ action_space=spaces.MultiDiscrete([
36
+ len(MiniGridEnv.Actions),
37
+ *SesameGrammar.grammar_action_space.nvec
38
+ ])
39
+ )
40
+
41
+ def _gen_grid(self, width, height):
42
+ # Create the grid
43
+ self.grid = Grid(width, height)
44
+
45
+ # Randomly vary the room width and height
46
+ width = self._rand_int(5, width+1)
47
+ height = self._rand_int(5, height+1)
48
+
49
+ # Generate the surrounding walls
50
+ self.grid.wall_rect(0, 0, width, height)
51
+
52
+ # Generate a single door at a random position on the top wall
53
+ doorPos = (self._rand_int(2, width-2), 0)
54
+ doorColors = self._rand_elem(COLOR_NAMES)
55
+ self.grid.set(*doorPos, Door(doorColors))
56
+
57
+ # doorPos = []
58
+ # doorPos.append((self._rand_int(2, width-2), 0))
59
+ #
60
+ # # Generate the door colors
61
+ # doorColors = []
62
+ # while len(doorColors) < len(doorPos):
63
+ # color = self._rand_elem(COLOR_NAMES)
64
+ # if color in doorColors:
65
+ # continue
66
+ # doorColors.append(color)
67
+ #
68
+ # # Place the doors in the grid
69
+ # for idx, pos in enumerate(doorPos):
70
+ # color = doorColors[idx]
71
+ # self.grid.set(*pos, Door(color))
72
+
73
+ # Randomize the agent start position and orientation
74
+ self.place_agent(size=(width, height))
75
+
76
+ # Select a random target door
77
+ # doorIdx = self._rand_int(0, len(doorPos))
78
+ # self.target_pos = doorPos[doorIdx]
79
+ # self.target_color = doorColors[doorIdx]
80
+ self.target_pos = doorPos
81
+ self.target_color = doorColors
82
+
83
+ # Generate the mission string
84
+ self.mission = 'go to the %s door' % self.target_color
85
+
86
+ # Initialize the dialogue string
87
+ self.dialogue = "This is what you hear. \n"
88
+
89
+ def gen_obs(self):
90
+ obs = super().gen_obs()
91
+
92
+ # add dialogue to obs
93
+ obs["dialogue"] = self.dialogue
94
+
95
+ return obs
96
+
97
+ def step(self, action):
98
+ p_action = action[0]
99
+ utterance_action = action[1:]
100
+
101
+ assert len(set(np.isnan(utterance_action))) == 1 # all nan or neither nan
102
+
103
+ speak_flag = not all(np.isnan(utterance_action))
104
+
105
+ obs, reward, done, info = super().step(p_action)
106
+
107
+ ax, ay = self.agent_pos
108
+ tx, ty = self.target_pos
109
+
110
+ # Don't let the agent open any of the doors
111
+ if p_action == self.actions.toggle:
112
+ done = True
113
+
114
+ # magic words in front of the door
115
+ if speak_flag:
116
+ utterance = SesameGrammar.construct_utterance(utterance_action)
117
+ self.dialogue += "YOU: " + utterance + "\n"
118
+
119
+ if utterance == SesameGrammar.construct_utterance([0, 1]):
120
+ if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
121
+ reward = self._reward()
122
+ done = True
123
+
124
+ # Reward performing done action in front of the target door
125
+ # if p_action == self.actions.done:
126
+ # if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
127
+ # reward = self._reward()
128
+ # done = True
129
+
130
+ return obs, reward, done, info
131
+
132
+ def render(self, *args, **kwargs):
133
+ obs = super().render(*args, **kwargs)
134
+ self.window.set_caption(self.dialogue, [
135
+ "Gandalf:",
136
+ "Jack:",
137
+ "John:",
138
+ "Where is the exit",
139
+ "Open sesame",
140
+ ])
141
+ return obs
142
+
143
+
144
+ class GoToDoorSesame8x8Env(GoToDoorSesameEnv):
145
+ def __init__(self):
146
+ super().__init__(size=8)
147
+
148
+ class GoToDoorSesame6x6Env(GoToDoorSesameEnv):
149
+ def __init__(self):
150
+ super().__init__(size=6)
151
+
152
+ register(
153
+ id='MiniGrid-GoToDoorSesame-5x5-v0',
154
+ entry_point='gym_minigrid.envs:GoToDoorSesameEnv'
155
+ )
156
+
157
+ register(
158
+ id='MiniGrid-GoToDoorSesame-6x6-v0',
159
+ entry_point='gym_minigrid.envs:GoToDoorSesame6x6Env'
160
+ )
161
+
162
+ register(
163
+ id='MiniGrid-GoToDoorSesame-8x8-v0',
164
+ entry_point='gym_minigrid.envs:GoToDoorSesame8x8Env'
165
+ )
gym-minigrid/gym_minigrid/backup_envs/gotodoortalk.py ADDED
@@ -0,0 +1,189 @@
1
+ from gym_minigrid.minigrid import *
2
+ from gym_minigrid.register import register
3
+
4
+
5
+ # these two classes should maybe be extracted to a utils file so they can be used all over our envs
6
+
7
+
8
+ class GoToDoorTalkEnv(MultiModalMiniGridEnv):
9
+ """
10
+ Environment in which the agent is instructed to go to a given object
11
+ named using an English text string
12
+ """
13
+
14
+ def __init__(
15
+ self,
16
+ size=5,
17
+ hear_yourself=False,
18
+ ):
19
+ assert size >= 5
20
+
21
+ super().__init__(
22
+ grid_size=size,
23
+ max_steps=5*size**2,
24
+ # Set this to True for maximum speed
25
+ see_through_walls=True,
26
+ actions=MiniGridEnv.Actions,
27
+ action_space=spaces.MultiDiscrete([
28
+ len(MiniGridEnv.Actions),
29
+ *Grammar.grammar_action_space.nvec
30
+ ])
31
+ )
32
+ self.hear_yourself = hear_yourself
33
+
34
+ self.empty_symbol = "NA \n"
35
+
36
+ def _gen_grid(self, width, height):
37
+ # Create the grid
38
+ self.grid = Grid(width, height)
39
+
40
+ # Randomly vary the room width and height
41
+ width = self._rand_int(5, width+1)
42
+ height = self._rand_int(5, height+1)
43
+
44
+ # Generate the surrounding walls
45
+ self.grid.wall_rect(0, 0, width, height)
46
+
47
+ # Generate the 4 doors at random positions
48
+ doorPos = []
49
+ doorPos.append((self._rand_int(2, width-2), 0))
50
+ doorPos.append((self._rand_int(2, width-2), height-1))
51
+ doorPos.append((0, self._rand_int(2, height-2)))
52
+ doorPos.append((width-1, self._rand_int(2, height-2)))
53
+
54
+ # Generate the door colors
55
+ doorColors = []
56
+ while len(doorColors) < len(doorPos):
57
+ color = self._rand_elem(COLOR_NAMES)
58
+ if color in doorColors:
59
+ continue
60
+ doorColors.append(color)
61
+
62
+ # Place the doors in the grid
63
+ for idx, pos in enumerate(doorPos):
64
+ color = doorColors[idx]
65
+ self.grid.set(*pos, Door(color))
66
+
67
+ # Randomize the agent start position and orientation
68
+ self.place_agent(size=(width, height))
69
+
70
+ # Select a random target door
71
+ doorIdx = self._rand_int(0, len(doorPos))
72
+ self.target_pos = doorPos[doorIdx]
73
+ self.target_color = doorColors[doorIdx]
74
+
75
+ # Generate the mission string
76
+ self.mission = 'go to the %s door' % self.target_color
77
+
78
+ # Dummy beginning string
79
+ self.beginning_string = "This is what you hear. \n"
80
+ self.utterance = self.beginning_string
81
+
82
+ # utterance appended at the end of each step
83
+ self.utterance_history = ""
84
+
85
+ def step(self, action):
86
+ p_action = action[0]
87
+ utterance_action = action[1:]
88
+
89
+ assert len(set(np.isnan(utterance_action))) == 1 # all nan or neither nan
90
+
91
+ speak_flag = not all(np.isnan(utterance_action))
92
+
93
+ if speak_flag:
94
+ agent_utterance = Grammar.construct_utterance(utterance_action)
95
+
96
+ reply = self.mission
97
+ NPC_name = "Wizard"
98
+
99
+ if self.hear_yourself:
100
+ self.utterance += "YOU: {} \n".format(agent_utterance)
101
+
102
+ self.utterance += "{}: {} \n".format(NPC_name, reply)
103
+
104
+ obs, reward, done, info = super().step(p_action)
105
+
106
+ # Don't let the agent open any of the doors
107
+ if p_action == self.actions.toggle:
108
+ done = True
109
+
110
+ # Reward performing done action in front of the target door
111
+ if p_action == self.actions.done:
112
+ ax, ay = self.agent_pos
113
+ tx, ty = self.target_pos
114
+
115
+ if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
116
+ reward = self._reward()
117
+ done = True
118
+
119
+ # fill observation with text
120
+ self.append_existing_utterance_to_history()
121
+ obs = self.add_utterance_to_observation(obs)
122
+ self.reset_utterance()
123
+
124
+ return obs, reward, done, info
125
+
126
+ def render(self, *args, **kwargs):
127
+ obs = super().render(*args, **kwargs)
128
+ self.window.set_caption(self.utterance_history, [
129
+ "Gandalf:",
130
+ "Jack:",
131
+ "John:",
132
+ "Where is the exit",
133
+ "Open sesame",
134
+ ])
135
+ return obs
136
+
137
+
138
+ class GoToDoorTalk8x8Env(GoToDoorTalkEnv):
139
+ def __init__(self):
140
+ super().__init__(size=8)
141
+
142
+ class GoToDoorTalk6x6Env(GoToDoorTalkEnv):
143
+ def __init__(self):
144
+ super().__init__(size=6)
145
+
146
+ # hear yourself
147
+ class GoToDoorTalkHY8x8Env(GoToDoorTalkEnv):
148
+ def __init__(self):
149
+ super().__init__(size=8, hear_yourself=True)
150
+
151
+ class GoToDoorTalkHY6x6Env(GoToDoorTalkEnv):
152
+ def __init__(self):
153
+ super().__init__(size=6, hear_yourself=True)
154
+
155
+ class GoToDoorTalkHYEnv(GoToDoorTalkEnv):
156
+ def __init__(self):
157
+ super().__init__(size=5, hear_yourself=True)
158
+
159
+
160
+ register(
161
+ id='MiniGrid-GoToDoorTalk-5x5-v0',
162
+ entry_point='gym_minigrid.envs:GoToDoorTalkEnv'
163
+ )
164
+
165
+ register(
166
+ id='MiniGrid-GoToDoorTalk-6x6-v0',
167
+ entry_point='gym_minigrid.envs:GoToDoorTalk6x6Env'
168
+ )
169
+
170
+ register(
171
+ id='MiniGrid-GoToDoorTalk-8x8-v0',
172
+ entry_point='gym_minigrid.envs:GoToDoorTalk8x8Env'
173
+ )
174
+
175
+ # hear yourself
176
+ register(
177
+ id='MiniGrid-GoToDoorTalkHY-5x5-v0',
178
+ entry_point='gym_minigrid.envs:GoToDoorTalkHYEnv'
179
+ )
180
+
181
+ register(
182
+ id='MiniGrid-GoToDoorTalkHY-6x6-v0',
183
+ entry_point='gym_minigrid.envs:GoToDoorTalkHY6x6Env'
184
+ )
185
+
186
+ register(
187
+ id='MiniGrid-GoToDoorTalkHY-8x8-v0',
188
+ entry_point='gym_minigrid.envs:GoToDoorTalkHY8x8Env'
189
+ )
gym-minigrid/gym_minigrid/backup_envs/gotodoortalkhard.py ADDED
@@ -0,0 +1,199 @@
1
+ from gym_minigrid.minigrid import *
2
+ from gym_minigrid.register import register
3
+
4
+
5
+
6
+ class TalkHardGrammar(object):
7
+
8
+ templates = ["Where is", "What is"]
9
+ things = ["the exit", "the chair"]
10
+
11
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
12
+
13
+ @classmethod
14
+ def construct_utterance(cls, action):
15
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + "."
16
+
17
+
18
+ class GoToDoorTalkHardEnv(MultiModalMiniGridEnv):
19
+ """
20
+ Environment in which the agent is instructed to go to a given object
21
+ named using an English text string
22
+ """
23
+
24
+ def __init__(
25
+ self,
26
+ size=5,
27
+ hear_yourself=False,
28
+ ):
29
+ assert size >= 5
30
+
31
+ super().__init__(
32
+ grid_size=size,
33
+ max_steps=5*size**2,
34
+ # Set this to True for maximum speed
35
+ see_through_walls=True,
36
+ actions=MiniGridEnv.Actions,
37
+ action_space=spaces.MultiDiscrete([
38
+ len(MiniGridEnv.Actions),
39
+ *TalkHardGrammar.grammar_action_space.nvec
40
+ ])
41
+ )
42
+ self.hear_yourself = hear_yourself
43
+
44
+ def _gen_grid(self, width, height):
45
+ # Create the grid
46
+ self.grid = Grid(width, height)
47
+
48
+ # Randomly vary the room width and height
49
+ width = self._rand_int(5, width+1)
50
+ height = self._rand_int(5, height+1)
51
+
52
+ # Generate the surrounding walls
53
+ self.grid.wall_rect(0, 0, width, height)
54
+
55
+ # Generate the 4 doors at random positions
56
+ doorPos = []
57
+ doorPos.append((self._rand_int(2, width-2), 0))
58
+ doorPos.append((self._rand_int(2, width-2), height-1))
59
+ doorPos.append((0, self._rand_int(2, height-2)))
60
+ doorPos.append((width-1, self._rand_int(2, height-2)))
61
+
62
+ # Generate the door colors
63
+ doorColors = []
64
+ while len(doorColors) < len(doorPos):
65
+ color = self._rand_elem(COLOR_NAMES)
66
+ if color in doorColors:
67
+ continue
68
+ doorColors.append(color)
69
+
70
+ # Place the doors in the grid
71
+ for idx, pos in enumerate(doorPos):
72
+ color = doorColors[idx]
73
+ self.grid.set(*pos, Door(color))
74
+
75
+ # Randomize the agent start position and orientation
76
+ self.place_agent(size=(width, height))
77
+
78
+ # Select a random target door
79
+ doorIdx = self._rand_int(0, len(doorPos))
80
+ self.target_pos = doorPos[doorIdx]
81
+ self.target_color = doorColors[doorIdx]
82
+
83
+ # Generate the mission string
84
+ self.mission = 'go to the %s door' % self.target_color
85
+
86
+ # Initialize the dialogue string
87
+ self.dialogue = "This is what you hear. "
88
+
89
+ def gen_obs(self):
90
+ obs = super().gen_obs()
91
+
92
+ # add dialogue to obs
93
+ obs["dialogue"] = self.dialogue
94
+
95
+ return obs
96
+
97
+ def step(self, action):
98
+ p_action = action[0]
99
+ utterance_action = action[1:]
100
+
101
+ # assert all nan or neither nan
102
+ assert len(set(np.isnan(utterance_action))) == 1
103
+
104
+ speak_flag = not all(np.isnan(utterance_action))
105
+
106
+ if speak_flag:
107
+ utterance = TalkHardGrammar.construct_utterance(utterance_action)
108
+
109
+ reply = self.mission
110
+ NPC_name = "Wizard"
111
+
112
+ if self.hear_yourself:
113
+ self.dialogue += "YOU: {} \n".format(utterance)
114
+
115
+ if utterance == TalkHardGrammar.construct_utterance([0, 0]):
116
+ self.dialogue += "{}: {} \n".format(NPC_name, reply) # dummy reply gives mission
117
+
118
+ obs, reward, done, info = super().step(p_action)
119
+
120
+ ax, ay = self.agent_pos
121
+ tx, ty = self.target_pos
122
+
123
+ # Don't let the agent open any of the doors
124
+ if p_action == self.actions.toggle:
125
+ done = True
126
+
127
+ # Reward performing done action in front of the target door
128
+ if p_action == self.actions.done:
129
+ if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
130
+ reward = self._reward()
131
+ done = True
132
+
133
+ return obs, reward, done, info
134
+
135
+ def render(self, *args, **kwargs):
136
+ obs = super().render(*args, **kwargs)
137
+ self.window.set_caption(self.dialogue, [
138
+ "Gandalf:",
139
+ "Jack:",
140
+ "John:",
141
+ "Where is the exit",
142
+ "Open sesame",
143
+ ])
144
+ return obs
145
+
146
+
147
+ class GoToDoorTalkHard8x8Env(GoToDoorTalkHardEnv):
148
+ def __init__(self):
149
+ super().__init__(size=8)
150
+
151
+
152
+ class GoToDoorTalkHard6x6Env(GoToDoorTalkHardEnv):
153
+ def __init__(self):
154
+ super().__init__(size=6)
155
+
156
+
157
+ # hear yourself
158
+ class GoToDoorTalkHardHY8x8Env(GoToDoorTalkHardEnv):
159
+ def __init__(self):
160
+ super().__init__(size=8, hear_yourself=True)
161
+
162
+
163
+ class GoToDoorTalkHardHY6x6Env(GoToDoorTalkHardEnv):
164
+ def __init__(self):
165
+ super().__init__(size=6, hear_yourself=True)
166
+
167
+
168
+ class GoToDoorTalkHardHY5x5Env(GoToDoorTalkHardEnv):
169
+ def __init__(self):
170
+ super().__init__(size=5, hear_yourself=True)
171
+
172
+ register(
173
+ id='MiniGrid-GoToDoorTalkHard-5x5-v0',
174
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardEnv'
175
+ )
176
+
177
+ register(
178
+ id='MiniGrid-GoToDoorTalkHard-6x6-v0',
179
+ entry_point='gym_minigrid.envs:GoToDoorTalkHard6x6Env'
180
+ )
181
+
182
+ register(
183
+ id='MiniGrid-GoToDoorTalkHard-8x8-v0',
184
+ entry_point='gym_minigrid.envs:GoToDoorTalkHard8x8Env'
185
+ )
186
+ register(
187
+ id='MiniGrid-GoToDoorTalkHardHY-5x5-v0',
188
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardHY5x5Env'
189
+ )
190
+
191
+ register(
192
+ id='MiniGrid-GoToDoorTalkHardHY-6x6-v0',
193
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardHY6x6Env'
194
+ )
195
+
196
+ register(
197
+ id='MiniGrid-GoToDoorTalkHardHY-8x8-v0',
198
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardHY8x8Env'
199
+ )
gym-minigrid/gym_minigrid/backup_envs/gotodoortalkhardnpc.py ADDED
@@ -0,0 +1,283 @@
1
+ from gym_minigrid.minigrid import *
2
+ from gym_minigrid.register import register
3
+
4
+
5
+ class Guide(NPC):
6
+ """
7
+ A simple NPC that wants the agent to go to an object (randomly chosen from the object_pos list)
8
+ """
9
+
10
+ def __init__(self, color, name, env):
11
+ super().__init__(color)
12
+ self.name = name
13
+ self.env = env
14
+ self.has_spoken = False # wizards only speak once
15
+ self.npc_type = 0
16
+
17
+ def listen(self, utterance):
18
+ if utterance == TalkHardSesameGrammar.construct_utterance([0, 1]):
19
+ return self.env.mission
20
+
21
+ return None
22
+
23
+ # def is_near_agent(self):
24
+ # ax, ay = self.env.agent_pos
25
+ # wx, wy = self.cur_pos
26
+ # if (ax == wx and abs(ay - wy) == 1) or (ay == wy and abs(ax - wx) == 1):
27
+ # return True
28
+ # return False
29
+
30
+
31
+ class TalkHardSesameGrammar(object):
32
+
33
+ templates = ["Where is", "Open"]
34
+ things = ["sesame", "the exit"]
35
+
36
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
37
+
38
+ @classmethod
39
+ def construct_utterance(cls, action):
40
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
41
+
42
+
43
+ class GoToDoorTalkHardNPCEnv(MultiModalMiniGridEnv):
44
+ """
45
+ Environment in which the agent is instructed to go to a given object
46
+ named using an English text string
47
+ """
48
+
49
+ def __init__(
50
+ self,
51
+ size=5,
52
+ hear_yourself=False,
53
+ diminished_reward=True,
54
+ step_penalty=False
55
+ ):
56
+ assert size >= 5
57
+
58
+ super().__init__(
59
+ grid_size=size,
60
+ max_steps=5*size**2,
61
+ # Set this to True for maximum speed
62
+ see_through_walls=True,
63
+ actions=MiniGridEnv.Actions,
64
+ action_space=spaces.MultiDiscrete([
65
+ len(MiniGridEnv.Actions),
66
+ *TalkHardSesameGrammar.grammar_action_space.nvec
67
+ ])
68
+ )
69
+ self.hear_yourself = hear_yourself
70
+ self.diminished_reward = diminished_reward
71
+ self.step_penalty = step_penalty
72
+
73
+ self.empty_symbol = "NA \n"
74
+
75
+ print({
76
+ "size": size,
77
+ "hear_yourself": hear_yourself,
78
+ "diminished_reward": diminished_reward,
79
+ "step_penalty": step_penalty,
80
+ })
81
+
82
+ def _gen_grid(self, width, height):
83
+ # Create the grid
84
+ self.grid = Grid(width, height)
85
+
86
+ # Randomly vary the room width and height
87
+ width = self._rand_int(5, width+1)
88
+ height = self._rand_int(5, height+1)
89
+
90
+ # Generate the surrounding walls
91
+ self.grid.wall_rect(0, 0, width, height)
92
+
93
+ # Generate the surrounding walls
94
+ self.grid.wall_rect(0, 0, width, height)
95
+
96
+ # Generate the 4 doors at random positions
97
+ self.door_pos = []
98
+ self.door_front_pos = [] # Remembers positions in front of the doors, to avoid placing the wizard there
99
+
100
+ self.door_pos.append((self._rand_int(2, width-2), 0))
101
+ self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1]+1))
102
+
103
+ self.door_pos.append((self._rand_int(2, width-2), height-1))
104
+ self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1] - 1))
105
+
106
+ self.door_pos.append((0, self._rand_int(2, height-2)))
107
+ self.door_front_pos.append((self.door_pos[-1][0] + 1, self.door_pos[-1][1]))
108
+
109
+ self.door_pos.append((width-1, self._rand_int(2, height-2)))
110
+ self.door_front_pos.append((self.door_pos[-1][0] - 1, self.door_pos[-1][1]))
111
+
112
+ # Generate the door colors
113
+ self.door_colors = []
114
+ while len(self.door_colors) < len(self.door_pos):
115
+ color = self._rand_elem(COLOR_NAMES)
116
+ if color in self.door_colors:
117
+ continue
118
+ self.door_colors.append(color)
119
+
120
+ # Place the doors in the grid
121
+ for idx, pos in enumerate(self.door_pos):
122
+ color = self.door_colors[idx]
123
+ self.grid.set(*pos, Door(color))
124
+
125
+ # Set a randomly coloured NPC at a random position
126
+ color = self._rand_elem(COLOR_NAMES)
127
+ self.wizard = Guide(color, "Gandalf", self)
128
+
129
+ # Place it randomly, omitting front of door positions
130
+ self.place_obj(self.wizard,
131
+ size=(width, height),
132
+ reject_fn=lambda _, p: tuple(p) in self.door_front_pos)
133
+
134
+ # Randomize the agent start position and orientation
135
+ self.place_agent(size=(width, height))
136
+
137
+ # Select a random target door
138
+ self.doorIdx = self._rand_int(0, len(self.door_pos))
139
+ self.target_pos = self.door_pos[self.doorIdx]
140
+ self.target_color = self.door_colors[self.doorIdx]
141
+
142
+ # Generate the mission string
143
+ self.mission = 'go to the %s door' % self.target_color
144
+
145
+ # Dummy beginning string
146
+ self.beginning_string = "This is what you hear. \n"
147
+ self.utterance = self.beginning_string
148
+
149
+ # utterance appended at the end of each step
150
+ self.utterance_history = ""
151
+
152
+ def step(self, action):
153
+ p_action = action[0]
154
+ utterance_action = action[1:]
155
+
156
+ # assert all nan or neither nan
157
+ assert len(set(np.isnan(utterance_action))) == 1
158
+
159
+ speak_flag = not all(np.isnan(utterance_action))
160
+
161
+
162
+ obs, reward, done, info = super().step(p_action)
163
+
164
+ if speak_flag:
165
+ agent_utterance = TalkHardSesameGrammar.construct_utterance(utterance_action)
166
+ if self.hear_yourself:
167
+ self.utterance += "YOU: {} \n".format(agent_utterance)
168
+
169
+ # check if near wizard
170
+ if self.wizard.is_near_agent():
171
+ reply = self.wizard.listen(agent_utterance)
172
+
173
+ if reply:
174
+ self.utterance += "{}: {} \n".format(self.wizard.name, reply)
175
+
176
+ # Don't let the agent open any of the doors
177
+ if p_action == self.actions.toggle:
178
+ done = True
179
+
180
+ if p_action == self.actions.done:
181
+ ax, ay = self.agent_pos
182
+ tx, ty = self.target_pos
183
+
184
+ if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
185
+ reward = self._reward()
186
+ done = True
187
+
188
+ # discount
189
+ if self.step_penalty:
190
+ reward = reward - 0.01
191
+
192
+ # fill observation with text
193
+ self.append_existing_utterance_to_history()
194
+ obs = self.add_utterance_to_observation(obs)
195
+ self.reset_utterance()
196
+
197
+ return obs, reward, done, info
198
+
199
+ def _reward(self):
200
+ if self.diminished_reward:
201
+ return super()._reward()
202
+ else:
203
+ return 1.0
204
+
205
+ def render(self, *args, **kwargs):
206
+ obs = super().render(*args, **kwargs)
207
+ self.window.set_caption(self.utterance_history, [
208
+ "Gandalf:",
209
+ "Jack:",
210
+ "John:",
211
+ "Where is the exit",
212
+ "Open sesame",
213
+ ])
214
+ return obs
215
+
216
+
217
+ class GoToDoorTalkHardNPCTesting(GoToDoorTalkHardNPCEnv):
218
+ def __init__(self):
219
+ super().__init__(
220
+ size=5,
221
+ hear_yourself=False,
222
+ diminished_reward=False,
223
+ step_penalty=True
224
+ )
225
+
226
+ class GoToDoorTalkHardNPC8x8Env(GoToDoorTalkHardNPCEnv):
227
+ def __init__(self):
228
+ super().__init__(size=8)
229
+
230
+
231
+ class GoToDoorTalkHardNPC6x6Env(GoToDoorTalkHardNPCEnv):
232
+ def __init__(self):
233
+ super().__init__(size=6)
234
+
235
+
236
+ # hear yourself
237
+ class GoToDoorTalkHardNPCHY8x8Env(GoToDoorTalkHardNPCEnv):
238
+ def __init__(self):
239
+ super().__init__(size=8, hear_yourself=True)
240
+
241
+
242
+ class GoToDoorTalkHardNPCHY6x6Env(GoToDoorTalkHardNPCEnv):
243
+ def __init__(self):
244
+ super().__init__(size=6, hear_yourself=True)
245
+
246
+
247
+ class GoToDoorTalkHardNPCHY5x5Env(GoToDoorTalkHardNPCEnv):
248
+ def __init__(self):
249
+ super().__init__(size=5, hear_yourself=True)
250
+
251
+ register(
252
+ id='MiniGrid-GoToDoorTalkHardNPC-Testing-v0',
253
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardNPCTesting'
254
+ )
255
+
256
+ register(
257
+ id='MiniGrid-GoToDoorTalkHardNPC-5x5-v0',
258
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardNPCEnv'
259
+ )
260
+
261
+ register(
262
+ id='MiniGrid-GoToDoorTalkHardNPC-6x6-v0',
263
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardNPC6x6Env'
264
+ )
265
+
266
+ register(
267
+ id='MiniGrid-GoToDoorTalkHardNPC-8x8-v0',
268
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardNPC8x8Env'
269
+ )
270
+ register(
271
+ id='MiniGrid-GoToDoorTalkHardNPCHY-5x5-v0',
272
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardNPCHY5x5Env'
273
+ )
274
+
275
+ register(
276
+ id='MiniGrid-GoToDoorTalkHardNPCHY-6x6-v0',
277
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardNPCHY6x6Env'
278
+ )
279
+
280
+ register(
281
+ id='MiniGrid-GoToDoorTalkHardNPCHY-8x8-v0',
282
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardNPCHY8x8Env'
283
+ )
gym-minigrid/gym_minigrid/backup_envs/gotodoortalkhardsesame.py ADDED
@@ -0,0 +1,204 @@
1
+ from gym_minigrid.minigrid import *
2
+ from gym_minigrid.register import register
3
+
4
+
5
+
6
+ class TalkHardSesameGrammar(object):
7
+
8
+ templates = ["Where is", "Open"]
9
+ things = ["sesame", "the exit"]
10
+
11
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
12
+
13
+ @classmethod
14
+ def construct_utterance(cls, action):
15
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
16
+
17
+
18
+ class GoToDoorTalkHardSesameEnv(MultiModalMiniGridEnv):
19
+ """
20
+ Environment in which the agent is instructed to go to a given object
21
+ named using an English text string
22
+ """
23
+
24
+ def __init__(
25
+ self,
26
+ size=5,
27
+ hear_yourself=False,
28
+ ):
29
+ assert size >= 5
30
+
31
+ super().__init__(
32
+ grid_size=size,
33
+ max_steps=5*size**2,
34
+ # Set this to True for maximum speed
35
+ see_through_walls=True,
36
+ actions=MiniGridEnv.Actions,
37
+ action_space=spaces.MultiDiscrete([
38
+ len(MiniGridEnv.Actions),
39
+ *TalkHardSesameGrammar.grammar_action_space.nvec
40
+ ])
41
+ )
42
+ self.hear_yourself = hear_yourself
43
+
44
+ self.empty_symbol = "NA \n"
45
+
46
+ def _gen_grid(self, width, height):
47
+ # Create the grid
48
+ self.grid = Grid(width, height)
49
+
50
+ # Randomly vary the room width and height
51
+ width = self._rand_int(5, width+1)
52
+ height = self._rand_int(5, height+1)
53
+
54
+ # Generate the surrounding walls
55
+ self.grid.wall_rect(0, 0, width, height)
56
+
57
+ # Generate the 4 doors at random positions
58
+ self.doorPos = []
59
+ self.doorPos.append((self._rand_int(2, width-2), 0))
60
+ self.doorPos.append((self._rand_int(2, width-2), height-1))
61
+ self.doorPos.append((0, self._rand_int(2, height-2)))
62
+ self.doorPos.append((width-1, self._rand_int(2, height-2)))
63
+
64
+ # Generate the door colors
65
+ doorColors = []
66
+ while len(doorColors) < len(self.doorPos):
67
+ color = self._rand_elem(COLOR_NAMES)
68
+ if color in doorColors:
69
+ continue
70
+ doorColors.append(color)
71
+
72
+ # Place the doors in the grid
73
+ for idx, pos in enumerate(self.doorPos):
74
+ color = doorColors[idx]
75
+ self.grid.set(*pos, Door(color))
76
+
77
+ # Randomize the agent start position and orientation
78
+ self.place_agent(size=(width, height))
79
+
80
+ # Select a random target door
81
+ doorIdx = self._rand_int(0, len(self.doorPos))
82
+ self.target_pos = self.doorPos[doorIdx]
83
+ self.target_color = doorColors[doorIdx]
84
+
85
+ # Generate the mission string
86
+ self.mission = 'go to the %s door' % self.target_color
87
+
88
+ # Dummy beginning string
89
+ self.beginning_string = "This is what you hear. \n"
90
+ self.utterance = self.beginning_string
91
+
92
+ # utterance appended at the end of each step
93
+ self.utterance_history = ""
94
+
95
+ def step(self, action):
96
+ p_action = action[0]
97
+ utterance_action = action[1:]
98
+
99
+ # assert all nan or neither nan
100
+ assert len(set(np.isnan(utterance_action))) == 1
101
+
102
+ speak_flag = not all(np.isnan(utterance_action))
103
+
104
+ obs, reward, done, info = super().step(p_action)
105
+
106
+ if speak_flag:
107
+ utterance = TalkHardSesameGrammar.construct_utterance(utterance_action)
108
+
109
+ if self.hear_yourself:
110
+ self.utterance += "YOU: {} \n".format(utterance)
111
+
112
+ if utterance == TalkHardSesameGrammar.construct_utterance([0, 1]):
113
+ reply = self.mission
114
+ NPC_name = "Wizard"
115
+ self.utterance += "{}: {} \n".format(NPC_name, reply) # dummy reply gives mission
116
+
117
+ elif utterance == TalkHardSesameGrammar.construct_utterance([1, 0]):
118
+ ax, ay = self.agent_pos
119
+ tx, ty = self.target_pos
120
+
121
+ if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
122
+ reward = self._reward()
123
+
124
+ for dx, dy in self.doorPos:
125
+ if (ax == dx and abs(ay - dy) == 1) or (ay == dy and abs(ax - dx) == 1):
126
+ # the agent has chosen some door; the episode ends regardless of whether it is the correct one
127
+ done = True
128
+
129
+ # Don't let the agent open any of the doors
130
+ if p_action == self.actions.toggle:
131
+ done = True
132
+
133
+ # fill observation with text
134
+ self.append_existing_utterance_to_history()
135
+ obs = self.add_utterance_to_observation(obs)
136
+ self.reset_utterance()
137
+
138
+ return obs, reward, done, info
139
+
140
+ def render(self, *args, **kwargs):
141
+ obs = super().render(*args, **kwargs)
142
+ self.window.set_caption(self.utterance_history, [
143
+ "Gandalf:",
144
+ "Jack:",
145
+ "John:",
146
+ "Where is the exit",
147
+ "Open sesame",
148
+ ])
149
+ return obs
150
+
151
+
152
+ class GoToDoorTalkHardSesame8x8Env(GoToDoorTalkHardSesameEnv):
153
+ def __init__(self):
154
+ super().__init__(size=8)
155
+
156
+
157
+ class GoToDoorTalkHardSesame6x6Env(GoToDoorTalkHardSesameEnv):
158
+ def __init__(self):
159
+ super().__init__(size=6)
160
+
161
+
162
+ # hear yourself
163
+ class GoToDoorTalkHardSesameHY8x8Env(GoToDoorTalkHardSesameEnv):
164
+ def __init__(self):
165
+ super().__init__(size=8, hear_yourself=True)
166
+
167
+
168
+ class GoToDoorTalkHardSesameHY6x6Env(GoToDoorTalkHardSesameEnv):
169
+ def __init__(self):
170
+ super().__init__(size=6, hear_yourself=True)
171
+
172
+
173
+ class GoToDoorTalkHardSesameHY5x5Env(GoToDoorTalkHardSesameEnv):
174
+ def __init__(self):
175
+ super().__init__(size=5, hear_yourself=True)
176
+
177
+ register(
178
+ id='MiniGrid-GoToDoorTalkHardSesame-5x5-v0',
179
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameEnv'
180
+ )
181
+
182
+ register(
183
+ id='MiniGrid-GoToDoorTalkHardSesame-6x6-v0',
184
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesame6x6Env'
185
+ )
186
+
187
+ register(
188
+ id='MiniGrid-GoToDoorTalkHardSesame-8x8-v0',
189
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesame8x8Env'
190
+ )
191
+ register(
192
+ id='MiniGrid-GoToDoorTalkHardSesameHY-5x5-v0',
193
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameHY5x5Env'
194
+ )
195
+
196
+ register(
197
+ id='MiniGrid-GoToDoorTalkHardSesameHY-6x6-v0',
198
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameHY6x6Env'
199
+ )
200
+
201
+ register(
202
+ id='MiniGrid-GoToDoorTalkHardSesameHY-8x8-v0',
203
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameHY8x8Env'
204
+ )
gym-minigrid/gym_minigrid/backup_envs/gotodoortalkhardsesamnpc.py ADDED
@@ -0,0 +1,294 @@
1
+ from gym_minigrid.minigrid import *
2
+ from gym_minigrid.register import register
3
+
4
+
5
+ class Guide(NPC):
6
+ """
7
+ A simple NPC that wants the agent to go to an object (randomly chosen from the object_pos list)
8
+ """
9
+
10
+ def __init__(self, color, name, env):
11
+ super().__init__(color)
12
+ self.name = name
13
+ self.env = env
14
+ self.npc_type = 0
15
+
16
+ def listen(self, utterance):
17
+ if utterance == TalkHardSesameGrammar.construct_utterance([0, 1]):
18
+ return self.env.mission
19
+
20
+ return None
21
+
22
+ def is_near_agent(self):
23
+ ax, ay = self.env.agent_pos
24
+ wx, wy = self.cur_pos
25
+ if (ax == wx and abs(ay - wy) == 1) or (ay == wy and abs(ax - wx) == 1):
26
+ return True
27
+ return False
28
+
29
+
30
+ class TalkHardSesameGrammar(object):
31
+
32
+ templates = ["Where is", "Open"]
33
+ things = ["sesame", "the exit"]
34
+
35
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
36
+
37
+ @classmethod
38
+ def construct_utterance(cls, action):
39
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
40
+
41
+
42
+ class GoToDoorTalkHardSesameNPCEnv(MultiModalMiniGridEnv):
43
+ """
44
+ Environment in which the agent is instructed to go to a given object
45
+ named using an English text string
46
+ """
47
+
48
+ def __init__(
49
+ self,
50
+ size=5,
51
+ hear_yourself=False,
52
+ diminished_reward=True,
53
+ step_penalty=False
54
+ ):
55
+ assert size >= 5
56
+
57
+ super().__init__(
58
+ grid_size=size,
59
+ max_steps=5*size**2,
60
+ # Set this to True for maximum speed
61
+ see_through_walls=True,
62
+ actions=MiniGridEnv.Actions,
63
+ action_space=spaces.MultiDiscrete([
64
+ len(MiniGridEnv.Actions),
65
+ *TalkHardSesameGrammar.grammar_action_space.nvec
66
+ ])
67
+ )
68
+ self.hear_yourself = hear_yourself
69
+ self.diminished_reward = diminished_reward
70
+ self.step_penalty = step_penalty
71
+
72
+ self.empty_symbol = "NA \n"
73
+
74
+ print({
75
+ "size": size,
76
+ "hear_yourself": hear_yourself,
77
+ "diminished_reward": diminished_reward,
78
+ "step_penalty": step_penalty,
79
+ })
80
+
81
+ def _gen_grid(self, width, height):
82
+ # Create the grid
83
+ self.grid = Grid(width, height)
84
+
85
+ # Randomly vary the room width and height
86
+ width = self._rand_int(5, width+1)
87
+ height = self._rand_int(5, height+1)
88
+
89
+ # Generate the surrounding walls
90
+ self.grid.wall_rect(0, 0, width, height)
91
+
92
+ # Generate the surrounding walls
93
+ self.grid.wall_rect(0, 0, width, height)
94
+
95
+ # Generate the 4 doors at random positions
96
+ self.door_pos = []
97
+ self.door_front_pos = [] # Remembers positions in front of the doors, to avoid placing the wizard there
98
+
99
+ self.door_pos.append((self._rand_int(2, width-2), 0))
100
+ self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1]+1))
101
+
102
+ self.door_pos.append((self._rand_int(2, width-2), height-1))
103
+ self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1] - 1))
104
+
105
+ self.door_pos.append((0, self._rand_int(2, height-2)))
106
+ self.door_front_pos.append((self.door_pos[-1][0] + 1, self.door_pos[-1][1]))
107
+
108
+ self.door_pos.append((width-1, self._rand_int(2, height-2)))
109
+ self.door_front_pos.append((self.door_pos[-1][0] - 1, self.door_pos[-1][1]))
110
+
111
+ # Generate the door colors
112
+ self.door_colors = []
113
+ while len(self.door_colors) < len(self.door_pos):
114
+ color = self._rand_elem(COLOR_NAMES)
115
+ if color in self.door_colors:
116
+ continue
117
+ self.door_colors.append(color)
118
+
119
+ # Place the doors in the grid
120
+ for idx, pos in enumerate(self.door_pos):
121
+ color = self.door_colors[idx]
122
+ self.grid.set(*pos, Door(color))
123
+
124
+ # Set a randomly coloured NPC at a random position
125
+ color = self._rand_elem(COLOR_NAMES)
126
+ self.wizard = Guide(color, "Gandalf", self)
127
+
128
+ # Place it randomly, omitting front of door positions
129
+ self.place_obj(self.wizard,
130
+ size=(width, height),
131
+ reject_fn=lambda _, p: tuple(p) in self.door_front_pos)
132
+
133
+ # Randomize the agent start position and orientation
134
+ self.place_agent(size=(width, height))
135
+
136
+ # Select a random target door
137
+ self.doorIdx = self._rand_int(0, len(self.door_pos))
138
+ self.target_pos = self.door_pos[self.doorIdx]
139
+ self.target_color = self.door_colors[self.doorIdx]
140
+
141
+ # Generate the mission string
142
+ self.mission = 'go to the %s door' % self.target_color
143
+
144
+ # Dummy beginning string
145
+ self.beginning_string = "This is what you hear. \n"
146
+ self.utterance = self.beginning_string
147
+
148
+ # utterance appended at the end of each step
149
+ self.utterance_history = ""
150
+
151
+ self.conversation = self.utterance
152
+
153
+ def step(self, action):
154
+ p_action = action[0]
155
+ utterance_action = action[1:]
156
+
157
+ # assert all nan or neither nan
158
+ assert len(set(np.isnan(utterance_action))) == 1
159
+
160
+ speak_flag = not all(np.isnan(utterance_action))
161
+
162
+ obs, reward, done, info = super().step(p_action)
163
+
164
+ if speak_flag:
165
+ utterance = TalkHardSesameGrammar.construct_utterance(utterance_action)
166
+ if self.hear_yourself:
167
+ self.utterance += "YOU: {} \n".format(utterance)
168
+
169
+ self.conversation += "YOU: {} \n".format(utterance)
170
+
171
+ # check if near wizard
172
+ if self.wizard.is_near_agent():
173
+ reply = self.wizard.listen(utterance)
174
+
175
+ if reply:
176
+ self.utterance += "{}: {} \n".format(self.wizard.name, reply)
177
+ self.conversation += "{}: {} \n".format(self.wizard.name, reply)
178
+
179
+ if utterance == TalkHardSesameGrammar.construct_utterance([1, 0]):
180
+ ax, ay = self.agent_pos
181
+ tx, ty = self.target_pos
182
+
183
+ if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
184
+ reward = self._reward()
185
+
186
+ for dx, dy in self.door_pos:
187
+ if (ax == dx and abs(ay - dy) == 1) or (ay == dy and abs(ax - dx) == 1):
188
+ # the agent has chosen some door; the episode ends regardless of whether it is the correct one
189
+ done = True
190
+
191
+ # Don't let the agent open any of the doors
192
+ if p_action == self.actions.toggle:
193
+ done = True
194
+
195
+ if p_action == self.actions.done:
196
+ done = True
197
+
198
+ # discount
199
+ if self.step_penalty:
200
+ reward = reward - 0.01
201
+
202
+ # fill observation with text
203
+ # fill observation with text
204
+ self.append_existing_utterance_to_history()
205
+ obs = self.add_utterance_to_observation(obs)
206
+ self.reset_utterance()
207
+
208
+ return obs, reward, done, info
209
+
210
+ def _reward(self):
211
+ if self.diminished_reward:
212
+ return super()._reward()
213
+ else:
214
+ return 1.0
215
+
216
+ def render(self, *args, **kwargs):
217
+ obs = super().render(*args, **kwargs)
218
+ self.window.set_caption(self.conversation, [
219
+ "Gandalf:",
220
+ "Jack:",
221
+ "John:",
222
+ "Where is the exit",
223
+ "Open sesame",
224
+ ])
225
+ return obs
226
+
227
+
228
+ class GoToDoorTalkHardSesameNPCTesting(GoToDoorTalkHardSesameNPCEnv):
229
+ def __init__(self):
230
+ super().__init__(
231
+ size=5,
232
+ hear_yourself=False,
233
+ diminished_reward=False,
234
+ step_penalty=True
235
+ )
236
+
237
+ class GoToDoorTalkHardSesameNPC8x8Env(GoToDoorTalkHardSesameNPCEnv):
238
+ def __init__(self):
239
+ super().__init__(size=8)
240
+
241
+
242
+ class GoToDoorTalkHardSesameNPC6x6Env(GoToDoorTalkHardSesameNPCEnv):
243
+ def __init__(self):
244
+ super().__init__(size=6)
245
+
246
+
247
+ # hear yourself
248
+ class GoToDoorTalkHardSesameNPCHY8x8Env(GoToDoorTalkHardSesameNPCEnv):
249
+ def __init__(self):
250
+ super().__init__(size=8, hear_yourself=True)
251
+
252
+
253
+ class GoToDoorTalkHardSesameNPCHY6x6Env(GoToDoorTalkHardSesameNPCEnv):
254
+ def __init__(self):
255
+ super().__init__(size=6, hear_yourself=True)
256
+
257
+
258
+ class GoToDoorTalkHardSesameNPCHY5x5Env(GoToDoorTalkHardSesameNPCEnv):
259
+ def __init__(self):
260
+ super().__init__(size=5, hear_yourself=True)
261
+
262
+ register(
263
+ id='MiniGrid-GoToDoorTalkHardSesameNPC-Testing-v0',
264
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPCTesting'
265
+ )
266
+
267
+ register(
268
+ id='MiniGrid-GoToDoorTalkHardSesameNPC-5x5-v0',
269
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPCEnv'
270
+ )
271
+
272
+ register(
273
+ id='MiniGrid-GoToDoorTalkHardSesameNPC-6x6-v0',
274
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPC6x6Env'
275
+ )
276
+
277
+ register(
278
+ id='MiniGrid-GoToDoorTalkHardSesameNPC-8x8-v0',
279
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPC8x8Env'
280
+ )
281
+ register(
282
+ id='MiniGrid-GoToDoorTalkHardSesameNPCHY-5x5-v0',
283
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPCHY5x5Env'
284
+ )
285
+
286
+ register(
287
+ id='MiniGrid-GoToDoorTalkHardSesameNPCHY-6x6-v0',
288
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPCHY6x6Env'
289
+ )
290
+
291
+ register(
292
+ id='MiniGrid-GoToDoorTalkHardSesameNPCHY-8x8-v0',
293
+ entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPCHY8x8Env'
294
+ )
gym-minigrid/gym_minigrid/backup_envs/gotodoortalkhardsesamnpcguides.py ADDED
@@ -0,0 +1,384 @@
+ import numpy as np
+
+ from gym_minigrid.minigrid import *
+ from gym_minigrid.register import register
+
+
+ class Wizard(NPC):
+     """
+     A simple NPC that knows who is telling the truth
+     """
+
+     def __init__(self, color, name, env):
+         super().__init__(color)
+         self.name = name
+         self.env = env
+         self.npc_type = 0  # this will be put into the encoding
+
+     def listen(self, utterance):
+         if utterance == TalkHardSesameNPCGuidesGrammar.construct_utterance([0, 1]):
+             return "Ask {}.".format(self.env.true_guide.name)
+
+         return None
+
+     def is_near_agent(self):
+         ax, ay = self.env.agent_pos
+         wx, wy = self.cur_pos
+         if (ax == wx and abs(ay - wy) == 1) or (ay == wy and abs(ax - wx) == 1):
+             return True
+         return False
+
+
+ class Guide(NPC):
+     """
+     A simple NPC that knows the correct door.
+     """
+
+     def __init__(self, color, name, env, liar=False):
+         super().__init__(color)
+         self.name = name
+         self.env = env
+         self.liar = liar
+         self.npc_type = 1  # this will be put into the encoding
+
+         # Select a random target object as mission
+         obj_idx = self.env._rand_int(0, len(self.env.door_pos))
+         self.target_pos = self.env.door_pos[obj_idx]
+         self.target_color = self.env.door_colors[obj_idx]
+
+     def listen(self, utterance):
+         if utterance == TalkHardSesameNPCGuidesGrammar.construct_utterance([0, 1]):
+             if self.liar:
+                 fake_colors = [c for c in self.env.door_colors if c != self.env.target_color]
+                 fake_color = self.env._rand_elem(fake_colors)
+
+                 # Generate a fake mission string
+                 assert fake_color != self.env.target_color
+                 return 'go to the %s door' % fake_color
+
+             else:
+                 return self.env.mission
+
+         return None
+
+     def render(self, img):
+         c = COLORS[self.color]
+
+         # Draw eyes
+         fill_coords(img, point_in_circle(cx=0.70, cy=0.50, r=0.10), c)
+         fill_coords(img, point_in_circle(cx=0.30, cy=0.50, r=0.10), c)
+
+         # Draw mouth
+         fill_coords(img, point_in_rect(0.20, 0.80, 0.72, 0.81), c)
+
+         # # Draw hat
+         # tri_fn = point_in_triangle(
+         #     (0.15, 0.25),
+         #     (0.85, 0.25),
+         #     (0.50, 0.05),
+         # )
+         # fill_coords(img, tri_fn, c)
+
+     def is_near_agent(self):
+         ax, ay = self.env.agent_pos
+         wx, wy = self.cur_pos
+         if (ax == wx and abs(ay - wy) == 1) or (ay == wy and abs(ax - wx) == 1):
+             return True
+         return False
+
+
+ class TalkHardSesameNPCGuidesGrammar(object):
+
+     templates = ["Where is", "Open"]
+     things = ["sesame", "the exit"]
+
+     grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
+
+     @classmethod
+     def construct_utterance(cls, action):
+         return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
+
+
+ class GoToDoorTalkHardSesameNPCGuidesEnv(MultiModalMiniGridEnv):
+     """
+     Environment in which the agent has to reach the target door. A wizard
+     points to the truthful guide, the true guide reveals the target door,
+     and the false guide lies about it.
+     """
+
+     def __init__(
+         self,
+         size=5,
+         hear_yourself=False,
+         diminished_reward=True,
+         step_penalty=False
+     ):
+         assert size >= 5
+
+         super().__init__(
+             grid_size=size,
+             max_steps=5*size**2,
+             # Set this to True for maximum speed
+             see_through_walls=True,
+             actions=MiniGridEnv.Actions,
+             action_space=spaces.MultiDiscrete([
+                 len(MiniGridEnv.Actions),
+                 *TalkHardSesameNPCGuidesGrammar.grammar_action_space.nvec
+             ])
+         )
+         self.hear_yourself = hear_yourself
+         self.diminished_reward = diminished_reward
+         self.step_penalty = step_penalty
+
+         self.empty_symbol = "NA \n"
+
+         print({
+             "size": size,
+             "hear_yourself": hear_yourself,
+             "diminished_reward": diminished_reward,
+             "step_penalty": step_penalty,
+         })
+
+     def _gen_grid(self, width, height):
+         # Create the grid
+         self.grid = Grid(width, height)
+
+         # Randomly vary the room width and height
+         width = self._rand_int(5, width+1)
+         height = self._rand_int(5, height+1)
+
+         # Generate the surrounding walls
+         self.grid.wall_rect(0, 0, width, height)
+
+         # Generate the 4 doors at random positions
+         self.door_pos = []
+         self.door_front_pos = []  # Remembers positions in front of doors to avoid placing the wizard there
+
+         self.door_pos.append((self._rand_int(2, width-2), 0))
+         self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1]+1))
+
+         self.door_pos.append((self._rand_int(2, width-2), height-1))
+         self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1] - 1))
+
+         self.door_pos.append((0, self._rand_int(2, height-2)))
+         self.door_front_pos.append((self.door_pos[-1][0] + 1, self.door_pos[-1][1]))
+
+         self.door_pos.append((width-1, self._rand_int(2, height-2)))
+         self.door_front_pos.append((self.door_pos[-1][0] - 1, self.door_pos[-1][1]))
+
+         # Generate the door colors
+         self.door_colors = []
+         while len(self.door_colors) < len(self.door_pos):
+             color = self._rand_elem(COLOR_NAMES)
+             if color in self.door_colors:
+                 continue
+             self.door_colors.append(color)
+
+         # Place the doors in the grid
+         for idx, pos in enumerate(self.door_pos):
+             color = self.door_colors[idx]
+             self.grid.set(*pos, Door(color))
+
+         # Set a randomly coloured WIZARD at a random position
+         color = self._rand_elem(COLOR_NAMES)
+         self.wizard = Wizard(color, "Gandalf", self)
+
+         # Place it randomly, omitting front of door positions
+         self.place_obj(self.wizard,
+                        size=(width, height),
+                        reject_fn=lambda _, p: tuple(p) in self.door_front_pos)
+
+         # add guides
+         GUIDE_NAMES = ["John", "Jack"]
+
+         # Set a randomly coloured TRUE GUIDE at a random position
+         name = self._rand_elem(GUIDE_NAMES)
+         color = self._rand_elem(COLOR_NAMES)
+         self.true_guide = Guide(color, name, self, liar=False)
+
+         # Place it randomly, omitting invalid positions
+         self.place_obj(self.true_guide,
+                        size=(width, height),
+                        reject_fn=lambda _, p: tuple(p) in [*self.door_front_pos, tuple(self.wizard.cur_pos)])
+
+         # Set a randomly coloured FALSE GUIDE at a random position
+         name = self._rand_elem([n for n in GUIDE_NAMES if n != self.true_guide.name])
+         color = self._rand_elem(COLOR_NAMES)
+         self.false_guide = Guide(color, name, self, liar=True)
+
+         # Place it randomly, omitting invalid positions
+         self.place_obj(self.false_guide,
+                        size=(width, height),
+                        reject_fn=lambda _, p: tuple(p) in [
+                            *self.door_front_pos, tuple(self.wizard.cur_pos), tuple(self.true_guide.cur_pos)])
+         assert self.true_guide.name != self.false_guide.name
+
+         # Randomize the agent's start position and orientation
+         self.place_agent(size=(width, height))
+
+         # Select a random target door
+         self.doorIdx = self._rand_int(0, len(self.door_pos))
+         self.target_pos = self.door_pos[self.doorIdx]
+         self.target_color = self.door_colors[self.doorIdx]
+
+         # Generate the mission string
+         self.mission = 'go to the %s door' % self.target_color
+
+         # Dummy beginning string
+         self.beginning_string = "This is what you hear. \n"
+         self.utterance = self.beginning_string
+
+         # utterance appended at the end of each step
+         self.utterance_history = ""
+
+         self.conversation = self.utterance
+
+     def step(self, action):
+         p_action = action[0]
+         utterance_action = action[1:]
+
+         # assert that the utterance slots are either all NaN or all set
+         assert len(set(np.isnan(utterance_action))) == 1
+
+         speak_flag = not all(np.isnan(utterance_action))
+
+         obs, reward, done, info = super().step(p_action)
+
+         if speak_flag:
+             utterance = TalkHardSesameNPCGuidesGrammar.construct_utterance(utterance_action)
+             if self.hear_yourself:
+                 self.utterance += "YOU: {} \n".format(utterance)
+
+             self.conversation += "YOU: {} \n".format(utterance)
+
+             # check if near the wizard
+             if hasattr(self, "wizard"):
+                 if self.wizard.is_near_agent():
+                     reply = self.wizard.listen(utterance)
+
+                     if reply:
+                         self.utterance += "{}: {} \n".format(self.wizard.name, reply)
+                         self.conversation += "{}: {} \n".format(self.wizard.name, reply)
+
+             if self.true_guide.is_near_agent():
+                 reply = self.true_guide.listen(utterance)
+
+                 if reply:
+                     self.utterance += "{}: {} \n".format(self.true_guide.name, reply)
+                     self.conversation += "{}: {} \n".format(self.true_guide.name, reply)
+
+             if hasattr(self, "false_guide"):
+                 if self.false_guide.is_near_agent():
+                     reply = self.false_guide.listen(utterance)
+
+                     if reply:
+                         self.utterance += "{}: {} \n".format(self.false_guide.name, reply)
+                         self.conversation += "{}: {} \n".format(self.false_guide.name, reply)
+
+             if utterance == TalkHardSesameNPCGuidesGrammar.construct_utterance([1, 0]):
+                 ax, ay = self.agent_pos
+                 tx, ty = self.target_pos
+
+                 if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
+                     reward = self._reward()
+
+                 for dx, dy in self.door_pos:
+                     if (ax == dx and abs(ay - dy) == 1) or (ay == dy and abs(ax - dx) == 1):
+                         # the agent has chosen a door; the episode ends whether or not it is the correct one
+                         done = True
+
+         # Don't let the agent open any of the doors
+         if p_action == self.actions.toggle:
+             done = True
+
+         if p_action == self.actions.done:
+             done = True
+
+         # discount
+         if self.step_penalty:
+             reward = reward - 0.01
+
+         # fill observation with text
+         self.append_existing_utterance_to_history()
+         obs = self.add_utterance_to_observation(obs)
+         self.reset_utterance()
+
+         return obs, reward, done, info
+
+     def _reward(self):
+         if self.diminished_reward:
+             return super()._reward()
+         else:
+             return 1.0
+
+     def render(self, *args, **kwargs):
+         obs = super().render(*args, **kwargs)
+         print(self.conversation)
+         self.window.set_caption(self.conversation, [
+             "Gandalf:",
+             "Jack:",
+             "John:",
+             "Where is the exit",
+             "Open sesame",
+         ])
+         return obs
+
+
+ class GoToDoorTalkHardSesameNPCGuides8x8Env(GoToDoorTalkHardSesameNPCGuidesEnv):
+     def __init__(self):
+         super().__init__(size=8)
+
+
+ class GoToDoorTalkHardSesameNPCGuides6x6Env(GoToDoorTalkHardSesameNPCGuidesEnv):
+     def __init__(self):
+         super().__init__(size=6)
+
+
+ # hear yourself
+ class GoToDoorTalkHardSesameNPCGuidesHY8x8Env(GoToDoorTalkHardSesameNPCGuidesEnv):
+     def __init__(self):
+         super().__init__(size=8, hear_yourself=True)
+
+
+ class GoToDoorTalkHardSesameNPCGuidesHY6x6Env(GoToDoorTalkHardSesameNPCGuidesEnv):
+     def __init__(self):
+         super().__init__(size=6, hear_yourself=True)
+
+
+ class GoToDoorTalkHardSesameNPCGuidesHY5x5Env(GoToDoorTalkHardSesameNPCGuidesEnv):
+     def __init__(self):
+         super().__init__(size=5, hear_yourself=True)
+
+
+ register(
+     id='MiniGrid-GoToDoorTalkHardSesameNPCGuides-5x5-v0',
+     entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPCGuidesEnv'
+ )
+
+ register(
+     id='MiniGrid-GoToDoorTalkHardSesameNPCGuides-6x6-v0',
+     entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPCGuides6x6Env'
+ )
+
+ register(
+     id='MiniGrid-GoToDoorTalkHardSesameNPCGuides-8x8-v0',
+     entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPCGuides8x8Env'
+ )
+
+ register(
+     id='MiniGrid-GoToDoorTalkHardSesameNPCGuidesHY-5x5-v0',
+     entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPCGuidesHY5x5Env'
+ )
+
+ register(
+     id='MiniGrid-GoToDoorTalkHardSesameNPCGuidesHY-6x6-v0',
+     entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPCGuidesHY6x6Env'
+ )
+
+ register(
+     id='MiniGrid-GoToDoorTalkHardSesameNPCGuidesHY-8x8-v0',
+     entry_point='gym_minigrid.envs:GoToDoorTalkHardSesameNPCGuidesHY8x8Env'
+ )
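The step() method above fixes the action convention shared by these dialogue environments: slot 0 carries the physical MiniGrid action, the remaining slots pick a grammar template and a thing, and setting both grammar slots to NaN means the agent stays silent on that step. A hedged interaction sketch (assuming the ids above were registered by importing gym_minigrid, and that the wrapper returned by gym.make forwards the env's attributes):

    import gym
    import numpy as np
    import gym_minigrid  # noqa: F401

    env = gym.make('MiniGrid-GoToDoorTalkHardSesameNPCGuides-8x8-v0')
    obs = env.reset()

    # Move without speaking: both grammar slots are NaN.
    obs, reward, done, info = env.step([int(env.actions.forward), np.nan, np.nan])

    # Say "Where is the exit " (template 0, thing 1): a wizard standing next to
    # the agent replies "Ask <true_guide>.", and a nearby guide names a door colour.
    obs, reward, done, info = env.step([int(env.actions.left), 0, 1])

    # Say "Open sesame " (template 1, thing 0) while next to a door: the episode
    # ends, with reward only if that door is the target.
    obs, reward, done, info = env.step([int(env.actions.left), 1, 0])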
gym-minigrid/gym_minigrid/backup_envs/gotodoorwizard.py ADDED
@@ -0,0 +1,209 @@
+ from gym_minigrid.minigrid import *
+ from gym_minigrid.register import register
+
+
+ class Grammar(object):
+     # Grammar mirroring the sibling GoToDoor* envs; this env never constructs
+     # utterances, so only the shape of the grammar action space is used.
+     templates = ["Where is", "Open"]
+     things = ["sesame", "the exit"]
+
+     grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
+
+     @classmethod
+     def construct_utterance(cls, action):
+         return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
+
+
+ class simpleWizard(NPC):
+     """
+     A simple NPC that wants the agent to go to an object (randomly chosen among the door_pos list)
+     """
+     def __init__(self, color, name, env):
+         super().__init__(color)
+         self.name = name
+         self.env = env
+         self.has_spoken = False  # wizards only speak once
+
+         # Select a random target object as mission
+         obj_idx = self.env._rand_int(0, len(self.env.door_pos))
+         self.target_pos = self.env.door_pos[obj_idx]
+         self.target_color = self.env.door_colors[obj_idx]
+
+         # Generate the mission string
+         self.wizard_mission = 'go to the %s door' % self.target_color
+
+     def listen(self, utterance):
+         if not self.has_spoken:
+             self.has_spoken = True
+             return self.wizard_mission
+         return None
+
+     def is_satisfied(self):
+         ax, ay = self.env.agent_pos
+         tx, ty = self.target_pos
+         if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
+             return True
+         return False
+
+     def is_near_agent(self):
+         ax, ay = self.env.agent_pos
+         wx, wy = self.cur_pos
+         if (ax == wx and abs(ay - wy) == 1) or (ay == wy and abs(ax - wx) == 1):
+             return True
+         return False
+
+
+ class GoToDoorWizard(MiniGridEnv):
+     """
+     Environment in which the agent is instructed to "please the wizard",
+     i.e. to go to him to receive a quest (a go-to-door task) and fulfil it
+     """
+
+     def __init__(
+         self,
+         size=5,
+         hear_yourself=False,
+     ):
+         assert size >= 5
+
+         super().__init__(
+             grid_size=size,
+             max_steps=5*size**2,
+             # Set this to True for maximum speed
+             see_through_walls=True,
+             actions=MiniGridEnv.Actions,
+             action_space=spaces.MultiDiscrete([
+                 len(MiniGridEnv.Actions),
+                 *Grammar.grammar_action_space.nvec
+             ])
+         )
+         self.hear_yourself = hear_yourself
+
+     def _gen_grid(self, width, height):
+         # Create the grid
+         self.grid = Grid(width, height)
+
+         # Randomly vary the room width and height
+         width = self._rand_int(5, width+1)
+         height = self._rand_int(5, height+1)
+
+         # Generate the surrounding walls
+         self.grid.wall_rect(0, 0, width, height)
+
+         # Generate the 4 doors at random positions
+         self.door_pos = []
+         self.door_front_pos = []  # Remembers positions in front of doors to avoid placing the wizard there
+
+         self.door_pos.append((self._rand_int(2, width-2), 0))
+         self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1]+1))
+
+         self.door_pos.append((self._rand_int(2, width-2), height-1))
+         self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1] - 1))
+
+         self.door_pos.append((0, self._rand_int(2, height-2)))
+         self.door_front_pos.append((self.door_pos[-1][0] + 1, self.door_pos[-1][1]))
+
+         self.door_pos.append((width-1, self._rand_int(2, height-2)))
+         self.door_front_pos.append((self.door_pos[-1][0] - 1, self.door_pos[-1][1]))
+
+         # Generate the door colors
+         self.door_colors = []
+         while len(self.door_colors) < len(self.door_pos):
+             color = self._rand_elem(COLOR_NAMES)
+             if color in self.door_colors:
+                 continue
+             self.door_colors.append(color)
+
+         # Place the doors in the grid
+         for idx, pos in enumerate(self.door_pos):
+             color = self.door_colors[idx]
+             self.grid.set(*pos, Door(color))
+
+         # Set a randomly coloured NPC at a random position
+         color = self._rand_elem(COLOR_NAMES)
+         self.wizard = simpleWizard(color, "Gandalf", self)
+
+         # Place it randomly, omitting front of door positions
+         self.place_obj(self.wizard,
+                        size=(width, height),
+                        reject_fn=lambda _, p: tuple(p) in self.door_front_pos)
+
+         # Randomize the agent start position and orientation
+         self.place_agent(size=(width, height))
+
+         # Generate the mission string
+         self.mission = 'please the wizard'
+
+         # Initialize the dialogue string
+         self.dialogue = "This is what you hear. "
+
+     def gen_obs(self):
+         obs = super().gen_obs()
+
+         # add dialogue to obs
+         obs["dialogue"] = self.dialogue
+
+         return obs
+
+     def step(self, action):
+         # Handle the raw enum action provided by manual_control (TODO: improve)
+         if type(action) == MiniGridEnv.Actions:
+             action = [action, None]
+
+         p_action = action[0]
+         utterance_action = action[1:]
+
+         obs, reward, done, info = super().step(p_action)
+
+         # The wizard gives the quest as soon as the agent is near him
+         if self.wizard.is_near_agent():
+             reply = self.wizard.listen("")
+             if reply:
+                 self.dialogue += "{}: {}".format(self.wizard.name, reply)
+
+         # Don't let the agent open any of the doors
+         if p_action == self.actions.toggle:
+             done = True
+
+         # Reward performing the done action when the wizard is pleased
+         if p_action == self.actions.done:
+             if self.wizard.is_satisfied():
+                 reward = self._reward()
+             done = True
+         return obs, reward, done, info
+
+     def render(self, *args, **kwargs):
+         obs = super().render(*args, **kwargs)
+         self.window.set_caption(self.dialogue, [
+             "Gandalf:",
+             "Jack:",
+             "John:",
+             "Where is the exit",
+             "Open sesame",
+         ])
+         self.window.fig.gca().set_title("goal: " + self.mission)
+         return obs
+
+
+ class GoToDoorWizard5x5Env(GoToDoorWizard):
+     def __init__(self):
+         super().__init__(size=5)
+
+
+ class GoToDoorWizard7x7Env(GoToDoorWizard):
+     def __init__(self):
+         super().__init__(size=7)
+
+
+ class GoToDoorWizard8x8Env(GoToDoorWizard):
+     def __init__(self):
+         super().__init__(size=8)
+
+
+ register(
+     id='MiniGrid-GoToDoorWizard-5x5-v0',
+     entry_point='gym_minigrid.envs:GoToDoorWizard5x5Env'
+ )
+
+ register(
+     id='MiniGrid-GoToDoorWizard-7x7-v0',
+     entry_point='gym_minigrid.envs:GoToDoorWizard7x7Env'
+ )
+
+ register(
+     id='MiniGrid-GoToDoorWizard-8x8-v0',
+     entry_point='gym_minigrid.envs:GoToDoorWizard8x8Env'
+ )
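Unlike its siblings, GoToDoorWizard.step() also accepts a bare MiniGridEnv.Actions value (the form sent by manual_control) and wraps it into the [physical_action, utterance...] list itself, so both call styles below should behave the same (a sketch, assuming an env built with gym.make as in the earlier example):

    env.step(env.actions.forward)          # raw enum action, as manual_control sends it
    env.step([env.actions.forward, None])  # list form used by learning code

    # Performing `done` while standing next to the wizard's quest door yields the reward.
    env.step([env.actions.done, None])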
gym-minigrid/gym_minigrid/backup_envs/guidethief.py ADDED
@@ -0,0 +1,416 @@
+ import numpy as np
+
+ from gym_minigrid.minigrid import *
+ from gym_minigrid.register import register
+
+
+ class Guide(NPC):
+     """
+     A simple NPC that knows the correct door.
+     """
+
+     def __init__(self, color, name, id, env, liar=False):
+         super().__init__(color)
+         self.name = name
+         self.env = env
+         self.liar = liar
+         self.npc_dir = 1  # NPC initially looks downward
+         self.npc_type = id  # this will be put into the encoding
+
+         # Select a random target object as mission
+         obj_idx = self.env._rand_int(0, len(self.env.door_pos))
+         self.target_pos = self.env.door_pos[obj_idx]
+         self.target_color = self.env.door_colors[obj_idx]
+
+     def listen(self, utterance):
+         if utterance == GuideThiefGrammar.construct_utterance([0, 1]):
+             if self.liar:
+                 fake_colors = [c for c in self.env.door_colors if c != self.env.target_color]
+                 fake_color = self.env._rand_elem(fake_colors)
+
+                 # Generate a fake mission string
+                 assert fake_color != self.env.target_color
+                 if self.env.one_word:
+                     return '%s' % fake_color
+                 elif self.env.very_diff:
+                     return 'you want the %s door' % fake_color
+                 else:
+                     return 'go to the %s door' % fake_color
+
+             else:
+                 return self.env.mission
+
+         return None
+
+     def render(self, img):
+         c = COLORS[self.color]
+
+         npc_shapes = []
+         # Draw eyes
+         npc_shapes.append(point_in_circle(cx=0.70, cy=0.50, r=0.10))
+         npc_shapes.append(point_in_circle(cx=0.30, cy=0.50, r=0.10))
+
+         # Draw mouth
+         npc_shapes.append(point_in_rect(0.20, 0.80, 0.72, 0.81))
+
+         # todo: move this to super function
+         # todo: super.render should be able to take the npc_shapes and then rotate them
+
+         if hasattr(self, "npc_dir"):
+             # Pre-rotation to ensure npc_dir = 1 means NPC looks downwards
+             npc_shapes = [rotate_fn(v, cx=0.5, cy=0.5, theta=-1*(math.pi / 2)) for v in npc_shapes]
+             # Rotate npc based on its direction
+             npc_shapes = [rotate_fn(v, cx=0.5, cy=0.5, theta=(math.pi/2) * self.npc_dir) for v in npc_shapes]
+
+         # Draw shapes
+         for v in npc_shapes:
+             fill_coords(img, v, c)
+
+     def is_near_agent(self):
+         ax, ay = self.env.agent_pos
+         wx, wy = self.cur_pos
+         if (ax == wx and abs(ay - wy) == 1) or (ay == wy and abs(ax - wx) == 1):
+             return True
+         return False
+
+
+ class GuideThiefGrammar(object):
+
+     templates = ["Where is", "Open", "Close", "What is"]
+     things = [
+         "sesame", "the exit", "the wall", "the floor", "the ceiling", "the window", "the entrance", "the closet",
+         "the drawer", "the fridge", "oven", "the lamp", "the trash can", "the chair", "the bed", "the sofa"
+     ]
+
+     grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
+
+     @classmethod
+     def construct_utterance(cls, action):
+         return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
+
+
+ class GuideThiefEnv(MultiModalMiniGridEnv):
+     """
+     Environment with two guides: one truthfully names the target door,
+     the other lies about it.
+     """
+
+     def __init__(
+         self,
+         size=5,
+         hear_yourself=False,
+         diminished_reward=True,
+         step_penalty=False,
+         nameless=False,
+         max_steps=None,
+         very_diff=False,
+         one_word=False,
+     ):
+         assert size >= 5
+         self.empty_symbol = "NA \n"
+         self.hear_yourself = hear_yourself
+         self.diminished_reward = diminished_reward
+         self.step_penalty = step_penalty
+         self.nameless = nameless
+         self.very_diff = very_diff
+         self.one_word = one_word
+
+         super().__init__(
+             grid_size=size,
+             max_steps=max_steps or 5*size**2,
+             # Set this to True for maximum speed
+             see_through_walls=True,
+             actions=MiniGridEnv.Actions,
+             action_space=spaces.MultiDiscrete([
+                 len(MiniGridEnv.Actions),
+                 *GuideThiefGrammar.grammar_action_space.nvec
+             ]),
+             add_npc_direction=True
+         )
+
+         print({
+             "size": size,
+             "hear_yourself": hear_yourself,
+             "diminished_reward": diminished_reward,
+             "step_penalty": step_penalty,
+         })
+
+     def _gen_grid(self, width, height):
+         # Create the grid
+         self.grid = Grid(width, height, nb_obj_dims=4)
+
+         # Randomly vary the room width and height
+         width = self._rand_int(5, width+1)
+         height = self._rand_int(5, height+1)
+
+         # Generate the surrounding walls
+         self.grid.wall_rect(0, 0, width, height)
+
+         # Generate the 4 doors at random positions
+         self.door_pos = []
+         self.door_front_pos = []
+
+         self.door_pos.append((self._rand_int(2, width-2), 0))
+         self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1]+1))
+
+         self.door_pos.append((self._rand_int(2, width-2), height-1))
+         self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1] - 1))
+
+         self.door_pos.append((0, self._rand_int(2, height-2)))
+         self.door_front_pos.append((self.door_pos[-1][0] + 1, self.door_pos[-1][1]))
+
+         self.door_pos.append((width-1, self._rand_int(2, height-2)))
+         self.door_front_pos.append((self.door_pos[-1][0] - 1, self.door_pos[-1][1]))
+
+         # Generate the door colors
+         self.door_colors = []
+         while len(self.door_colors) < len(self.door_pos):
+             color = self._rand_elem(COLOR_NAMES)
+             if color in self.door_colors:
+                 continue
+             self.door_colors.append(color)
+
+         # Place the doors in the grid
+         for idx, pos in enumerate(self.door_pos):
+             color = self.door_colors[idx]
+             self.grid.set(*pos, Door(color))
+
+         # add guides
+         GUIDE_NAMES = ["John", "Jack"]
+         name_2_id = {name: id for id, name in enumerate(GUIDE_NAMES)}
+
+         # Set a randomly coloured TRUE GUIDE at a random position
+         true_guide_name = GUIDE_NAMES[0]
+         color = self._rand_elem(COLOR_NAMES)
+         self.true_guide = Guide(
+             color=color,
+             name=true_guide_name,
+             id=name_2_id[true_guide_name],
+             env=self,
+             liar=False
+         )
+
+         # Place it randomly, omitting invalid positions
+         self.place_obj(self.true_guide,
+                        size=(width, height),
+                        reject_fn=lambda _, p: tuple(p) in self.door_front_pos)
+
+         # Set a randomly coloured FALSE GUIDE at a random position
+         false_guide_name = GUIDE_NAMES[1]
+         if self.nameless:
+             color = self._rand_elem([c for c in COLOR_NAMES if c != self.true_guide.color])
+         else:
+             color = self._rand_elem(COLOR_NAMES)
+
+         self.false_guide = Guide(
+             color=color,
+             name=false_guide_name,
+             id=name_2_id[false_guide_name],
+             env=self,
+             liar=True
+         )
+
+         # Place it randomly, omitting invalid positions
+         self.place_obj(self.false_guide,
+                        size=(width, height),
+                        reject_fn=lambda _, p: tuple(p) in [
+                            *self.door_front_pos, tuple(self.true_guide.cur_pos)])
+         assert self.true_guide.name != self.false_guide.name
+
+         # Randomize the agent's start position and orientation
+         self.place_agent(size=(width, height))
+
+         # Select a random target door
+         self.doorIdx = self._rand_int(0, len(self.door_pos))
+         self.target_pos = self.door_pos[self.doorIdx]
+         self.target_color = self.door_colors[self.doorIdx]
+
+         # Generate the mission string
+         self.mission = 'go to the %s door' % self.target_color
+
+         # Dummy beginning string
+         self.beginning_string = "This is what you hear. \n"
+         self.utterance = self.beginning_string
+
+         # utterance appended at the end of each step
+         self.utterance_history = ""
+
+         # used for rendering
+         self.conversation = self.utterance
+
+     def step(self, action):
+         p_action = action[0]
+         utterance_action = action[1:]
+
+         # assert that the utterance slots are either all NaN or all set
+         assert len(set(np.isnan(utterance_action))) == 1
+
+         speak_flag = not all(np.isnan(utterance_action))
+
+         obs, reward, done, info = super().step(p_action)
+
+         if speak_flag:
+             utterance = GuideThiefGrammar.construct_utterance(utterance_action)
+             if self.hear_yourself:
+                 if self.nameless:
+                     self.utterance += "{} \n".format(utterance)
+                 else:
+                     self.utterance += "YOU: {} \n".format(utterance)
+
+             self.conversation += "YOU: {} \n".format(utterance)
+
+             if self.true_guide.is_near_agent():
+                 reply = self.true_guide.listen(utterance)
+
+                 if reply:
+                     if self.nameless:
+                         self.utterance += "{} \n".format(reply)
+                     else:
+                         self.utterance += "{}: {} \n".format(self.true_guide.name, reply)
+
+                     self.conversation += "{}: {} \n".format(self.true_guide.name, reply)
+
+             if self.false_guide.is_near_agent():
+                 reply = self.false_guide.listen(utterance)
+
+                 if reply:
+                     if self.nameless:
+                         self.utterance += "{} \n".format(reply)
+                     else:
+                         self.utterance += "{}: {} \n".format(self.false_guide.name, reply)
+
+                     self.conversation += "{}: {} \n".format(self.false_guide.name, reply)
+
+             if utterance == GuideThiefGrammar.construct_utterance([1, 0]):
+                 ax, ay = self.agent_pos
+                 tx, ty = self.target_pos
+
+                 if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
+                     reward = self._reward()
+
+                 for dx, dy in self.door_pos:
+                     if (ax == dx and abs(ay - dy) == 1) or (ay == dy and abs(ax - dx) == 1):
+                         # the agent has chosen a door; the episode ends whether or not it is the correct one
+                         done = True
+
+         # Don't let the agent open any of the doors
+         if p_action == self.actions.toggle:
+             done = True
+
+         if p_action == self.actions.done:
+             done = True
+
+         # discount
+         if self.step_penalty:
+             reward = reward - 0.01
+
+         # fill observation with text
+         self.append_existing_utterance_to_history()
+         obs = self.add_utterance_to_observation(obs)
+         self.reset_utterance()
+
+         return obs, reward, done, info
+
+     def _reward(self):
+         if self.diminished_reward:
+             return super()._reward()
+         else:
+             return 1.0
+
+     def render(self, *args, **kwargs):
+         obs = super().render(*args, **kwargs)
+         print("conversation:\n", self.conversation)
+         print("utterance_history:\n", self.utterance_history)
+         self.window.set_caption(self.conversation, [
+             "Gandalf:",
+             "Jack:",
+             "John:",
+             "Where is the exit",
+             "Open sesame",
+         ])
+         return obs
+
+
+ class GuideThief8x8Env(GuideThiefEnv):
+     def __init__(self):
+         super().__init__(size=8)
+
+
+ class GuideThief6x6Env(GuideThiefEnv):
+     def __init__(self):
+         super().__init__(size=6)
+
+
+ class GuideThiefNameless8x8Env(GuideThiefEnv):
+     def __init__(self):
+         super().__init__(size=8, nameless=True)
+
+
+ class GuideThiefTestEnv(GuideThiefEnv):
+     def __init__(self):
+         super().__init__(
+             size=5,
+             nameless=False,
+             max_steps=20,
+         )
+
+
+ class GuideThiefVeryDiff(GuideThiefEnv):
+     def __init__(self):
+         super().__init__(
+             size=5,
+             nameless=False,
+             max_steps=20,
+             very_diff=True,
+         )
+
+
+ class GuideThiefOneWord(GuideThiefEnv):
+     def __init__(self):
+         super().__init__(
+             size=5,
+             nameless=False,
+             max_steps=20,
+             very_diff=False,
+             one_word=True
+         )
+
+
+ register(
+     id='MiniGrid-GuideThief-5x5-v0',
+     entry_point='gym_minigrid.envs:GuideThiefEnv'
+ )
+
+ register(
+     id='MiniGrid-GuideThief-6x6-v0',
+     entry_point='gym_minigrid.envs:GuideThief6x6Env'
+ )
+
+ register(
+     id='MiniGrid-GuideThief-8x8-v0',
+     entry_point='gym_minigrid.envs:GuideThief8x8Env'
+ )
+
+ register(
+     id='MiniGrid-GuideThiefNameless-8x8-v0',
+     entry_point='gym_minigrid.envs:GuideThiefNameless8x8Env'
+ )
+
+ register(
+     id='MiniGrid-GuideThiefTest-v0',
+     entry_point='gym_minigrid.envs:GuideThiefTestEnv'
+ )
+
+ register(
+     id='MiniGrid-GuideThiefVeryDiff-v0',
+     entry_point='gym_minigrid.envs:GuideThiefVeryDiff'
+ )
+
+ register(
+     id='MiniGrid-GuideThiefOneWord-v0',
+     entry_point='gym_minigrid.envs:GuideThiefOneWord'
+ )
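Since GuideThiefGrammar.construct_utterance() simply joins a template and a thing with single spaces (plus a trailing space, which listen() relies on for exact string matching), the utterance space can be inspected offline. A small illustration, assuming the class is imported from this module:

    print(GuideThiefGrammar.construct_utterance([0, 1]))  # "Where is the exit "
    print(GuideThiefGrammar.construct_utterance([1, 0]))  # "Open sesame "

    # 4 templates x 16 things = 64 possible utterances
    print(len(GuideThiefGrammar.templates) * len(GuideThiefGrammar.things))  # 64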
gym-minigrid/gym_minigrid/backup_envs/helper.py ADDED
@@ -0,0 +1,295 @@
+ import numpy as np
+
+ from gym_minigrid.minigrid import *
+ from gym_minigrid.register import register
+
+
+ class Peer(NPC):
+     """
+     An NPC that tries to exit through one of the two locked doors; the agent
+     can help by operating the matching switch
+     """
+
+     def __init__(self, color, name, env):
+         super().__init__(color)
+         self.name = name
+         self.npc_dir = 1  # NPC initially looks downward
+         self.npc_type = 0
+         self.env = env
+         self.npc_actions = []
+         self.dancing_step_idx = 0
+         self.actions = MiniGridEnv.Actions
+         self.add_npc_direction = True
+         self.available_moves = [self.rotate_left, self.rotate_right, self.go_forward, self.toggle_action]
+
+         selected_door_id = self.env._rand_elem([0, 1])
+         self.selected_door_pos = [self.env.door_pos_top, self.env.door_pos_bottom][selected_door_id]
+         self.selected_door = [self.env.door_top, self.env.door_bottom][selected_door_id]
+         self.joint_attention_achieved = False
+
+     def can_overlap(self):
+         # If the NPC is hidden, agent can overlap on it
+         return self.env.hidden_npc
+
+     def encode(self, nb_dims=3):
+         if self.env.hidden_npc:
+             if nb_dims == 3:
+                 return (1, 0, 0)
+             elif nb_dims == 4:
+                 return (1, 0, 0, 0)
+         else:
+             return super().encode(nb_dims=nb_dims)
+
+     def step(self):
+         distance_to_door = np.abs(self.selected_door_pos - self.cur_pos).sum(-1)
+
+         if all(self.front_pos == self.selected_door_pos) and self.selected_door.is_open:
+             # in front of the door
+             self.go_forward()
+
+         elif distance_to_door == 1 and not self.joint_attention_achieved:
+             # before turning to the door, look at the agent
+             wanted_dir = self.compute_wanted_dir(self.env.agent_pos)
+             act = self.compute_turn_action(wanted_dir)
+             act()
+             if self.is_eye_contact():
+                 self.joint_attention_achieved = True
+
+         else:
+             act = self.path_to_toggle_pos(self.selected_door_pos)
+             act()
+
+         # not really important as the NPC doesn't speak
+         if self.env.hidden_npc:
+             return None
+
+
+ class HelperGrammar(object):
+
+     templates = ["Move your", "Shake your"]
+     things = ["body", "head"]
+
+     grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
+
+     @classmethod
+     def construct_utterance(cls, action):
+         return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
+
+
+ class HelperEnv(MultiModalMiniGridEnv):
+     """
+     Environment in which the agent helps a peer NPC exit through one of two
+     locked doors by flipping the switch that unlocks it
+     """
+
+     def __init__(
+         self,
+         size=5,
+         diminished_reward=True,
+         step_penalty=False,
+         knowledgeable=False,
+         max_steps=20,
+         hidden_npc=False,
+     ):
+         assert size >= 5
+         self.empty_symbol = "NA \n"
+         self.diminished_reward = diminished_reward
+         self.step_penalty = step_penalty
+         self.knowledgeable = knowledgeable
+         self.hidden_npc = hidden_npc
+
+         super().__init__(
+             grid_size=size,
+             max_steps=max_steps,
+             # Set this to True for maximum speed
+             see_through_walls=True,
+             actions=MiniGridEnv.Actions,
+             action_space=spaces.MultiDiscrete([
+                 len(MiniGridEnv.Actions),
+                 *HelperGrammar.grammar_action_space.nvec
+             ]),
+             add_npc_direction=True
+         )
+
+         print({
+             "size": size,
+             "diminished_reward": diminished_reward,
+             "step_penalty": step_penalty,
+         })
+
+     def _gen_grid(self, width, height):
+         # Create the grid
+         self.grid = Grid(width, height, nb_obj_dims=4)
+
+         # Randomly vary the room width and height
+         width = self._rand_int(5, width+1)
+         height = self._rand_int(5, height+1)
+
+         self.wall_x = width-1
+         self.wall_y = height-1
+
+         # Generate the surrounding walls
+         self.grid.wall_rect(0, 0, width, height)
+
+         # add lava
+         self.grid.vert_wall(width//2, 1, height - 2, Lava)
+
+         # door top
+         door_color_top = self._rand_elem(COLOR_NAMES)
+         self.door_pos_top = (width-1, 1)
+         self.door_top = Door(door_color_top, is_locked=True)
+         self.grid.set(*self.door_pos_top, self.door_top)
+
+         # switch top
+         self.switch_pos_top = (0, 1)
+         self.switch_top = Switch(door_color_top, lockable_object=self.door_top, locker_switch=True)
+         self.grid.set(*self.switch_pos_top, self.switch_top)
+
+         # door bottom
+         door_color_bottom = self._rand_elem(COLOR_NAMES)
+         self.door_pos_bottom = (width-1, height-2)
+         self.door_bottom = Door(door_color_bottom, is_locked=True)
+         self.grid.set(*self.door_pos_bottom, self.door_bottom)
+
+         # switch bottom
+         self.switch_pos_bottom = (0, height-2)
+         self.switch_bottom = Switch(door_color_bottom, lockable_object=self.door_bottom, locker_switch=True)
+         self.grid.set(*self.switch_pos_bottom, self.switch_bottom)
+
+         # save to variables
+         self.switches = [self.switch_top, self.switch_bottom]
+         self.switches_pos = [self.switch_pos_top, self.switch_pos_bottom]
+         self.door = [self.door_top, self.door_bottom]
+         self.door_pos = [self.door_pos_top, self.door_pos_bottom]
+
+         # Set a randomly coloured peer NPC
+         color = self._rand_elem(COLOR_NAMES)
+         self.peer = Peer(color, "Jill", self)
+
+         # Place it on the middle right side of the room
+         peer_pos = np.array((self._rand_int(width//2+1, width - 1), self._rand_int(1, height - 1)))
+
+         self.grid.set(*peer_pos, self.peer)
+         self.peer.init_pos = peer_pos
+         self.peer.cur_pos = peer_pos
+
+         # Randomize the agent's start position and orientation
+         self.place_agent(size=(width//2, height))
+
+         # Generate the mission string
+         self.mission = 'watch dancer and repeat his moves afterwards'
+
+         # Dummy beginning string
+         self.beginning_string = "This is what you hear. \n"
+         self.utterance = self.beginning_string
+
+         # utterance appended at the end of each step
+         self.utterance_history = ""
+
+         # used for rendering
+         self.conversation = self.utterance
+         self.outcome_info = None
+
+     def step(self, action):
+         p_action = action[0]
+         utterance_action = action[1:]
+
+         obs, reward, done, info = super().step(p_action)
+         self.peer.step()
+
+         if np.isnan(p_action):
+             pass
+
+         if p_action == self.actions.done:
+             done = True
+
+         elif all(self.agent_pos == self.door_pos_top):
+             done = True
+
+         elif all(self.agent_pos == self.door_pos_bottom):
+             done = True
+
+         elif all([self.switch_top.is_on, self.switch_bottom.is_on]):
+             # if both switches are on, no reward is given and the episode ends
+             done = True
+
+         elif all(self.peer.cur_pos == self.peer.selected_door_pos):
+             reward = self._reward()
+             done = True
+
+         # discount
+         if self.step_penalty:
+             reward = reward - 0.01
+
+         if self.hidden_npc:
+             # all npcs are hidden
+             assert np.argwhere(obs['image'][:, :, 0] == OBJECT_TO_IDX['npc']).size == 0
+             assert "{}:".format(self.peer.name) not in self.utterance
+
+         # fill observation with text
+         self.append_existing_utterance_to_history()
+         obs = self.add_utterance_to_observation(obs)
+         self.reset_utterance()
+
+         if done:
+             if reward > 0:
+                 self.outcome_info = "SUCCESS: agent got {} reward \n".format(np.round(reward, 1))
+             else:
+                 self.outcome_info = "FAILURE: agent got {} reward \n".format(reward)
+
+         return obs, reward, done, info
+
+     def _reward(self):
+         if self.diminished_reward:
+             return super()._reward()
+         else:
+             return 1.0
+
+     def render(self, *args, **kwargs):
+         obs = super().render(*args, **kwargs)
+         self.window.clear_text()  # erase previous text
+
+         # self.window.set_caption(self.conversation, [self.peer.name])
+         # self.window.ax.set_title("correct door: {}".format(self.true_guide.target_color), loc="left", fontsize=10)
+         if self.outcome_info:
+             color = None
+             if "SUCCESS" in self.outcome_info:
+                 color = "lime"
+             elif "FAILURE" in self.outcome_info:
+                 color = "red"
+             self.window.add_text(*(0.01, 0.85, self.outcome_info),
+                                  **{'fontsize': 15, 'color': color, 'weight': "bold"})
+
+         self.window.show_img(obs)  # re-draw image to add changes to window
+         return obs
+
+
+ class Helper8x8Env(HelperEnv):
+     def __init__(self, **kwargs):
+         super().__init__(size=8, max_steps=20, **kwargs)
+
+
+ class Helper6x6Env(HelperEnv):
+     def __init__(self):
+         super().__init__(size=6, max_steps=20)
+
+
+ register(
+     id='MiniGrid-Helper-5x5-v0',
+     entry_point='gym_minigrid.envs:HelperEnv'
+ )
+
+ register(
+     id='MiniGrid-Helper-6x6-v0',
+     entry_point='gym_minigrid.envs:Helper6x6Env'
+ )
+
+ register(
+     id='MiniGrid-Helper-8x8-v0',
+     entry_point='gym_minigrid.envs:Helper8x8Env'
+ )
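The key mechanic in _gen_grid() above is the switch/door wiring: each door is created locked and paired with a same-coloured Switch built with locker_switch=True. A hedged sketch of the pattern, with the position and colour chosen only for illustration (Door and Switch from this gym_minigrid fork):

    door = Door('red', is_locked=True)
    switch = Switch('red', lockable_object=door, locker_switch=True)
    # Both are then placed with self.grid.set(x, y, obj); toggling the switch
    # presumably flips door.is_locked, which is what lets the peer pass through.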
gym-minigrid/gym_minigrid/backup_envs/showme.py ADDED
@@ -0,0 +1,525 @@
+ import numpy as np
+
+ from gym_minigrid.minigrid import *
+ from gym_minigrid.register import register
+
+
+ class DemonstratingPeer(NPC):
+     """
+     An NPC that demonstrates how to open the door by setting the switches to
+     the password, then exits the room
+     """
+     def __init__(self, color, name, env, knowledgeable=False):
+         super().__init__(color)
+         self.name = name
+         self.npc_dir = 1  # NPC initially looks downward
+         self.npc_type = 0
+         self.env = env
+         self.knowledgeable = knowledgeable
+         self.npc_actions = []
+         self.dancing_step_idx = 0
+         self.actions = MiniGridEnv.Actions
+         self.add_npc_direction = True
+         self.available_moves = [self.rotate_left, self.rotate_right, self.go_forward, self.toggle_action]
+         self.exited = False
+         self.joint_attention_achieved = False
+
+     def can_overlap(self):
+         # If the NPC is hidden, agent can overlap on it
+         return self.env.hidden_npc
+
+     def encode(self, nb_dims=3):
+         if self.env.hidden_npc:
+             if nb_dims == 3:
+                 return (1, 0, 0)
+             elif nb_dims == 4:
+                 return (1, 0, 0, 0)
+         else:
+             return super().encode(nb_dims=nb_dims)
+
+     def step(self):
+         super().step()
+         reply = None
+         if self.exited:
+             return
+
+         if all(np.array(self.cur_pos) == np.array(self.env.door_pos)):
+             # disappear
+             self.env.grid.set(*self.cur_pos, self.env.door)
+             self.cur_pos = np.array([np.nan, np.nan])
+
+             # close the door
+             self.env.door.toggle(self.env, self.cur_pos)
+
+             # reset the switches
+             for s in self.env.switches:
+                 s.is_on = False
+
+             # update door
+             self.env.update_door_lock()
+
+             self.exited = True
+
+         elif self.knowledgeable:
+
+             if self.joint_attention_achieved:
+                 if self.env.door.is_locked:
+                     first_wrong_id = np.where(self.env.get_selected_password() != self.env.password)[0][0]
+                     goal_pos = self.env.switches_pos[first_wrong_id]
+                     act = self.path_to_toggle_pos(goal_pos)
+                     act()
+
+                 else:
+                     if all(self.front_pos == self.env.door_pos) and self.env.door.is_open:
+                         self.go_forward()
+
+                     else:
+                         act = self.path_to_toggle_pos(self.env.door_pos)
+                         act()
+             else:
+                 wanted_dir = self.compute_wanted_dir(self.env.agent_pos)
+                 action = self.compute_turn_action(wanted_dir)
+                 action()
+
+                 if self.is_eye_contact():
+                     self.joint_attention_achieved = True
+                     reply = "Look at me"
+
+         else:
+             self.env._rand_elem(self.available_moves)()
+
+         self.env.update_door_lock()
+
+         if self.env.hidden_npc:
+             reply = None
+
+         return reply
+
+
+ class DemonstrationGrammar(object):
+
+     templates = ["Move your", "Shake your"]
+     things = ["body", "head"]
+
+     grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
+
+     @classmethod
+     def construct_utterance(cls, action):
+         return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
+
+
+ class DemonstrationEnv(MultiModalMiniGridEnv):
+     """
+     Environment in which the agent must exit through a door locked by a
+     switch password, which a knowledgeable peer can demonstrate
+     """
+
+     def __init__(
+         self,
+         size=5,
+         diminished_reward=True,
+         step_penalty=False,
+         knowledgeable=False,
+         hard_password=False,
+         max_steps=100,
+         n_switches=3,
+         augmentation=False,
+         stump=False,
+         no_turn_off=False,
+         no_light=False,
+         hidden_npc=False
+     ):
+         assert size >= 5
+         self.empty_symbol = "NA \n"
+         self.diminished_reward = diminished_reward
+         self.step_penalty = step_penalty
+         self.knowledgeable = knowledgeable
+         self.hard_password = hard_password
+         self.n_switches = n_switches
+         self.augmentation = augmentation
+         self.stump = stump
+         self.no_turn_off = no_turn_off
+         self.hidden_npc = hidden_npc
+
+         if self.augmentation:
+             assert not no_light
+
+         self.no_light = no_light
+
+         super().__init__(
+             grid_size=size,
+             max_steps=max_steps,
+             # Set this to True for maximum speed
+             see_through_walls=False if self.stump else True,
+             actions=MiniGridEnv.Actions,
+             action_space=spaces.MultiDiscrete([
+                 len(MiniGridEnv.Actions),
+                 *DemonstrationGrammar.grammar_action_space.nvec
+             ]),
+             add_npc_direction=True
+         )
+
+         print({
+             "size": size,
+             "diminished_reward": diminished_reward,
+             "step_penalty": step_penalty,
+         })
+
+     def get_selected_password(self):
+         return np.array([int(s.is_on) for s in self.switches])
+
+     def _gen_grid(self, width, height):
+         # Create the grid
+         self.grid = Grid(width, height, nb_obj_dims=4)
+
+         # Randomly vary the room width and height
+         width = self._rand_int(5, width+1)
+         height = self._rand_int(5, height+1)
+
+         self.wall_x = width - 1
+         self.wall_y = height - 1
+
+         # Generate the surrounding walls
+         self.grid.wall_rect(0, 0, width, height)
+
+         door_color = self._rand_elem(COLOR_NAMES)
+
+         if self.stump:
+             wall_for_door = 1
+         else:
+             wall_for_door = self._rand_int(1, 4)
+
+         if wall_for_door < 2:
+             w = self._rand_int(1, width-1)
+             h = height-1 if wall_for_door == 0 else 0
+         else:
+             w = width-1 if wall_for_door == 3 else 0
+             h = self._rand_int(1, height-1)
+
+         assert h != height-1  # the door mustn't be on the bottom wall
+
+         self.door_pos = (w, h)
+         self.door = Door(door_color, is_locked=True)
+         self.grid.set(*self.door_pos, self.door)
+
+         if self.stump:
+             self.stump_pos = (w, h+2)
+             self.stump_obj = Wall()
+             self.grid.set(*self.stump_pos, self.stump_obj)
+
+         # sample password
+         if self.hard_password:
+             self.password = np.array([self._rand_int(0, 2) for _ in range(self.n_switches)])
+
+         else:
+             idx = self._rand_int(0, self.n_switches)
+             self.password = np.zeros(self.n_switches)
+             self.password[idx] = 1.0
+
+         # add the switches
+         self.switches = []
+         self.switches_pos = []
+         for i in range(self.n_switches):
+             c = COLOR_NAMES[i]
+             pos = np.array([i+1, height-1])
+             sw = Switch(c, is_on=bool(self.password[i]) if self.augmentation else False, no_light=self.no_light)
+             self.grid.set(*pos, sw)
+             self.switches.append(sw)
+             self.switches_pos.append(pos)
+
+         # Set a randomly coloured demonstrating peer NPC
+         color = self._rand_elem(COLOR_NAMES)
+
+         if not self.augmentation:
+             self.peer = DemonstratingPeer(color, "Jim", self, knowledgeable=self.knowledgeable)
+
+             # height-2 so it is not in front of the buttons, in the way
+             peer_pos = np.array((self._rand_int(1, width - 1), self._rand_int(1, height - 2)))
+
+             self.grid.set(*peer_pos, self.peer)
+             self.peer.init_pos = peer_pos
+             self.peer.cur_pos = peer_pos
+
+         # Randomize the agent's start position and orientation
+         self.place_agent(size=(width, height))
+
+         # Generate the mission string
+         self.mission = 'exit the room'
+
+         # Dummy beginning string
+         self.beginning_string = "This is what you hear. \n"
+         self.utterance = self.beginning_string
+
+         # utterance appended at the end of each step
+         self.utterance_history = ""
+
+         # used for rendering
+         self.conversation = self.utterance
+         self.outcome_info = None
+
+     def update_door_lock(self):
+         if self.augmentation and self.step_count <= 10:
+             self.door.is_locked = True
+             self.door.is_open = False
+         else:
+             if np.array_equal(self.get_selected_password(), self.password):
+                 self.door.is_locked = False
+             else:
+                 self.door.is_locked = True
+                 self.door.is_open = False
+
+     def step(self, action):
+         p_action = action[0]
+         utterance_action = action[1:]
+
+         obs, reward, done, info = super().step(p_action)
+         self.update_door_lock()
+
+         if self.augmentation and self.step_count == 10:
+             # reset the switches
+             for s in self.switches:
+                 s.is_on = False
+
+             # update door
+             self.update_door_lock()
+
+         if p_action == self.actions.done:
+             done = True
+
+         if not self.augmentation:
+             peer_reply = self.peer.step()
+
+             if peer_reply is not None:
+                 self.utterance += "{}: {} \n".format(self.peer.name, peer_reply)
+                 self.conversation += "{}: {} \n".format(self.peer.name, peer_reply)
+
+         if all(self.agent_pos == self.door_pos):
+             done = True
+             if not self.augmentation:
+                 if self.peer.exited:
+                     # only give reward if both exited
+                     reward = self._reward()
+             else:
+                 reward = self._reward()
+
+         # discount
+         if self.step_penalty:
+             reward = reward - 0.01
+
+         if self.hidden_npc:
+             # all npcs are hidden
+             assert np.argwhere(obs['image'][:, :, 0] == OBJECT_TO_IDX['npc']).size == 0
+             if not self.augmentation:
+                 assert "{}:".format(self.peer.name) not in self.utterance
+
+         # fill observation with text
+         self.append_existing_utterance_to_history()
+         obs = self.add_utterance_to_observation(obs)
+         self.reset_utterance()
+
+         if done:
+             if reward > 0:
+                 self.outcome_info = "SUCCESS: agent got {} reward \n".format(np.round(reward, 1))
+             else:
+                 self.outcome_info = "FAILURE: agent got {} reward \n".format(reward)
+
+         return obs, reward, done, info
+
+     def _reward(self):
+         if self.diminished_reward:
+             return super()._reward()
+         else:
+             return 1.0
+
+     def render(self, *args, **kwargs):
+         obs = super().render(*args, **kwargs)
+         self.window.clear_text()  # erase previous text
+         self.window.set_caption(self.conversation)
+         sw_color = self.switches[np.argmax(self.password)].color
+         self.window.ax.set_title("correct switch: {}".format(sw_color), loc="left", fontsize=10)
+         if self.outcome_info:
+             color = None
+             if "SUCCESS" in self.outcome_info:
+                 color = "lime"
+             elif "FAILURE" in self.outcome_info:
+                 color = "red"
+             self.window.add_text(*(0.01, 0.85, self.outcome_info),
+                                  **{'fontsize': 15, 'color': color, 'weight': "bold"})
+
+         self.window.show_img(obs)  # re-draw image to add changes to window
+         return obs
+
+
+ ## 100 Demonstrating
+ # register(
+ #     id='MiniGrid-DemonstrationNoLightNoTurnOff100-8x8-v0',
+ #     entry_point='gym_minigrid.envs:DemonstrationNoLightNoTurnOff1008x8Env'
+ # )
+ # class Demonstration100TwoSwitches8x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, n_switches=2)
+ #
+ # class Demonstration100TwoSwitchesHard8x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, n_switches=2, hard_password=True)
+ #
+ ## 100 AUG Demonstrating
+ # class AugmentationDemonstration100TwoSwitches8x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, n_switches=2, augmentation=True)
+ #
+ # class AugmentationDemonstration100TwoSwitchesHard8x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, n_switches=2, hard_password=True, augmentation=True)
+ #
+ ## Three switches
+ ## 100 Demonstrating
+ # class Demonstration1008x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100)
+ #
+ # class Demonstration100Hard8x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, hard_password=True)
+ #
+ ## 100 AUG Demonstrating
+ # class AugmentationDemonstration1008x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, augmentation=True)
+ #
+ # class AugmentationDemonstration100Hard8x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, hard_password=True, augmentation=True)
+ #
+ ## No turn off
+ ## 100 Demonstrating: No light, no turn off
+ # class DemonstrationNoLightNoTurnOff100Hard8x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, no_turn_off=True, hard_password=True, no_light=True)
+ #
+ ## 100 no turn off
+ # class DemonstrationNoTurnOff1008x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, no_turn_off=True)
+ #
+ # class DemonstrationNoTurnOff100Hard8x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, no_turn_off=True, hard_password=True)
+ #
+ ## 100 AUG Demonstrating
+ # class AugmentationDemonstrationNoTurnOff100Hard8x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, no_turn_off=True, hard_password=True, augmentation=True)
+
+
+ ## demonstrating 100 steps
+ # register(
+ #     id='MiniGrid-Demonstration100TwoSwitches-8x8-v0',
+ #     entry_point='gym_minigrid.envs:Demonstration100TwoSwitches8x8Env'
+ # )
+ # register(
+ #     id='MiniGrid-Demonstration100TwoSwitchesHard-8x8-v0',
+ #     entry_point='gym_minigrid.envs:Demonstration100TwoSwitchesHard8x8Env'
+ # )
+ #
+ ## AUG demonstrating 100 steps
+ # register(
+ #     id='MiniGrid-AugmentationDemonstration100TwoSwitches-8x8-v0',
+ #     entry_point='gym_minigrid.envs:AugmentationDemonstration100TwoSwitches8x8Env'
+ # )
+ # register(
+ #     id='MiniGrid-AugmentationDemonstration100TwoSwitchesHard-8x8-v0',
+ #     entry_point='gym_minigrid.envs:AugmentationDemonstration100TwoSwitchesHard8x8Env'
+ # )
+ #
+ ## three switches
+ #
+ ## demonstrating 100 steps
+ # register(
+ #     id='MiniGrid-Demonstration100-8x8-v0',
+ #     entry_point='gym_minigrid.envs:Demonstration1008x8Env'
+ # )
+ # register(
+ #     id='MiniGrid-Demonstration100Hard-8x8-v0',
+ #     entry_point='gym_minigrid.envs:Demonstration100Hard8x8Env'
+ # )
+ #
+ ## AUG demonstrating 100 steps
+ # register(
+ #     id='MiniGrid-AugmentationDemonstration100-8x8-v0',
+ #     entry_point='gym_minigrid.envs:AugmentationDemonstration1008x8Env'
+ # )
+ # register(
+ #     id='MiniGrid-AugmentationDemonstration100Hard-8x8-v0',
+ #     entry_point='gym_minigrid.envs:AugmentationDemonstration100Hard8x8Env'
+ # )
+ #
+ ## no turn off three switches
+ #
+ ## demonstrating 100 steps
+ # register(
+ #     id='MiniGrid-DemonstrationNoTurnOff100-8x8-v0',
+ #     entry_point='gym_minigrid.envs:DemonstrationNoTurnOff1008x8Env'
+ # )
+ # register(
+ #     id='MiniGrid-DemonstrationNoTurnOff100Hard-8x8-v0',
+ #     entry_point='gym_minigrid.envs:DemonstrationNoTurnOff100Hard8x8Env'
+ # )
+ #
+ ## demonstrating 100 steps no light
+ # register(
+ #     id='MiniGrid-DemonstrationNoLightNoTurnOff100-8x8-v0',
+ #     entry_point='gym_minigrid.envs:DemonstrationNoLightNoTurnOff1008x8Env'
+ # )
+ # register(
+ #     id='MiniGrid-DemonstrationNoLightNoTurnOff100Hard-8x8-v0',
+ #     entry_point='gym_minigrid.envs:DemonstrationNoLightNoTurnOff100Hard8x8Env'
+ # )
+ #
+ ## AUG demonstrating 100 steps
+ # register(
+ #     id='MiniGrid-AugmentationDemonstrationNoTurnOff100-8x8-v0',
+ #     entry_point='gym_minigrid.envs:AugmentationDemonstrationNoTurnOff1008x8Env'
+ # )
+ # register(
+ #     id='MiniGrid-AugmentationDemonstrationNoTurnOff100Hard-8x8-v0',
+ #     entry_point='gym_minigrid.envs:AugmentationDemonstrationNoTurnOff100Hard8x8Env'
+ # )
+ #
+ # class DemonstrationNoLightNoTurnOff1008x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, no_turn_off=True, no_light=True)
+ #
+ # class AugmentationDemonstrationNoTurnOff1008x8Env(DemonstrationEnv):
+ #     def __init__(self):
+ #         super().__init__(size=8, knowledgeable=True, max_steps=100, no_turn_off=True, augmentation=True)
+
+
+ class ShowMe8x8Env(DemonstrationEnv):
+     def __init__(self, **kwargs):
+         super().__init__(size=8, knowledgeable=True, max_steps=100, no_turn_off=True, no_light=True, **kwargs)
+
+
+ class ShowMeNoSocial8x8Env(DemonstrationEnv):
+     def __init__(self, **kwargs):
514
+ super().__init__(size=8, knowledgeable=True, max_steps=100, no_turn_off=True, augmentation=True, **kwargs)
515
+
516
+
517
+ # ShowMe registrations (ShowMeNoSocial is the augmentation variant)
518
+ register(
519
+ id='MiniGrid-ShowMeNoSocial-8x8-v0',
520
+ entry_point='gym_minigrid.envs:ShowMeNoSocial8x8Env'
521
+ )
522
+ register(
523
+ id='MiniGrid-ShowMe-8x8-v0',
524
+ entry_point='gym_minigrid.envs:ShowMe8x8Env'
525
+ )
gym-minigrid/gym_minigrid/backup_envs/socialenv.py ADDED
@@ -0,0 +1,194 @@
1
+ from itertools import chain
2
+ from gym_minigrid.minigrid import *
3
+ from gym_minigrid.register import register
4
+
5
+ from gym_minigrid.envs import DanceWithOneNPC8x8Env, CoinThief8x8Env, TalkItOutPolite8x8Env, ShowMe8x8Env, \
6
+ DiverseExit8x8Env, Exiter8x8Env, Helper8x8Env
7
+ from gym_minigrid.envs import DanceWithOneNPCGrammar, CoinThiefGrammar, TalkItOutPoliteGrammar, DemonstrationGrammar, \
8
+ EasyTeachingGamesGrammar, ExiterGrammar
9
+ import time
10
+ from collections import deque
11
+
12
+
13
+ class SocialEnvMetaGrammar(object):
14
+
15
+ def __init__(self, grammar_list, env_list):
16
+ self.templates = []
17
+ self.things = []
18
+ self.original_template_idx = []
19
+ self.original_thing_idx = []
20
+
21
+ self.meta_template_idx_to_env_name = {}
22
+ self.meta_thing_idx_to_env_name = {}
23
+ self.template_idx, self.thing_idx = 0, 0
24
+ env_names = [e.__class__.__name__ for e in env_list]
25
+
26
+ for g, env_name in zip(grammar_list, env_names):
27
+ # add templates
28
+ self.templates += g.templates
29
+ # add things
30
+ self.things += g.things
31
+
32
+ # save original idx for both
33
+ self.original_template_idx += list(range(0, len(g.templates)))
34
+ self.original_thing_idx += list(range(0, len(g.things)))
35
+
36
+ # update meta_idx to env_names dictionaries
37
+ self.meta_template_idx_to_env_name.update(dict.fromkeys(list(range(self.template_idx,
38
+ self.template_idx + len(g.templates))),
39
+ env_name))
40
+ self.template_idx += len(g.templates)
41
+
42
+ self.meta_thing_idx_to_env_name.update(dict.fromkeys(list(range(self.thing_idx,
43
+ self.thing_idx + len(g.things))),
44
+ env_name))
45
+ self.thing_idx += len(g.things)
46
+
47
+ self.grammar_action_space = spaces.MultiDiscrete([len(self.templates), len(self.things)])
48
+
49
50
+ def construct_utterance(self, action):
51
+ return self.templates[int(action[0])] + " " + self.things[int(action[1])] + " "
52
+
53
54
+ def random_utterance(self):
55
+ return np.random.choice(self.templates) + " " + np.random.choice(self.things) + " "
56
+
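+ # Illustrative example: with grammars G1 (2 templates) and G2 (3 templates),
+ # meta template indices 0-1 map back to G1's local indices 0-1, while 2-4
+ # map to G2's 0-2; in construct_original_action below, a meta action whose
+ # template or thing does not belong to the current env yields [nan, nan],
+ # i.e. the utterance is dropped.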
57
+ def construct_original_action(self, action, current_env_name):
58
+ template_env_name = self.meta_template_idx_to_env_name[int(action[0])]
59
+ thing_env_name = self.meta_thing_idx_to_env_name[int(action[1])]
60
+
61
+ if template_env_name == current_env_name and thing_env_name == current_env_name:
62
+ original_action = [self.original_template_idx[int(action[0])], self.original_thing_idx[int(action[1])]]
63
+ else:
64
+ original_action = [np.nan, np.nan]
65
+ return original_action
66
+
67
+
68
+ class SocialEnv(gym.Env):
69
+ """
70
+ Meta-environment containing all the other social environments (multi-task learning)
71
+ """
72
+
73
+ def __init__(
74
+ self,
75
+ size=8,
76
+ hidden_npc=False,
77
+ is_test_env=False
78
79
+ ):
80
+
81
+ # Number of cells (width and height) in the agent view
82
+ self.agent_view_size = 7
83
+
84
+ # Number of object dimensions (i.e. number of channels in symbolic image)
85
+ self.nb_obj_dims = 4
86
+
87
+ # Observations are dictionaries containing an
88
+ # encoding of the grid and a textual 'mission' string
89
+ self.observation_space = spaces.Box(
90
+ low=0,
91
+ high=255,
92
+ shape=(self.agent_view_size, self.agent_view_size, self.nb_obj_dims),
93
+ dtype='uint8'
94
+ )
95
+ self.observation_space = spaces.Dict({
96
+ 'image': self.observation_space
97
+ })
98
+
99
+ self.hidden_npc = hidden_npc # TODO: implement hidden npc
100
+
101
+ # TODO get max step from env list
102
+
103
+ self.env_list = [DanceWithOneNPC8x8Env, CoinThief8x8Env, TalkItOutPolite8x8Env, ShowMe8x8Env, DiverseExit8x8Env,
104
+ Exiter8x8Env]
105
+ self.all_npc_utterance_actions = sorted(list(set(chain(*[e.all_npc_utterance_actions for e in self.env_list]))))
106
+ self.grammar_list = [DanceWithOneNPCGrammar, CoinThiefGrammar, TalkItOutPoliteGrammar, DemonstrationGrammar,
107
+ EasyTeachingGamesGrammar, ExiterGrammar]
108
+
109
+ if is_test_env:
110
+ self.env_list[-1] = Helper8x8Env
111
+
112
+ # instantiate all envs
113
+ self.env_list = [env() for env in self.env_list]
114
+
115
+ self.current_env = None
116
+
117
+ self.metaGrammar = SocialEnvMetaGrammar(self.grammar_list, self.env_list)
118
+
119
+ # Actions are discrete integer values
120
+ self.action_space = spaces.MultiDiscrete([len(MiniGridEnv.Actions),
121
+ *self.metaGrammar.grammar_action_space.nvec])
122
+ self.actions = MiniGridEnv.Actions
123
+
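+ # A full action is [primitive_action, meta_template_idx, meta_thing_idx];
+ # e.g. [self.actions.forward, nan, nan] moves forward without speaking (see step()).
+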
124
+ self._window = None
125
+
126
+ def reset(self):
127
+ # select a new social environment at random for each new episode
128
+
129
+ old_window = None
130
+ if self.current_env: # a previous env exists, save old window
131
+ old_window = self.current_env.window
132
+
133
+ # sample new environment
134
+ self.current_env = np.random.choice(self.env_list)
135
+ obs = self.current_env.reset()
136
+
137
+ # carry over the window if this env is not the first
138
+ if old_window:
139
+ self.current_env.window = old_window
140
+ return obs
141
+
142
+ def seed(self, seed=1337):
143
+ # Seed the random number generator
144
+ for env in self.env_list:
145
+ env.seed(seed)
146
+ np.random.seed(seed)
147
+ return [seed]
148
+
149
+ def step(self, action):
150
+ assert self.current_env
151
+ if len(action) == 1: # agent cannot speak
152
+ utterance_action = [np.nan, np.nan]
153
+ else:
154
+ utterance_action = action[1:]
155
+
156
+ if len(action) >= 1 and not all(np.isnan(utterance_action)): # if the agent speaks, construct the env-specific action
157
+ action[1:] = self.metaGrammar.construct_original_action(action[1:], self.current_env.__class__.__name__)
158
+
159
+ return self.current_env.step(action)
160
+
161
+ @property
162
+ def window(self):
163
+ return self.current_env.window
164
+
165
+ @window.setter
166
+ def window(self, value):
167
+ self.current_env.window = value
168
+
169
+ def render(self, *args, **kwargs):
170
+ assert self.current_env
171
+ return self.current_env.render(*args, **kwargs)
172
+
173
+ @property
174
+ def step_count(self):
175
+ return self.current_env.step_count
176
+
177
+ def get_mission(self):
178
+ return self.current_env.get_mission()
179
+
180
+
181
+ class SocialEnv8x8Env(SocialEnv):
182
+ def __init__(self, **kwargs):
183
+ super().__init__(size=8, **kwargs)
184
+
185
+
186
+ register(
187
+ id='MiniGrid-SocialEnv-5x5-v0',
188
+ entry_point='gym_minigrid.envs:SocialEnv'
189
+ )
190
+
191
+ register(
192
+ id='MiniGrid-SocialEnv-8x8-v0',
193
+ entry_point='gym_minigrid.envs:SocialEnv8x8Env'
194
+ )
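+
+ # Usage sketch (illustrative; assumes gym and gym_minigrid are importable):
+ # import gym
+ # env = gym.make('MiniGrid-SocialEnv-8x8-v0')
+ # obs = env.reset() # samples one of the underlying social envs per episode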
gym-minigrid/gym_minigrid/backup_envs/spying.py ADDED
@@ -0,0 +1,429 @@
1
+ import numpy as np
2
+
3
+ from gym_minigrid.minigrid import *
4
+ from gym_minigrid.register import register
5
+
6
+ import time
7
+ from collections import deque
8
+
9
+
10
+ class Peer(NPC):
11
+ """
12
+ A peer NPC (knowledgeable or random) that the agent can spy on to learn the correct switch combination
13
+ """
14
+
15
+ def __init__(self, color, name, env, knowledgeable=False):
16
+ super().__init__(color)
17
+ self.name = name
18
+ self.npc_dir = 1 # NPC initially looks downward
19
+ self.npc_type = 0
20
+ self.env = env
21
+ self.knowledgeable = knowledgeable
22
+ self.npc_actions = []
23
+ self.dancing_step_idx = 0
24
+ self.actions = MiniGridEnv.Actions
25
+ self.add_npc_direction = True
26
+ self.available_moves = [self.rotate_left, self.rotate_right, self.go_forward, self.toggle_action]
27
+ self.exited = False
28
+
29
+ def step(self):
30
+ if self.exited:
31
+ return
32
+
33
+ if all(np.array(self.cur_pos) == np.array(self.env.door_pos)):
34
+ # disappear
35
+ self.env.grid.set(*self.cur_pos, self.env.door)
36
+ self.cur_pos = np.array([np.nan, np.nan])
37
+
38
+ # close door
39
+ self.env.door.toggle(self.env, self.cur_pos)
40
+
41
+ # reset the switches
42
+ for s in self.env.switches:
43
+ s.is_on = False
44
+
45
+ # update door
46
+ self.env.update_door_lock()
47
+
48
+ self.exited = True
49
+
50
+ elif self.knowledgeable:
51
+
52
+ if self.env.door.is_locked:
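+ # find and toggle the first switch whose state differs from the password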
53
+ first_wrong_id = np.where(self.env.get_selected_password() != self.env.password)[0][0]
54
+ print("first_wrong_id:", first_wrong_id)
55
+ goal_pos = self.env.switches_pos[first_wrong_id]
56
+ act = self.path_to_toggle_pos(goal_pos)
57
+ act()
58
+
59
+ else:
60
+ if all(self.front_pos == self.env.door_pos) and self.env.door.is_open:
61
+ self.go_forward()
62
+
63
+ else:
64
+ act = self.path_to_toggle_pos(self.env.door_pos)
65
+ act()
66
+
67
+ else:
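+ # unknowledgeable peer: take a random primitive action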
68
+ self.env._rand_elem(self.available_moves)()
69
+
70
+ self.env.update_door_lock()
71
+
72
+
73
+ class SpyingGrammar(object):
74
+
75
+ templates = ["Move your", "Shake your"]
76
+ things = ["body", "head"]
77
+
78
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
79
+
80
+ @classmethod
81
+ def construct_utterance(cls, action):
82
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
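+ # e.g. construct_utterance([0, 1]) -> "Move your head "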
83
+
84
+
85
+ class SpyingEnv(MultiModalMiniGridEnv):
86
+ """
87
+ Environment in which the agent must exit through a door locked by a
+ switch combination, which can be learned by spying on the peer NPC
89
+ """
90
+
91
+ def __init__(
92
+ self,
93
+ size=5,
94
+ diminished_reward=True,
95
+ step_penalty=False,
96
+ knowledgeable=False,
97
+ hard_password=False,
98
+ max_steps=None,
99
+ n_switches=3
100
+ ):
101
+ assert size >= 5
102
+ self.empty_symbol = "NA \n"
103
+ self.diminished_reward = diminished_reward
104
+ self.step_penalty = step_penalty
105
+ self.knowledgeable = knowledgeable
106
+ self.hard_password = hard_password
107
+ self.n_switches = n_switches
108
+
109
+ super().__init__(
110
+ grid_size=size,
111
+ max_steps=max_steps or 5*size**2,
112
+ # Set this to True for maximum speed
113
+ see_through_walls=True,
114
+ actions=MiniGridEnv.Actions,
115
+ action_space=spaces.MultiDiscrete([
116
+ len(MiniGridEnv.Actions),
117
+ *SpyingGrammar.grammar_action_space.nvec
118
+ ]),
119
+ add_npc_direction=True
120
+ )
121
+
122
+ print({
123
+ "size": size,
124
+ "diminished_reward": diminished_reward,
125
+ "step_penalty": step_penalty,
126
+ })
127
+
128
+ def get_selected_password(self):
129
+ return np.array([int(s.is_on) for s in self.switches])
130
+
131
+ def _gen_grid(self, width, height):
132
+ # Create the grid
133
+ self.grid = Grid(width, height, nb_obj_dims=4)
134
+
135
+ # Randomly vary the room width and height
136
+ width = self._rand_int(5, width+1)
137
+ height = self._rand_int(5, height+1)
138
+
139
+ self.wall_x = width - 1
140
+ self.wall_y = height - 1
141
+
142
+ # Generate the surrounding walls
143
+ self.grid.wall_rect(0, 0, width, height)
144
+
145
+ door_color = self._rand_elem(COLOR_NAMES)
146
+
147
+ wall_for_door = self._rand_int(1, 4)
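+ # (wall 0 would be the bottom wall, which holds the switches, so it is excluded)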
148
+
149
+ if wall_for_door < 2:
150
+ w = self._rand_int(1, width-1)
151
+ h = height-1 if wall_for_door == 0 else 0
152
+ else:
153
+ w = width-1 if wall_for_door == 3 else 0
154
+ h = self._rand_int(1, height-1)
155
+
156
+ assert h != height-1 # door mustn't be on the bottom wall
157
+
158
+ self.door_pos = (w, h)
159
+ self.door = Door(door_color, is_locked=True)
160
+ self.grid.set(*self.door_pos, self.door)
161
+
162
+ # add the switches
163
+ self.switches = []
164
+ self.switches_pos = []
165
+ for i in range(self.n_switches):
166
+ c = COLOR_NAMES[i]
167
+ pos = np.array([i+1, height-1])
168
+ sw = Switch(c)
169
+ self.grid.set(*pos, sw)
170
+ self.switches.append(sw)
171
+ self.switches_pos.append(pos)
172
+
173
+ # sample password
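+ # (easy mode: one-hot, exactly one switch must be on; hard mode: each
+ # switch is independently on/off, giving 2**n_switches combinations)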
174
+ if self.hard_password:
175
+ self.password = np.array([self._rand_int(0, 2) for _ in range(self.n_switches)])
176
+
177
+ else:
178
+ idx = self._rand_int(0, self.n_switches)
179
+ self.password = np.zeros(self.n_switches)
180
+ self.password[idx] = 1.0
181
+
182
+ # Set a randomly coloured peer NPC
183
+ color = self._rand_elem(COLOR_NAMES)
184
+ self.peer = Peer(color, "Jim", self, knowledgeable=self.knowledgeable)
185
+
186
+ # Place it at a random position in the room
187
+ peer_pos = np.array((self._rand_int(1, width - 1), self._rand_int(1, height - 1)))
188
+
189
+ self.grid.set(*peer_pos, self.peer)
190
+ self.peer.init_pos = peer_pos
191
+ self.peer.cur_pos = peer_pos
192
+
193
+ # Randomize the agent's start position and orientation
194
+ self.place_agent(size=(width, height))
195
+
196
+ # Generate the mission string
197
+ self.mission = 'exit the room'
198
+
199
+ # Dummy beginning string
200
+ self.beginning_string = "This is what you hear. \n"
201
+ self.utterance = self.beginning_string
202
+
203
+ # utterance appended at the end of each step
204
+ self.utterance_history = ""
205
+
206
+ # used for rendering
207
+ self.conversation = self.utterance
208
+
209
+ def update_door_lock(self):
210
+ if np.array_equal(self.get_selected_password(), self.password):
211
+ self.door.is_locked = False
212
+ else:
213
+ self.door.is_locked = True
214
+ self.door.is_open = False
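+ # Example: with password [0, 1, 0], the door unlocks only while exactly
+ # the second switch is on; any other combination re-locks and closes it.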
215
+
216
+ def step(self, action):
217
+ p_action = action[0]
218
+ utterance_action = action[1:]
219
+
220
+ obs, reward, done, info = super().step(p_action)
221
+ self.update_door_lock()
222
+
223
+ print("pass:", self.password)
224
+
225
+ if p_action == self.actions.done:
226
+ done = True
227
+
228
+ self.peer.step()
229
+
230
+ if all(self.agent_pos == self.door_pos):
231
+ done = True
232
+ if self.peer.exited:
233
+ # only give a reward if both the agent and the peer exited
234
+ reward = self._reward()
235
+
236
+ # discount
237
+ if self.step_penalty:
238
+ reward = reward - 0.01
239
+
240
+ # fill observation with text
241
+ self.append_existing_utterance_to_history()
242
+ obs = self.add_utterance_to_observation(obs)
243
+ self.reset_utterance()
244
+ return obs, reward, done, info
245
+
246
+ def _reward(self):
247
+ if self.diminished_reward:
248
+ return super()._reward()
249
+ else:
250
+ return 1.0
251
+
252
+ def render(self, *args, **kwargs):
253
+ obs = super().render(*args, **kwargs)
254
+ print("conversation:\n", self.conversation)
255
+ print("utterance_history:\n", self.utterance_history)
256
+ self.window.set_caption(self.conversation, [self.peer.name])
257
+ return obs
258
+
259
+
260
+ class Spying8x8Env(SpyingEnv):
261
+ def __init__(self):
262
+ super().__init__(size=8)
263
+
264
+
265
+ class Spying6x6Env(SpyingEnv):
266
+ def __init__(self):
267
+ super().__init__(size=6)
268
+
269
+
270
+ # knowledgeable
271
+ class SpyingKnowledgeableEnv(SpyingEnv):
272
+ def __init__(self):
273
+ super().__init__(size=5, knowledgeable=True)
274
+
275
+ class SpyingKnowledgeable6x6Env(SpyingEnv):
276
+ def __init__(self):
277
+ super().__init__(size=6, knowledgeable=True)
278
+
279
+ class SpyingKnowledgeable8x8Env(SpyingEnv):
280
+ def __init__(self):
281
+ super().__init__(size=8, knowledgeable=True)
282
+
283
+ class SpyingKnowledgeableHardPassword8x8Env(SpyingEnv):
284
+ def __init__(self):
285
+ super().__init__(size=8, knowledgeable=True, hard_password=True)
286
+
287
+ class Spying508x8Env(SpyingEnv):
288
+ def __init__(self):
289
+ super().__init__(size=8, max_steps=50)
290
+
291
+ class SpyingKnowledgeable508x8Env(SpyingEnv):
292
+ def __init__(self):
293
+ super().__init__(size=8, knowledgeable=True, max_steps=50)
294
+
295
+ class SpyingKnowledgeableHardPassword508x8Env(SpyingEnv):
296
+ def __init__(self):
297
+ super().__init__(size=8, knowledgeable=True, hard_password=True, max_steps=50)
298
+
299
+ class SpyingKnowledgeable1008x8Env(SpyingEnv):
300
+ def __init__(self):
301
+ super().__init__(size=8, knowledgeable=True, max_steps=100)
302
+
303
+ class SpyingKnowledgeable100OneSwitch8x8Env(SpyingEnv):
304
+ def __init__(self):
305
+ super().__init__(size=8, knowledgeable=True, max_steps=100, n_switches=1)
306
+
307
+ class SpyingKnowledgeable50OneSwitch5x5Env(SpyingEnv):
308
+ def __init__(self):
309
+ super().__init__(size=5, knowledgeable=True, max_steps=50, n_switches=1)
310
+
311
+
+ # non-knowledgeable variant (referenced by the registration below)
+ class SpyingUnknowledgeable50OneSwitch5x5Env(SpyingEnv):
+ def __init__(self):
+ super().__init__(size=5, knowledgeable=False, max_steps=50, n_switches=1)
+
312
+ class SpyingKnowledgeable505x5Env(SpyingEnv):
313
+ def __init__(self):
314
+ super().__init__(size=5, knowledgeable=True, max_steps=50, n_switches=3)
315
+
316
+ class SpyingKnowledgeable50TwoSwitches8x8Env(SpyingEnv):
317
+ def __init__(self):
318
+ super().__init__(size=8, knowledgeable=True, max_steps=50, n_switches=2)
319
+
320
+ class SpyingKnowledgeable50TwoSwitchesHard8x8Env(SpyingEnv):
321
+ def __init__(self):
322
+ super().__init__(size=8, knowledgeable=True, max_steps=50, n_switches=2, hard_password=True)
323
+
324
+
325
+ class SpyingKnowledgeable100TwoSwitches8x8Env(SpyingEnv):
326
+ def __init__(self):
327
+ super().__init__(size=8, knowledgeable=True, max_steps=100, n_switches=2)
328
+
329
+ class SpyingKnowledgeable100TwoSwitchesHard8x8Env(SpyingEnv):
330
+ def __init__(self):
331
+ super().__init__(size=8, knowledgeable=True, max_steps=100, n_switches=2, hard_password=True)
332
+
333
+
334
+
335
+
336
+ register(
337
+ id='MiniGrid-Spying-5x5-v0',
338
+ entry_point='gym_minigrid.envs:SpyingEnv'
339
+ )
340
+
341
+ register(
342
+ id='MiniGrid-Spying-6x6-v0',
343
+ entry_point='gym_minigrid.envs:Spying6x6Env'
344
+ )
345
+
346
+ register(
347
+ id='MiniGrid-Spying-8x8-v0',
348
+ entry_point='gym_minigrid.envs:Spying8x8Env'
349
+ )
350
+
351
+ register(
352
+ id='MiniGrid-SpyingKnowledgeable-5x5-v0',
353
+ entry_point='gym_minigrid.envs:SpyingKnowledgeableEnv'
354
+ )
355
+
356
+ register(
357
+ id='MiniGrid-SpyingKnowledgeable-6x6-v0',
358
+ entry_point='gym_minigrid.envs:SpyingKnowledgeable6x6Env'
359
+ )
360
+
361
+ register(
362
+ id='MiniGrid-SpyingKnowledgeable-8x8-v0',
363
+ entry_point='gym_minigrid.envs:SpyingKnowledgeable8x8Env'
364
+ )
365
+
366
+ register(
367
+ id='MiniGrid-SpyingKnowledgeableHardPassword-8x8-v0',
368
+ entry_point='gym_minigrid.envs:SpyingKnowledgeableHardPassword8x8Env'
369
+ )
370
+
371
+ # max len 50
372
+ register(
373
+ id='MiniGrid-Spying50-8x8-v0',
374
+ entry_point='gym_minigrid.envs:Spying508x8Env'
375
+ )
376
+
377
+ register(
378
+ id='MiniGrid-SpyingKnowledgeable50-8x8-v0',
379
+ entry_point='gym_minigrid.envs:SpyingKnowledgeable508x8Env'
380
+ )
381
+
382
+ register(
383
+ id='MiniGrid-SpyingKnowledgeableHardPassword50-8x8-v0',
384
+ entry_point='gym_minigrid.envs:SpyingKnowledgeableHardPassword508x8Env'
385
+ )
386
+
387
+ # max len 100
388
+ register(
389
+ id='MiniGrid-SpyingKnowledgeable100-8x8-v0',
390
+ entry_point='gym_minigrid.envs:SpyingKnowledgeable1008x8Env'
391
+ )
392
+
393
+ # max len OneSwitch
394
+ register(
395
+ id='MiniGrid-SpyingKnowledgeable100OneSwitch-8x8-v0',
396
+ entry_point='gym_minigrid.envs:SpyingKnowledgeable100OneSwitch8x8Env'
397
+ )
398
+
399
+ register(
400
+ id='MiniGrid-SpyingKnowledgeable50OneSwitch-5x5-v0',
401
+ entry_point='gym_minigrid.envs:SpyingKnowledgeable50OneSwitch5x5Env'
402
+ )
403
+
404
+ register(
405
+ id='MiniGrid-SpyingUnknowledgeable50OneSwitch-5x5-v0',
406
+ entry_point='gym_minigrid.envs:SpyingUnknowledgeable50OneSwitch5x5Env'
407
+ )
408
+
409
+ register(
410
+ id='MiniGrid-SpyingKnowledgeable50-5x5-v0',
411
+ entry_point='gym_minigrid.envs:SpyingKnowledgeable505x5Env'
412
+ )
413
+
414
+ register(
415
+ id='MiniGrid-SpyingKnowledgeable50TwoSwitches-8x8-v0',
416
+ entry_point='gym_minigrid.envs:SpyingKnowledgeable50TwoSwitches8x8Env'
417
+ )
418
+ register(
419
+ id='MiniGrid-SpyingKnowledgeable50TwoSwitchesHard-8x8-v0',
420
+ entry_point='gym_minigrid.envs:SpyingKnowledgeable50TwoSwitchesHard8x8Env'
421
+ )
422
+ register(
423
+ id='MiniGrid-SpyingKnowledgeable100TwoSwitches-8x8-v0',
424
+ entry_point='gym_minigrid.envs:SpyingKnowledgeable100TwoSwitches8x8Env'
425
+ )
426
+ register(
427
+ id='MiniGrid-SpyingKnowledgeable100TwoSwitchesHard-8x8-v0',
428
+ entry_point='gym_minigrid.envs:SpyingKnowledgeable100TwoSwitchesHard8x8Env'
429
+ )
gym-minigrid/gym_minigrid/backup_envs/talkitout.py ADDED
@@ -0,0 +1,385 @@
1
+ from gym_minigrid.minigrid import *
2
+ from gym_minigrid.register import register
3
+
4
+
5
+ class Wizard(NPC):
6
+ """
7
+ A simple NPC that knows who is telling the truth
8
+ """
9
+
10
+ def __init__(self, color, name, env):
11
+ super().__init__(color)
12
+ self.name = name
13
+ self.env = env
14
+ self.npc_dir = 1 # NPC initially looks downward
15
+ # todo: this should be id == name
16
+ self.npc_type = 0 # this will be put into the encoding
17
+
18
+ def listen(self, utterance):
19
+ if utterance == TalkItOutGrammar.construct_utterance([0, 1]):
20
+ if self.env.nameless:
21
+ return "Ask the {} guide.".format(self.env.true_guide.color)
22
+ else:
23
+ return "Ask {}.".format(self.env.true_guide.name)
24
+
25
+ return None
26
+
27
+
28
+ class Guide(NPC):
29
+ """
30
+ A simple NPC that knows the correct door.
31
+ """
32
+
33
+ def __init__(self, color, name, env, liar=False):
34
+ super().__init__(color)
35
+ self.name = name
36
+ self.env = env
37
+ self.liar = liar
38
+ self.npc_dir = 1 # NPC initially looks downward
39
+ # todo: this should be id == name
40
+ self.npc_type = 1 # this will be put into the encoding
41
+
42
+ # Select a random target object as mission
43
+ obj_idx = self.env._rand_int(0, len(self.env.door_pos))
44
+ self.target_pos = self.env.door_pos[obj_idx]
45
+ self.target_color = self.env.door_colors[obj_idx]
46
+
47
+ def listen(self, utterance):
48
+ if utterance == TalkItOutGrammar.construct_utterance([0, 1]):
49
+ if self.liar:
50
+ fake_colors = [c for c in self.env.door_colors if c != self.env.target_color]
51
+ fake_color = self.env._rand_elem(fake_colors)
52
+
53
+ # Generate the mission string
54
+ assert fake_color != self.env.target_color
55
+ return 'go to the %s door' % fake_color
56
+
57
+ else:
58
+ return self.env.mission
59
+
60
+ return None
61
+
62
+ def render(self, img):
63
+ c = COLORS[self.color]
64
+
65
+ npc_shapes = []
66
+ # Draw eyes
67
+ npc_shapes.append(point_in_circle(cx=0.70, cy=0.50, r=0.10))
68
+ npc_shapes.append(point_in_circle(cx=0.30, cy=0.50, r=0.10))
69
+
70
+ # Draw mouth
71
+ npc_shapes.append(point_in_rect(0.20, 0.80, 0.72, 0.81))
72
+
73
+ # todo: move this to super function
74
+ # todo: super.render should be able to take the npc_shapes and then rotate them
75
+
76
+ if hasattr(self, "npc_dir"):
77
+ # Pre-rotation to ensure npc_dir = 1 means NPC looks downwards
78
+ npc_shapes = [rotate_fn(v, cx=0.5, cy=0.5, theta=-1*(math.pi / 2)) for v in npc_shapes]
79
+ # Rotate npc based on its direction
80
+ npc_shapes = [rotate_fn(v, cx=0.5, cy=0.5, theta=(math.pi/2) * self.npc_dir) for v in npc_shapes]
81
+
82
+ # Draw shapes
83
+ for v in npc_shapes:
84
+ fill_coords(img, v, c)
85
+
86
+
87
+ class TalkItOutGrammar(object):
88
+
89
+ templates = ["Where is", "Open", "Close", "What is"]
90
+ things = [
91
+ "sesame", "the exit", "the wall", "the floor", "the ceiling", "the window", "the entrance", "the closet",
92
+ "the drawer", "the fridge", "oven", "the lamp", "the trash can", "the chair", "the bed", "the sofa"
93
+ ]
94
+
95
+ grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])
96
+
97
+ @classmethod
98
+ def construct_utterance(cls, action):
99
+ return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "
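+ # e.g. construct_utterance([0, 1]) -> "Where is the exit " (ask the NPCs)
+ # and construct_utterance([1, 0]) -> "Open sesame " (try to open the door)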
100
+
101
+
102
+ class TalkItOutEnv(MultiModalMiniGridEnv):
103
+ """
104
+ Environment in which the agent is instructed to go to a given object
105
+ named using an English text string
106
+ """
107
+
108
+ def __init__(
109
+ self,
110
+ size=5,
111
+ hear_yourself=False,
112
+ diminished_reward=True,
113
+ step_penalty=False,
114
+ nameless=False,
115
+ ):
116
+ assert size >= 5
117
+ self.empty_symbol = "NA \n"
118
+ self.hear_yourself = hear_yourself
119
+ self.diminished_reward = diminished_reward
120
+ self.step_penalty = step_penalty
121
+ self.nameless = nameless
122
+
123
+ super().__init__(
124
+ grid_size=size,
125
+ max_steps=5*size**2,
126
+ # Set this to True for maximum speed
127
+ see_through_walls=True,
128
+ actions=MiniGridEnv.Actions,
129
+ action_space=spaces.MultiDiscrete([
130
+ len(MiniGridEnv.Actions),
131
+ *TalkItOutGrammar.grammar_action_space.nvec
132
+ ]),
133
+ add_npc_direction=True
134
+ )
135
+
136
+ print({
137
+ "size": size,
138
+ "hear_yourself": hear_yourself,
139
+ "diminished_reward": diminished_reward,
140
+ "step_penalty": step_penalty,
141
+ })
142
+
143
+ def _gen_grid(self, width, height):
144
+ # Create the grid
145
+ self.grid = Grid(width, height, nb_obj_dims=4)
146
+
147
+ # Randomly vary the room width and height
148
+ width = self._rand_int(5, width+1)
149
+ height = self._rand_int(5, height+1)
150
+
151
+ # Generate the surrounding walls
152
+ self.grid.wall_rect(0, 0, width, height)
153
156
+
157
+ # Generate the 4 doors at random positions
158
+ self.door_pos = []
159
+ self.door_front_pos = [] # remembers positions in front of the doors, to avoid placing NPCs there
160
+
161
+ self.door_pos.append((self._rand_int(2, width-2), 0))
162
+ self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1]+1))
163
+
164
+ self.door_pos.append((self._rand_int(2, width-2), height-1))
165
+ self.door_front_pos.append((self.door_pos[-1][0], self.door_pos[-1][1] - 1))
166
+
167
+ self.door_pos.append((0, self._rand_int(2, height-2)))
168
+ self.door_front_pos.append((self.door_pos[-1][0] + 1, self.door_pos[-1][1]))
169
+
170
+ self.door_pos.append((width-1, self._rand_int(2, height-2)))
171
+ self.door_front_pos.append((self.door_pos[-1][0] - 1, self.door_pos[-1][1]))
172
+
173
+ # Generate the door colors
174
+ self.door_colors = []
175
+ while len(self.door_colors) < len(self.door_pos):
176
+ color = self._rand_elem(COLOR_NAMES)
177
+ if color in self.door_colors:
178
+ continue
179
+ self.door_colors.append(color)
180
+
181
+ # Place the doors in the grid
182
+ for idx, pos in enumerate(self.door_pos):
183
+ color = self.door_colors[idx]
184
+ self.grid.set(*pos, Door(color))
185
+
186
+
187
+ # Set a randomly coloured WIZARD at a random position
188
+ color = self._rand_elem(COLOR_NAMES)
189
+ self.wizard = Wizard(color, "Gandalf", self)
190
+
191
+ # Place it randomly, omitting front of door positions
192
+ self.place_obj(self.wizard,
193
+ size=(width, height),
194
+ reject_fn=lambda _, p: tuple(p) in self.door_front_pos)
195
+
196
+ # add guides
197
+ GUIDE_NAMES = ["John", "Jack"]
198
+
199
+ # Set a randomly coloured TRUE GUIDE at a random position
200
+ name = self._rand_elem(GUIDE_NAMES)
201
+ color = self._rand_elem(COLOR_NAMES)
202
+ self.true_guide = Guide(color, name, self, liar=False)
203
+
204
+ # Place it randomly, omitting invalid positions
205
+ self.place_obj(self.true_guide,
206
+ size=(width, height),
207
+ # reject_fn=lambda _, p: tuple(p) in self.door_front_pos)
208
+ reject_fn=lambda _, p: tuple(p) in [*self.door_front_pos, tuple(self.wizard.cur_pos)])
209
+
210
+ # Set a randomly coloured FALSE GUIDE at a random position
211
+ name = self._rand_elem([n for n in GUIDE_NAMES if n != self.true_guide.name])
212
+
213
+ if self.nameless:
214
+ color = self._rand_elem([c for c in COLOR_NAMES if c != self.true_guide.color])
215
+ else:
216
+ color = self._rand_elem(COLOR_NAMES)
217
+
218
+ self.false_guide = Guide(color, name, self, liar=True)
219
+
220
+ # Place it randomly, omitting invalid positions
221
+ self.place_obj(self.false_guide,
222
+ size=(width, height),
223
+ reject_fn=lambda _, p: tuple(p) in [
224
+ *self.door_front_pos, tuple(self.wizard.cur_pos), tuple(self.true_guide.cur_pos)])
225
+ assert self.true_guide.name != self.false_guide.name
226
+
227
+ # Randomize the agent's start position and orientation
228
+ self.place_agent(size=(width, height))
229
+
230
+ # Select a random target door
231
+ self.doorIdx = self._rand_int(0, len(self.door_pos))
232
+ self.target_pos = self.door_pos[self.doorIdx]
233
+ self.target_color = self.door_colors[self.doorIdx]
234
+
235
+ # Generate the mission string
236
+ self.mission = 'go to the %s door' % self.target_color
237
+
238
+ # Dummy beginning string
239
+ self.beginning_string = "This is what you hear. \n"
240
+ self.utterance = self.beginning_string
241
+
242
+ # utterance appended at the end of each step
243
+ self.utterance_history = ""
244
+
245
+ # used for rendering
246
+ self.conversation = self.utterance
247
+
248
+ def step(self, action):
249
+ p_action = action[0]
250
+ utterance_action = action[1:]
251
+
252
+ # assert that the utterance components are either all NaN or none are
253
+ assert len(set(np.isnan(utterance_action))) == 1
254
+
255
+ speak_flag = not all(np.isnan(utterance_action))
256
+
257
+ obs, reward, done, info = super().step(p_action)
258
+
259
+ if speak_flag:
260
+ utterance = TalkItOutGrammar.construct_utterance(utterance_action)
261
+ if self.hear_yourself:
262
+ if self.nameless:
263
+ self.utterance += "{} \n".format(utterance)
264
+ else:
265
+ self.utterance += "YOU: {} \n".format(utterance)
266
+
267
+ self.conversation += "YOU: {} \n".format(utterance)
268
+
269
+ # check if near wizard
270
+ if self.wizard.is_near_agent():
271
+ reply = self.wizard.listen(utterance)
272
+
273
+ if reply:
274
+ if self.nameless:
275
+ self.utterance += "{} \n".format(reply)
276
+ else:
277
+ self.utterance += "{}: {} \n".format(self.wizard.name, reply)
278
+
279
+ self.conversation += "{}: {} \n".format(self.wizard.name, reply)
280
+
281
+ if self.true_guide.is_near_agent():
282
+ reply = self.true_guide.listen(utterance)
283
+
284
+ if reply:
285
+ if self.nameless:
286
+ self.utterance += "{} \n".format(reply)
287
+ else:
288
+ self.utterance += "{}: {} \n".format(self.true_guide.name, reply)
289
+
290
+ self.conversation += "{}: {} \n".format(self.true_guide.name, reply)
291
+
292
+ if self.false_guide.is_near_agent():
293
+ reply = self.false_guide.listen(utterance)
294
+
295
+ if reply:
296
+ if self.nameless:
297
+ self.utterance += "{} \n".format(reply)
298
+ else:
299
+ self.utterance += "{}: {} \n".format(self.false_guide.name, reply)
300
+
301
+ self.conversation += "{}: {} \n".format(self.false_guide.name, reply)
302
+
303
+ if utterance == TalkItOutGrammar.construct_utterance([1, 0]):
304
+ ax, ay = self.agent_pos
305
+ tx, ty = self.target_pos
306
+
307
+ if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
308
+ reward = self._reward()
309
+
310
+ for dx, dy in self.door_pos:
311
+ if (ax == dx and abs(ay - dy) == 1) or (ay == dy and abs(ax - dx) == 1):
312
+ # the agent has chosen a door; the episode ends regardless of whether it is the correct one
313
+ done = True
314
+
315
+ # Don't let the agent open any of the doors
316
+ if p_action == self.actions.toggle:
317
+ done = True
318
+
319
+ if p_action == self.actions.done:
320
+ done = True
321
+
322
+ # discount
323
+ if self.step_penalty:
324
+ reward = reward - 0.01
325
+
326
+ # fill observation with text
327
+ self.append_existing_utterance_to_history()
328
+ obs = self.add_utterance_to_observation(obs)
329
+ self.reset_utterance()
330
+
331
+ return obs, reward, done, info
332
+
333
+ def _reward(self):
334
+ if self.diminished_reward:
335
+ return super()._reward()
336
+ else:
337
+ return 1.0
338
+
339
+ def render(self, *args, **kwargs):
340
+ obs = super().render(*args, **kwargs)
341
+ print("conversation:\n", self.conversation)
342
+ print("utterance_history:\n", self.utterance_history)
343
+ self.window.set_caption(self.conversation, [
344
+ "Gandalf:",
345
+ "Jack:",
346
+ "John:",
347
+ "Where is the exit",
348
+ "Open sesame",
349
+ ])
350
+ return obs
351
+
352
+
353
+ class TalkItOut8x8Env(TalkItOutEnv):
354
+ def __init__(self):
355
+ super().__init__(size=8)
356
+
357
+
358
+ class TalkItOut6x6Env(TalkItOutEnv):
359
+ def __init__(self):
360
+ super().__init__(size=6)
361
+
362
+
363
+ class TalkItOutNameless8x8Env(TalkItOutEnv):
364
+ def __init__(self):
365
+ super().__init__(size=8, nameless=True)
366
+
367
+ register(
368
+ id='MiniGrid-TalkItOut-5x5-v0',
369
+ entry_point='gym_minigrid.envs:TalkItOutEnv'
370
+ )
371
+
372
+ register(
373
+ id='MiniGrid-TalkItOut-6x6-v0',
374
+ entry_point='gym_minigrid.envs:TalkItOut6x6Env'
375
+ )
376
+
377
+ register(
378
+ id='MiniGrid-TalkItOut-8x8-v0',
379
+ entry_point='gym_minigrid.envs:TalkItOut8x8Env'
380
+ )
381
+
382
+ register(
383
+ id='MiniGrid-TalkItOutNameless-8x8-v0',
384
+ entry_point='gym_minigrid.envs:TalkItOutNameless8x8Env'
385
+ )