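"""Aggregate per-seed success rates from the experiment logs in ./results_1000/
and compare agent variants pairwise with Welch's t-tests."""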
import re
import sys
from pathlib import Path

import numpy as np
from scipy import stats
experiments = Path("./results_1000/")
results_dict = {}
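# Map raw experiment directory names to human-readable agent labels
# ("ABL_*" = WizardGuide environment, "FULL_*" = WizardTwoGuides environment).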
def label_parser(label):
    label_parser_dict = {
        "VIGIL4_WizardGuide_lang64_no_explo": "ABL_MH-BabyAI",
        "VIGIL4_WizardTwoGuides_lang64_no_explo": "FULL_MH-BabyAI",
        "VIGIL4_WizardGuide_lang64_mm": "ABL_MH-BabyAI-ExpBonus",
        "VIGIL4_WizardTwoGuides_lang64_mm": "FULL_MH-BabyAI-ExpBonus",
        "VIGIL4_WizardGuide_lang64_deaf_no_explo": "ABL_Deaf-MH-BabyAI",
        "VIGIL4_WizardTwoGuides_lang64_deaf_no_explo": "FULL_Deaf-MH-BabyAI",
        "VIGIL4_WizardGuide_lang64_bow": "ABL_MH-BabyAI-BOW",
        "VIGIL4_WizardTwoGuides_lang64_bow": "FULL_MH-BabyAI-BOW",
        "VIGIL4_WizardGuide_lang64_no_mem": "ABL_MH-BabyAI-no-mem",
        "VIGIL4_WizardTwoGuides_lang64_no_mem": "FULL_MH-BabyAI-no-mem",
        "VIGIL5_WizardGuide_lang64_bigru": "ABL_MH-BabyAI-bigru",
        "VIGIL5_WizardTwoGuides_lang64_bigru": "FULL_MH-BabyAI-bigru",
        "VIGIL5_WizardGuide_lang64_attgru": "ABL_MH-BabyAI-attgru",
        "VIGIL5_WizardTwoGuides_lang64_attgru": "FULL_MH-BabyAI-attgru",
        "VIGIL4_WizardGuide_lang64_curr_dial": "ABL_MH-BabyAI-current-dialogue",
        "VIGIL4_WizardTwoGuides_lang64_curr_dial": "FULL_MH-BabyAI-current-dialogue",
        "random_WizardGuide": "ABL_Random-agent",
        "random_WizardTwoGuides": "FULL_Random-agent",
    }
    # Exactly one known experiment name must match, otherwise the label is ambiguous.
    if sum(1 for k in label_parser_dict if k in label) != 1:
        print("ERROR: could not parse experiment label unambiguously")
        print(label)
        sys.exit(1)
    for k, v in label_parser_dict.items():
        if k in label:
            return v
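# Collect per-seed success rates: each experiment output file contains one
# "seed success rate" line per evaluation seed.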
for experiment_out_file in experiments.iterdir():
    label = label_parser(str(experiment_out_file))
    results_dict[label] = []
    with open(experiment_out_file) as f:
        for line in f:
            if "seed success rate" in line:
                # The rate is printed as a decimal number, e.g. "0.85".
                seed_success_rate = float(re.search(r"[0-9]\.[0-9]*", line).group())
                results_dict[label].append(seed_success_rate)
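# Sanity check: every agent variant should have exactly 16 seed runs.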
assert set(len(v) for v in results_dict.values()) == {16}
test_p = 0.05  # significance threshold for all pairwise tests
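# Pairwise comparisons along the ablation chain:
# ExpBonus vs. plain agent, plain vs. deaf agent, deaf vs. random agent,
# in both the ablated (ABL) and full (FULL) environments.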
compare = {
    "ABL_MH-BabyAI-ExpBonus": "ABL_MH-BabyAI",
    "ABL_MH-BabyAI": "ABL_Deaf-MH-BabyAI",
    "ABL_Deaf-MH-BabyAI": "ABL_Random-agent",
    "FULL_MH-BabyAI-ExpBonus": "FULL_MH-BabyAI",
    "FULL_MH-BabyAI": "FULL_Deaf-MH-BabyAI",
    "FULL_Deaf-MH-BabyAI": "FULL_Random-agent",
}
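# Welch's t-test (equal_var=False) on the per-seed success rates of each pair;
# p < test_p is read as evidence that the two distributions differ.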
for k, v in compare.items():
    p = stats.ttest_ind(
        results_dict[k],
        results_dict[v],
        equal_var=False,  # Welch's t-test: do not assume equal variances
    ).pvalue
    if np.isnan(p):
        # Degenerate case (e.g. zero variance in both samples): drop into a shell to inspect.
        from IPython import embed; embed()
    print("{} (m:{}) <---> {} (m:{}) = p: {} result: {}".format(
        k, np.mean(results_dict[k]), v, np.mean(results_dict[v]), p,
        "Distributions different (p < {})".format(test_p) if p < test_p
        else "Distributions same (p >= {})".format(test_p)
    ))
    print()