Spaces:

Jsevisal
/

semantrix

Paused

semantrix / juego_embbedings_text_config.py

Javierss

Add files

e2b757a over 1 year ago

8.07 kB

	# %%
	import json
	import pickle as pk
	import random
	import threading
	from datetime import datetime

	import numpy as np
	from gensim.models import KeyedVectors
	from sentence_transformers import SentenceTransformer

	from display import display_words
	from pistas import curiosity, hint
	from seguimiento import calculate_moving_average, calculate_tendency_slope

	# %%
	# Locations of the JSON resources that are read further down in this script.
	config_file_path = "config/lang.json"
	secret_file_path = "config/secret.json"

	# Per-run cache mapping each word to the embedding produced by `model_st`.
	# (An earlier on-disk cache, "word_embeddings_mpnet.pth", is no longer loaded.)
	embeddings_dict = {}

	# Sentence encoder used to embed the secret word and every guess.
	model_st = SentenceTransformer(
	    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
	)

	# Empty vector store, filled through `add_vector` and queried with
	# `similarity`. 768 has to equal the length of the vectors added later.
	model = KeyedVectors(768)


	class DictWrapper:
	    """Expose the entries of a dictionary as attributes of an object."""

	    def __init__(self, data_dict):
	        # Copy every key/value pair straight into the instance namespace, so
	        # that `wrapper.key` reads what `data_dict["key"]` held.
	        vars(self).update(data_dict)


	# Load the language-keyed resources. JSON is UTF-8 by specification, so the
	# encoding is stated explicitly instead of relying on the platform's locale
	# default (which would garble non-ASCII text on some systems).
	with open(config_file_path, "r", encoding="utf-8") as file:
	    Config_full = json.load(file)

	# Kept under its own name so it is not confused with `secret`, which later
	# in the script holds the single word the player has to guess.
	with open(secret_file_path, "r", encoding="utf-8") as file:
	    secrets_by_language = json.load(file)

	# Language selector: 0 -> "SPA", 1 -> "ENG"; any other value falls back to
	# "SPA", exactly as the previous if/elif/else chain did.
	lang = 0
	lang_key = {0: "SPA", 1: "ENG"}.get(lang, "SPA")

	# `Config` exposes the "Game" section of the chosen language as attributes;
	# `secret_dict` is that language's entry of the secrets file.
	Config = DictWrapper(Config_full[lang_key]["Game"])  # type: ignore
	secret_dict = secrets_by_language[lang_key]


	# Start every run with a ranking file that only contains the separator line.
	with open("ranking.txt", "w+") as file:
	    file.write("---------------------------")

	# %%
	# Load the fitted projection used to turn embeddings into plot coordinates.
	# The file is opened in a `with` block so the handle is closed right away.
	# NOTE(security): unpickling executes arbitrary code from the file; only
	# ever load a "pca_mpnet.pkl" that comes from a trusted source.
	with open("pca_mpnet.pkl", "rb") as pca_file:
	    pca = pk.load(pca_file)

	print(Config.Difficulty_presentation_Full)  # type: ignore

	# Ask for the difficulty until the answer parses as an integer, instead of
	# aborting the whole game with a ValueError traceback on a typo.
	while True:
	    try:
	        difficulty = int(input(Config.Difficulty + ": "))  # type: ignore
	    except ValueError:
	        continue
	    break

	# Difficulties up to 2 draw from the "basic" pool, anything above from the
	# "advanced" pool.
	secret_list = secret_dict["basic"] if difficulty <= 2 else secret_dict["advanced"]

	# Remove one randomly chosen word from the pool; it becomes the secret.
	secret = secret_list.pop(random.randrange(len(secret_list)))
	secret = secret.lower()

	# Slot 0 of both lists belongs to the secret: a placeholder label for the
	# word and a fixed score of 10. Guesses are appended after it.
	words = [Config.secret_word]  # type: ignore
	scores = [10]

	# %%
	# Embed the secret once, remember it in the cache and register it in the
	# vector store so later guesses can be compared against it.
	secret_embedding = model_st.encode(secret, convert_to_tensor=True)
	embeddings_dict[secret] = secret_embedding
	secret_vector = secret_embedding.tolist()
	model.add_vector(secret, secret_vector)

	# Raw (unprojected) vectors, index-aligned with `words` and `scores`.
	word_vect = [secret_vector]

	# Show the initial plot in a background thread; -1 is passed as the last
	# argument because no guess has been made yet.
	initial_points = pca.transform(word_vect)
	thread = threading.Thread(
	    target=display_words,
	    args=(words, initial_points, scores, -1),
	)
	thread.start()


	def preproc_vectors(words, word_vect, scores, repeated):
	    """Rewrite the ranking file and pick the entries to be displayed.

	    "ranking.txt" is overwritten with: the current guess, a separator line,
	    and then every entry except index 0 ordered from highest to lowest score.

	    Args:
	        words: All words so far; index-aligned with the other two lists.
	        word_vect: Raw vector of every word, projected here with the
	            module-level ``pca``.
	        scores: Score of every word.
	        repeated: Index of the current guess when it had been entered
	            before, or -1 when it is the last element of ``words``.

	    Returns:
	        A tuple ``(words, projected vectors, scores, bold)`` where ``bold``
	        is the position of the current guess inside the returned lists.
	        With at most 11 words everything is returned unchanged; otherwise
	        only the 11 best-scoring entries are kept, plus the current guess
	        when it is not already among them.
	    """
	    # Indices ordered by score, best first.
	    ranked = np.argsort(scores)[::-1].tolist()
	    current = repeated if repeated != -1 else len(words) - 1

	    entries = [
	        [f"#{current}", words[current], scores[current]],
	        "---------------------------",
	    ]
	    # Index 0 is left out of the ranking lines.
	    entries.extend(
	        [f"#{idx}", words[idx], scores[idx]] for idx in ranked if idx != 0
	    )

	    with open("ranking.txt", "w+") as file:
	        file.writelines(f"{entry}\n" for entry in entries)

	    # Few enough words: show all of them as they are.
	    if len(words) <= 11:
	        return words, pca.transform(word_vect), scores, current

	    shown = ranked[:11]
	    if current not in shown:
	        shown.append(current)

	    return (
	        [words[idx] for idx in shown],
	        pca.transform([word_vect[idx] for idx in shown]),
	        [scores[idx] for idx in shown],
	        shown.index(current),
	    )


	# ---------------------------------------------------------------------------
	# Guessing loop: read a word, score it against the secret, print feedback,
	# optionally print a hint, and refresh the plot in a background thread.
	# ---------------------------------------------------------------------------

	win = False
	# `n` and `last_hint` are state values handed to `hint()` and replaced by
	# what it returns; `n` starts at 3 on difficulty 1 and at 0 otherwise.
	n = 3 if difficulty == 1 else 0
	last_hint = -1
	# Turns left before another hint may be given (0 = a hint is allowed).
	recent_hint = 0
	f_dev_avg = 0

	while not win:
	    word = input(Config.New_word).lower()  # type: ignore
	    if word == "give_up":
	        break

	    # Wait for the previous display thread BEFORE modifying `words`: that
	    # thread may have been handed this very list object, so appending to it
	    # while the thread is still running would be a data race.
	    thread.join()

	    if word in words:
	        repeated = words.index(word)
	    else:
	        repeated = -1
	        words.append(word)

	    # Reuse the cached embedding of a word seen before instead of running
	    # the encoder again; only new words are encoded.
	    embedding = embeddings_dict.get(word)
	    if embedding is None:
	        embedding = model_st.encode(word, convert_to_tensor=True)
	        embeddings_dict[word] = embedding
	    model.add_vector(word, embedding.tolist())  # type: ignore
	    if repeated == -1:
	        word_vect.append(embedding.tolist())

	    # Similarity to the secret, scaled by 10 and rounded to two decimals.
	    score = round(model.similarity(secret, word) * 10, 2)
	    if repeated == -1:
	        scores.append(score)  # type: ignore

	    # %%
	    # Each branch is only reached when the previous upper bound was
	    # exceeded, so the lower bounds do not need to be repeated.
	    if score <= 2.5:
	        feedback = Config.Feedback_0 + str(score)  # type: ignore
	    elif score <= 4.0:
	        feedback = Config.Feedback_1 + str(score)  # type: ignore
	    elif score <= 6.0:
	        feedback = Config.Feedback_2 + str(score)  # type: ignore
	    elif score <= 7.5:
	        feedback = Config.Feedback_3 + str(score)  # type: ignore
	    elif score <= 8.0:
	        feedback = Config.Feedback_4 + str(score)  # type: ignore
	    elif score < 10.0:
	        feedback = Config.Feedback_5 + str(score)  # type: ignore
	    else:
	        win = True
	        feedback = Config.Feedback_8  # type: ignore
	        # Reveal the secret in slot 0 and drop the winning guess again so
	        # the word is not listed twice. Only entries appended during this
	        # turn are removed.
	        words[0] = secret
	        if repeated == -1:
	            words.pop()
	            word_vect.pop()
	            scores.pop()

	    print(feedback)
	    if not win:
	        # `len(scores) - 2` is used on purpose instead of `-2`: with a single
	        # stored score it wraps around to that entry rather than raising.
	        # NOTE(review): for a repeated word no score is appended, so this is
	        # not the score of the most recent guess - confirm that is intended.
	        previous_score = scores[len(scores) - 2]
	        if score > previous_score:
	            print(Config.Feedback_6)  # type: ignore
	        elif score < previous_score:
	            print(Config.Feedback_7)  # type: ignore

	    if difficulty != 4:
	        # Moving average over the scores of the guesses (slot 0 excluded).
	        mov_avg = calculate_moving_average(scores[1:], 5)

	        if len(mov_avg) > 1 and not win:
	            f_dev = calculate_tendency_slope(mov_avg)
	            f_dev_avg = calculate_moving_average(f_dev, 3)
	            # Give a hint when the latest smoothed slope is negative and no
	            # hint was given during the last few turns.
	            if f_dev_avg[-1] < 0 and recent_hint == 0:
	                print("\n")
	                print(random.choice(Config.hint_intro))  # type: ignore
	                # Same fallback as for the game texts: everything that is
	                # not English (1) uses the Spanish section.
	                hint_lang_key = "ENG" if lang == 1 else "SPA"
	                hint_text, n, last_hint = hint(
	                    secret,
	                    n,
	                    model_st,
	                    last_hint,
	                    lang,
	                    DictWrapper(Config_full[hint_lang_key]["Hint"]),
	                )
	                print(hint_text)
	                recent_hint = 3

	        if recent_hint != 0:
	            recent_hint -= 1

	    (
	        words_display,
	        displayvect_display,
	        scores_display,
	        bold_display,
	    ) = preproc_vectors(words, word_vect, scores, repeated)

	    # After a win the highlighted entry is slot 0, which now holds the secret.
	    if win:
	        bold_display = 0

	    # Refresh the plot in the background; it is joined at the start of the
	    # next turn (or after the loop).
	    thread = threading.Thread(
	        target=display_words,
	        args=(words_display, displayvect_display, scores_display, bold_display),
	    )
	    thread.start()

	# The player gave up: print the two closing messages and reveal the secret.
	if not win:
	    print(Config.Feedback_9 + secret)  # type: ignore
	    print(Config.Feedback_10)  # type: ignore

	# Same language fallback as for the game texts: everything that is not
	# English (1) uses the Spanish section.
	hint_lang_key = "ENG" if lang == 1 else "SPA"

	# Stored under a separate name so the imported `curiosity` function is not
	# overwritten by its own result.
	curiosity_text = curiosity(
	    secret,
	    DictWrapper(Config_full[hint_lang_key]["Hint"]),
	)
	print(curiosity_text)

	# Archive the final ranking, leaving out the first two lines of the file
	# (the entry of the last guess and the separator).
	with open("ranking.txt", "r") as original_file:
	    file_content = original_file.readlines()

	# NOTE(review): str(datetime.now()) contains spaces and colons, which are not
	# valid in file names on Windows, and the "data" directory has to exist
	# already. Both are left unchanged here to keep the archive names stable.
	new_file_name = secret + "_" + str(datetime.now())

	with open("data/" + new_file_name, "w") as new_file:
	    new_file.writelines(file_content[2:])

	# Let the last display thread finish before leaving.
	thread.join()
	exit()