Fixed generate_response
Fixed the error saying "Probabilities Do Not Sum to 1"
- chatbotTrainer.py +6 -4
- runCorpus.py +9 -9
chatbotTrainer.py
CHANGED
@@ -225,8 +225,8 @@ class ChatbotTrainer:
|
|
225 |
|
226 |
# Prediction Setup (Everything here will take priority)
|
227 |
self.min_word = 10 # Only for generate_response
|
228 |
-
self.temperature =
|
229 |
-
self.scaling_factor =
|
230 |
self.logger = self.setup_logger() # Initialize your logger here
|
231 |
self.beam_width = 9
|
232 |
self.top_p = 0.7
|
@@ -789,12 +789,14 @@ class ChatbotTrainer:
|
|
789 |
output_tokens, state_h, state_c = self.decoder_model.predict([target_seq, state_h, state_c])
|
790 |
|
791 |
# Scale logits immediately after getting output_tokens
|
792 |
-
logits = output_tokens[0, -1, :]
|
793 |
-
|
|
|
794 |
|
795 |
# Compute softmax
|
796 |
exp_logits = np.exp(logits - np.max(logits)) # Prevent overflow
|
797 |
probabilities = exp_logits / np.sum(exp_logits)
|
|
|
798 |
|
799 |
predicted_token_index = np.random.choice(len(probabilities), p=probabilities)
|
800 |
predicted_word = self.reverse_tokenizer.get(predicted_token_index, '<oov>')
|
|
|
225 |
|
226 |
# Prediction Setup (Everything here will take priority)
|
227 |
self.min_word = 10 # Only for generate_response
|
228 |
+
self.temperature = 1
|
229 |
+
self.scaling_factor = 1
|
230 |
self.logger = self.setup_logger() # Initialize your logger here
|
231 |
self.beam_width = 9
|
232 |
self.top_p = 0.7
|
|
|
789 |
output_tokens, state_h, state_c = self.decoder_model.predict([target_seq, state_h, state_c])
|
790 |
|
791 |
# Scale logits immediately after getting output_tokens
|
792 |
+
logits = output_tokens[0, -1, :] * self.scaling_factor
|
793 |
+
logits = logits / self.temperature
|
794 |
+
logits = np.clip(logits, -50, 50)
|
795 |
|
796 |
# Compute softmax
|
797 |
exp_logits = np.exp(logits - np.max(logits)) # Prevent overflow
|
798 |
probabilities = exp_logits / np.sum(exp_logits)
|
799 |
+
probabilities = exp_logits / (np.sum(exp_logits) + 1e-8)  # NOTE(review): this overwrites the correctly normalized line above; with the +1e-8 the probabilities sum to slightly less than 1 (np.random.choice tolerates it only within ~1.5e-8). Consider dropping this line or renormalizing with probabilities /= probabilities.sum() — confirm intent.
|
800 |
|
801 |
predicted_token_index = np.random.choice(len(probabilities), p=probabilities)
|
802 |
predicted_word = self.reverse_tokenizer.get(predicted_token_index, '<oov>')
|
runCorpus.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
#
|
2 |
import os
|
3 |
from playsound3 import playsound
|
4 |
import tensorflow
|
@@ -182,7 +182,7 @@ class CorpusTrainer:
|
|
182 |
self.failure_history.append(len(self.troubleList))
|
183 |
if percent_running is None:
|
184 |
percent_running = 0.0
|
185 |
-
self.chatbot_trainer.logger.info(f"
|
186 |
|
187 |
# We check for speaker vs top num achieved successfully in self.speakerList
|
188 |
# topConvo is a larger buffer for models that may take longer to learn but for dynamic learning. self.top_num is for running consecutive and is default in that situation
|
@@ -241,15 +241,15 @@ class CorpusTrainer:
|
|
241 |
# Debug Lines; But pretty useful to see how it works
|
242 |
print(f"Trouble List: {len(self.troubleList)}")
|
243 |
print(f"Bad Count: {self.bad_count}")
|
244 |
-
print(f"Number of Conversations(This
|
245 |
print(f"Number of Conversations Combined: {topConvo}")
|
246 |
-
print(f"
|
247 |
|
248 |
percent_running = self.runningPercent(len(self.troubleList), topConvo)
|
249 |
self.failure_history.append(len(self.troubleList))
|
250 |
if percent_running is None:
|
251 |
percent_running = 0.0
|
252 |
-
self.chatbot_trainer.logger.info(f"
|
253 |
|
254 |
print(f"Now is the time to quit if need be... ")
|
255 |
if play_notification == 1:
|
@@ -259,14 +259,14 @@ class CorpusTrainer:
|
|
259 |
print(f"Next convo in:{self.time_sleep-x}")
|
260 |
|
261 |
if percent_running is not None:
|
262 |
-
# Note: The
|
263 |
if percent_running > self.percent_reset:
|
264 |
self.log_failures(len(self.troubleList), self.log_file)
|
265 |
print("Plotting Failures... See failures_plot.png for more information... ")
|
266 |
self.plot_failures(self.log_file)
|
267 |
if play_notification == 1:
|
268 |
playsound(notification_sound)
|
269 |
-
print(f"Resetting... Failure Rate is Greater than {self.percent_reset}%... For this
|
270 |
# time.sleep(self.time_sleep * 3)
|
271 |
return self.main(self.chatbot_trainer, user_choice, dialog_data, topConvo, self.top_num)
|
272 |
|
@@ -340,8 +340,8 @@ class CorpusTrainer:
|
|
340 |
|
341 |
# Plot actual failure values
|
342 |
plt.figure(figsize=(10, 6))
|
343 |
-
plt.plot(self.failure_history, marker='o', linestyle='-', color='red', label='Failures Per
|
344 |
-
plt.xlabel("
|
345 |
plt.ylabel("Number of Failures")
|
346 |
plt.title("Failures Before Restart Over Time")
|
347 |
plt.legend()
|
|
|
1 |
+
# runMovieCorpus.py
|
2 |
import os
|
3 |
from playsound3 import playsound
|
4 |
import tensorflow
|
|
|
182 |
self.failure_history.append(len(self.troubleList))
|
183 |
if percent_running is None:
|
184 |
percent_running = 0.0
|
185 |
+
self.chatbot_trainer.logger.info(f"Running Percentage Failure: {percent_running}%")
|
186 |
|
187 |
# We check for speaker vs top num achieved successfully in self.speakerList
|
188 |
# topConvo is a larger buffer for models that may take longer to learn but for dynamic learning. self.top_num is for running consecutive and is default in that situation
|
|
|
241 |
# Debug Lines; But pretty useful to see how it works
|
242 |
print(f"Trouble List: {len(self.troubleList)}")
|
243 |
print(f"Bad Count: {self.bad_count}")
|
244 |
+
print(f"Number of Conversations(This Run): {self.counter}")
|
245 |
print(f"Number of Conversations Combined: {topConvo}")
|
246 |
+
print(f"Running Trouble: {len(self.runningTrouble)}")
|
247 |
|
248 |
percent_running = self.runningPercent(len(self.troubleList), topConvo)
|
249 |
self.failure_history.append(len(self.troubleList))
|
250 |
if percent_running is None:
|
251 |
percent_running = 0.0
|
252 |
+
self.chatbot_trainer.logger.info(f"Running Percentage Failure: {percent_running}%")
|
253 |
|
254 |
print(f"Now is the time to quit if need be... ")
|
255 |
if play_notification == 1:
|
|
|
259 |
print(f"Next convo in:{self.time_sleep-x}")
|
260 |
|
261 |
if percent_running is not None:
|
262 |
+
# Note: The run adds to the trained speaker list which is successful trainings. If it's not in that list, when it comes up again it will be trained another time.
|
263 |
if percent_running > self.percent_reset:
|
264 |
self.log_failures(len(self.troubleList), self.log_file)
|
265 |
print("Plotting Failures... See failures_plot.png for more information... ")
|
266 |
self.plot_failures(self.log_file)
|
267 |
if play_notification == 1:
|
268 |
playsound(notification_sound)
|
269 |
+
print(f"Resetting... Failure Rate is Greater than {self.percent_reset}%... For this run.")
|
270 |
# time.sleep(self.time_sleep * 3)
|
271 |
return self.main(self.chatbot_trainer, user_choice, dialog_data, topConvo, self.top_num)
|
272 |
|
|
|
340 |
|
341 |
# Plot actual failure values
|
342 |
plt.figure(figsize=(10, 6))
|
343 |
+
plt.plot(self.failure_history, marker='o', linestyle='-', color='red', label='Failures Per Run')
|
344 |
+
plt.xlabel("Run Iteration")
|
345 |
plt.ylabel("Number of Failures")
|
346 |
plt.title("Failures Before Restart Over Time")
|
347 |
plt.legend()
|