import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split

nltk.download('punkt_tab')

# load dataset
DatasetLocation = r"datset.csv"
dataset = pd.read_csv(DatasetLocation)
print("data loaded")

# split into features and labels
x = dataset["text"]
y = dataset["output"]

# map labels from {-1, 0, 1} to {0, 0.5, 1} so they fit a sigmoid output
y = (y + 1) / 2
# replace any NaN labels with 0
y = y.fillna(0)
print(y)

# tokenize data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(x)
TokenX = tokenizer.texts_to_sequences(x)

# save tokenizer for later inference
with open("tokenizer.pkl", "wb") as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
print(TokenX)

# pad data to a fixed length
max_length = 100  # choose a suitable maximum length
X_Padded = pad_sequences(TokenX, maxlen=max_length)
print("data padded correctly")

# set train and validation splits
X_train, X_val, y_train, y_val = train_test_split(X_Padded, y, test_size=0.2, random_state=42)

# define the model
model = Sequential([
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')  # single sigmoid output in [0, 1]
])

model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])
print("model defined correctly")
print(np.isnan(y).sum())  # should be 0

# train model, recording one training/validation loss value per epoch
epochs = 3
TrainLoss = []
ValLoss = []
Num = []
for i in range(epochs):
    history = model.fit(X_train, y_train, epochs=1, verbose=2)
    Train_loss = history.history['loss'][-1]          # training loss for this epoch
    Train_accuracy = history.history['accuracy'][-1]  # training accuracy for this epoch
    Val_loss, Val_accuracy = model.evaluate(X_val, y_val)
    ValLoss.append(Val_loss)
    TrainLoss.append(Train_loss)
    Num.append(i)

# save the model
model.save("model.h5")

# graph loss
plt.figure(figsize=(10, 6))
plt.plot(Num, ValLoss, label='Validation Loss', color='orange')
plt.plot(Num, TrainLoss, label='Training Loss', color='blue')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid()
plt.show()
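
# --- Optional: a minimal inference sketch, not part of the training run above. ---
# It assumes the "tokenizer.pkl" and "model.h5" files saved by this script and the
# same max_length of 100; the sample sentence is purely illustrative.
# import pickle
# import numpy as np
# from tensorflow.keras.models import load_model
# from tensorflow.keras.preprocessing.sequence import pad_sequences
#
# with open("tokenizer.pkl", "rb") as handle:
#     loaded_tokenizer = pickle.load(handle)
# loaded_model = load_model("model.h5")
#
# sample_text = ["this is an example sentence"]          # hypothetical input
# sample_seq = loaded_tokenizer.texts_to_sequences(sample_text)
# sample_padded = pad_sequences(sample_seq, maxlen=100)  # must match training max_length
# prediction = loaded_model.predict(sample_padded)
# # per the label mapping above: near 0 ~ original label -1, near 0.5 ~ 0, near 1 ~ 1
# print(prediction)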