import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
# load dataset
DatasetLocation = r"datset.csv"
dataset = pd.read_csv(DatasetLocation)
print("data loaded")
# Separate features (text) and labels (output)
x = dataset["text"]
y = dataset["output"]
# Rescale the labels: (y + 1) / 2 maps -1 -> 0, 0 -> 0.5, and 1 -> 1
Newy = (y + 1) / 2
y = Newy
# Replace any NaN labels with 0
y = y.fillna(0)
print(y)
#tokenize data
tokenizer = Tokenizer()
#fit tokenizer
tokenizer.fit_on_texts(x)
TokenX = tokenizer.texts_to_sequences(x)
#save tokenizer
with open("tokenizer.pkl", "wb") as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
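# Optional sanity check (added, not part of the original flow): report the vocabulary
# size the tokenizer learned; index 0 is reserved for padding in Keras.
print("vocabulary size:", len(tokenizer.word_index) + 1)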
print(TokenX)
#pad data
max_length = 100 # Choose a suitable maximum length
X_Padded = pad_sequences(TokenX, maxlen=max_length)
print("data padded correctly")
#set train and validation
X_train, X_val, y_train, y_val = train_test_split(X_Padded, y, test_size=0.2, random_state=42)
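# Optional check (added): report how many samples landed in each split.
print("train samples:", len(X_train), "| validation samples:", len(X_val))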
# Define the model.
# Note: the padded token IDs are fed directly into Dense layers as plain numeric
# features (there is no Embedding layer), so each sample is a vector of length max_length.
model = Sequential([
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')  # Sigmoid output in [0, 1]; labels are 0, 0.5, or 1
])
from tensorflow.keras.optimizers import Adam
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
print("model defined correctly")
print(np.isnan(y).sum()) # Should be 0
# Train the model in rounds, recording the losses after each round.
# Each round runs 100 training epochs on the training split, then evaluates
# on the held-out validation split.
rounds = 3
TrainLoss = []
ValLoss = []
Num = []
for i in range(rounds):
    history = model.fit(X_train, y_train, epochs=100, verbose=2)
    Train_loss = history.history['loss'][-1]          # last training loss of this round
    Train_accuracy = history.history['accuracy'][-1]  # last training accuracy of this round
    Val_loss, Val_accuracy = model.evaluate(X_val, y_val)
    ValLoss.append(Val_loss)
    TrainLoss.append(Train_loss)
    Num.append(i)
#save the model
model.save("model.h5")
#graph loss
plt.figure(figsize=(10, 6))
plt.plot(Num, ValLoss, label='Validation Loss', color='orange')
plt.plot(Num, TrainLoss, label='Training Loss', color='blue')
plt.title('Training and Validation Loss')
plt.xlabel('Training round (100 epochs each)')
plt.ylabel('Loss')
plt.legend()
plt.grid()
plt.show()
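# Inference sketch (added): reload the artifacts saved above and score a new sentence.
# "model.h5" and "tokenizer.pkl" come from this script; the sample text below is a
# hypothetical placeholder, not part of the original dataset.
from tensorflow.keras.models import load_model

loaded_model = load_model("model.h5")
with open("tokenizer.pkl", "rb") as handle:
    loaded_tokenizer = pickle.load(handle)

sample_texts = ["This is an example sentence"]  # hypothetical input
sample_seq = loaded_tokenizer.texts_to_sequences(sample_texts)
sample_padded = pad_sequences(sample_seq, maxlen=max_length)
prediction = loaded_model.predict(sample_padded)
# The output is in [0, 1], matching the rescaled labels (-1/0/1 mapped to 0/0.5/1).
print("predicted score:", float(prediction[0][0]))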