|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
import nltk
|
|
from nltk.tokenize import word_tokenize
|
|
import pickle
|
|
import tensorflow as tf
|
|
import tensorflow as tf
|
|
from tensorflow.keras.models import Sequential
|
|
from tensorflow.keras.layers import Dense
|
|
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
|
from tensorflow.keras.preprocessing.text import Tokenizer
|
|
from sklearn.model_selection import train_test_split
|
|
# Download the NLTK "punkt_tab" resource.
nltk.download('punkt_tab')

# Path of the CSV file that holds the training data.
# NOTE(review): "datset.csv" looks like a typo for "dataset.csv" — confirm
# against the real file name before changing it.
DatasetLocation = r"datset.csv"
dataset = pd.read_csv(DatasetLocation)
print("data loaded")

# Model inputs come from the "text" column, targets from the "output" column.
x = dataset["text"]
y = dataset["output"]

# Rescale every label v to (v + 1) / 2 (e.g. -1 -> 0 and 1 -> 1), then replace
# missing labels with 0.  fillna() does this in one vectorized step and does
# not depend on the Series having a 0..n-1 index, unlike a per-row y[i] loop.
# NOTE(review): presumably the raw labels are in {-1, 1} — confirm.
Newy = ((y + 1) / 2).fillna(0)
y = Newy
print(y)

# Build the vocabulary from the raw texts.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(x)

# Convert every text into its sequence of integer token ids.
TokenX = tokenizer.texts_to_sequences(x)

# Serialize the fitted tokenizer to "tokenizer.pkl".
serialized_tokenizer = pickle.dumps(tokenizer, protocol=pickle.HIGHEST_PROTOCOL)
with open("tokenizer.pkl", "wb") as tokenizer_file:
    tokenizer_file.write(serialized_tokenizer)

print(TokenX)

# Length that every token sequence is padded (or truncated) to.
max_length = 100
X_Padded = pad_sequences(
    TokenX,
    maxlen=max_length,
)
print("data padded correctly")

# Hold out 20% of the samples for validation; the fixed random_state keeps
# the split reproducible between runs.
_split = train_test_split(
    X_Padded,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = _split

from tensorflow.keras.optimizers import Adam

# Fully connected binary classifier: two ReLU hidden layers (256 and 128
# units) followed by a single sigmoid output unit.
model = Sequential()
for hidden_units in (256, 128):
    model.add(Dense(hidden_units, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy with Adam at a learning rate of 1e-4; accuracy is
# tracked alongside the loss.
optimizer = Adam(learning_rate=0.0001)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
print("model defined correctly")

# Sanity check: number of NaN labels still present in y.
print(np.isnan(y).sum())

# Number of outer training rounds.  Each round runs 100 Keras epochs, so the
# model is trained for epochs * 100 epochs in total.
epochs = 3

# Per-round bookkeeping used for the loss curves.
TrainLoss = []  # final training loss of each round
ValLoss = []    # validation loss measured after each round
Num = []        # round index (x-axis values)

for i in range(epochs):
    # Fit on the training split only.  Fitting on the full data set would
    # include the validation samples and make the validation loss below
    # meaningless (data leakage).
    history = model.fit(X_train, y_train, epochs=100, verbose=2)

    # Metrics from the last epoch of this round.
    Train_loss = history.history['loss'][-1]
    Train_accuracy = history.history['accuracy'][-1]

    # Score the held-out split that the model has not been trained on.
    Val_loss, Val_accuracy = model.evaluate(X_val, y_val)

    ValLoss.append(Val_loss)
    TrainLoss.append(Train_loss)
    Num.append(i)

# Persist the trained model to "model.h5".
model.save("model.h5")

# Draw the validation and training loss recorded in ValLoss / TrainLoss
# against the values in Num on a single 10x6 inch figure.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(Num, ValLoss, label='Validation Loss', color='orange')
ax.plot(Num, TrainLoss, label='Training Loss', color='blue')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
ax.grid()
plt.show()