Br-T-1.1 / Br-T-1.1.py

Bertug1911

Upload Br-T-1.1.py

1d3cce3 verified 3 months ago

3.94 kB

	import os
	import re
	import time
	import numpy as np
	from gensim.models import Word2Vec
	from tensorflow.keras.models import Sequential
	from tensorflow.keras.layers import Dense, LSTM, Embedding
	from tensorflow.keras.preprocessing.text import Tokenizer
	from tensorflow.keras.preprocessing.sequence import pad_sequences

	max_worker = os.cpu_count()
	workers = max(1, max_worker - 0)
	vector_size = 1000
	window_size = 50
	min_count = 1
	context_length = 256
	max_length = context_length * 4
	sentence_length = 5
	top_k = 500
	temperature = 0
	min_probability = 0.5
	max_probability = 1

	# === VERİ SETİNİ OKUMA ===
	file_path = input("Veri setinin dosya yolunu giriniz: ")
	try:
	with open(file_path, "r", encoding="utf-8") as f:
	dataset = f.readlines()
	except FileNotFoundError:
	print("Dosya bulunamadı!")
	exit()

	# === KELİMELERE AYIRMA ===
	tokenized_sentences = [re.findall(r'\b\w+\b', sentence.lower()) for sentence in dataset]

	# === WORD2VEC MODELİ OLUŞTURMA ===
	model = Word2Vec(tokenized_sentences, vector_size=vector_size, window=window_size, min_count=min_count, workers=workers)

	# === TOKENİZASYON ===
	tokenizer = Tokenizer()
	tokenizer.fit_on_texts([' '.join(sentence) for sentence in tokenized_sentences])
	sequences = tokenizer.texts_to_sequences([' '.join(sentence) for sentence in tokenized_sentences])
	X = pad_sequences(sequences, maxlen=context_length, padding='post')
	y = np.array([seq[-1] if len(seq) > 0 else 0 for seq in sequences])

	# === SİNİR AĞI MODELİ ===
	nn_model = Sequential([
	Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=vector_size, input_length=context_length),
	LSTM(256, return_sequences=True),
	LSTM(128, return_sequences=True),
	LSTM(128, return_sequences=True),
	LSTM(128, return_sequences=True),
	LSTM(64, return_sequences=True),
	LSTM(64, return_sequences=True),
	LSTM(64, return_sequences=True),
	LSTM(32, return_sequences=True),
	LSTM(32, return_sequences=True),
	LSTM(32, return_sequences=True),
	LSTM(16, return_sequences=True),
	LSTM(16, return_sequences=True),
	LSTM(16, return_sequences=True),
	LSTM(8, return_sequences=True),
	LSTM(8, return_sequences=True),
	LSTM(8, return_sequences=True),
	LSTM(4, return_sequences=True),
	LSTM(4, return_sequences=True),
	LSTM(4, return_sequences=True),
	LSTM(2, return_sequences=True),
	LSTM(1),
	Dense(len(tokenizer.word_index) + 1, activation='softmax')
	])

	nn_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	nn_model.fit(X, y, epochs=20, batch_size=32)

	# === CÜMLE ÜRETME (temperature sampling eklenmiş) ===
	def generate_sentence(start_word, sentence_length, temperature=1.0):
	sentence = [start_word]
	for _ in range(sentence_length - 1):
	sequence = tokenizer.texts_to_sequences([' '.join(sentence)])
	sequence = pad_sequences(sequence, maxlen=context_length, padding='post')
	predicted_probs = nn_model.predict(sequence)[0]

	# Uygulanan sıcaklık parametresiyle olasılıkları değiştirme
	predicted_probs = np.asarray(predicted_probs).astype('float64')
	predicted_probs = np.log(predicted_probs + 1e-10) / temperature
	predicted_probs = np.exp(predicted_probs) / np.sum(np.exp(predicted_probs))

	# Temperature ile daha rastgele bir kelime seçimi
	predicted_index = np.random.choice(len(predicted_probs), p=predicted_probs)
	next_word = tokenizer.index_word.get(predicted_index, '')

	if not next_word:
	break
	sentence.append(next_word)

	return ' '.join(sentence)

	# Başlangıç kelimesi girilmesi ve cümle üretimi
	start_word = input("Başlangıç kelimesi giriniz: ")
	print("\nÜretilen Cümle:", generate_sentence(start_word, sentence_length, temperature=1.0))