Upload Br-T-1.1.py
Br-T-1.1.py  ADDED  (+101 -0)
@@ -0,0 +1,101 @@
import os
import re
import numpy as np
from gensim.models import Word2Vec
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# os.cpu_count() can return None, so fall back to a single worker.
workers = max(1, os.cpu_count() or 1)
vector_size = 1000      # dimensionality of the Word2Vec vectors
window_size = 50        # Word2Vec context window
min_count = 1           # keep every word, however rare
context_length = 256    # padded input length for the Keras model
max_length = context_length * 4   # unused below
sentence_length = 5     # total words to generate, including the start word
top_k = 500             # unused by generate_sentence below; see the top-k sketch after it
temperature = 0         # unused; generation is invoked with temperature=1.0
min_probability = 0.5   # unused
max_probability = 1     # unused

# === READ THE DATASET ===
file_path = input("Enter the dataset file path: ")
try:
    with open(file_path, "r", encoding="utf-8") as f:
        dataset = f.readlines()
except FileNotFoundError:
    raise SystemExit("File not found!")

# === SPLIT INTO WORDS ===
tokenized_sentences = [re.findall(r'\b\w+\b', sentence.lower()) for sentence in dataset]

# === BUILD THE WORD2VEC MODEL ===
# Note: these vectors are trained but never wired into the Keras model below;
# a sketch after the network definition shows one way to connect them.
model = Word2Vec(tokenized_sentences, vector_size=vector_size, window=window_size,
                 min_count=min_count, workers=workers)

# === TOKENIZATION ===
tokenizer = Tokenizer()
joined_sentences = [' '.join(sentence) for sentence in tokenized_sentences]
tokenizer.fit_on_texts(joined_sentences)
sequences = tokenizer.texts_to_sequences(joined_sentences)
X = pad_sequences(sequences, maxlen=context_length, padding='post')
# The target is the last token of each (unpadded) sequence; with post-padding
# that token is still visible inside X itself, so the model can copy rather
# than predict. A sliding-window alternative is sketched below.
y = np.array([seq[-1] if len(seq) > 0 else 0 for seq in sequences])
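
# --- Sketch (not in the original script): sliding-window next-word pairs ---
# A hypothetical helper, assuming the tokenizer and pad_sequences above: each
# prefix of a sequence predicts the token that follows it, so the target is
# never contained in the input.
def make_next_word_pairs(sequences, maxlen):
    contexts, targets = [], []
    for seq in sequences:
        for i in range(1, len(seq)):
            contexts.append(seq[:i])  # tokens before position i
            targets.append(seq[i])    # token to predict
    return pad_sequences(contexts, maxlen=maxlen, padding='post'), np.array(targets)
# Opt-in replacement for the X, y built above:
# X, y = make_next_word_pairs(sequences, context_length)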

# === NEURAL NETWORK MODEL ===
# A deep stack of LSTMs, narrowing from 256 units down to a single unit
# before the softmax over the vocabulary.
nn_model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=vector_size, input_length=context_length),
    LSTM(256, return_sequences=True),
    LSTM(128, return_sequences=True),
    LSTM(128, return_sequences=True),
    LSTM(128, return_sequences=True),
    LSTM(64, return_sequences=True),
    LSTM(64, return_sequences=True),
    LSTM(64, return_sequences=True),
    LSTM(32, return_sequences=True),
    LSTM(32, return_sequences=True),
    LSTM(32, return_sequences=True),
    LSTM(16, return_sequences=True),
    LSTM(16, return_sequences=True),
    LSTM(16, return_sequences=True),
    LSTM(8, return_sequences=True),
    LSTM(8, return_sequences=True),
    LSTM(8, return_sequences=True),
    LSTM(4, return_sequences=True),
    LSTM(4, return_sequences=True),
    LSTM(4, return_sequences=True),
    LSTM(2, return_sequences=True),
    LSTM(1),   # final LSTM collapses the sequence to a single feature
    Dense(len(tokenizer.word_index) + 1, activation='softmax')
])
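
# --- Sketch (not in the original script): initialise the Embedding layer from
# the Word2Vec vectors trained above, so the network starts from the learned
# word geometry instead of random weights. Assumes gensim 4's model.wv API;
# words Word2Vec never saw keep zero vectors.
embedding_matrix = np.zeros((len(tokenizer.word_index) + 1, vector_size))
for word, idx in tokenizer.word_index.items():
    if word in model.wv:
        embedding_matrix[idx] = model.wv[word]
# e.g. swap the first layer for:
# Embedding(input_dim=embedding_matrix.shape[0], output_dim=vector_size,
#           weights=[embedding_matrix], input_length=context_length, trainable=False)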

nn_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
nn_model.fit(X, y, epochs=20, batch_size=32)

# === SENTENCE GENERATION (with temperature sampling) ===
def generate_sentence(start_word, sentence_length, temperature=1.0):
    sentence = [start_word]
    for _ in range(sentence_length - 1):
        sequence = tokenizer.texts_to_sequences([' '.join(sentence)])
        sequence = pad_sequences(sequence, maxlen=context_length, padding='post')
        predicted_probs = nn_model.predict(sequence)[0]

        # Rescale the probabilities with the temperature parameter
        # (must be > 0: below 1 sharpens the distribution, above 1 flattens it).
        predicted_probs = np.asarray(predicted_probs).astype('float64')
        predicted_probs = np.log(predicted_probs + 1e-10) / temperature
        predicted_probs = np.exp(predicted_probs) / np.sum(np.exp(predicted_probs))

        # Sample the next word from the rescaled distribution.
        predicted_index = np.random.choice(len(predicted_probs), p=predicted_probs)
        next_word = tokenizer.index_word.get(predicted_index, '')

        # Index 0 is the padding id and maps to no word; stop if it is drawn.
        if not next_word:
            break
        sentence.append(next_word)

    return ' '.join(sentence)
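
# --- Sketch (not in the original script): the config defines top_k but the
# loop above never uses it. Restricting sampling to the k most probable words
# before renormalising is a common way to avoid very unlikely tokens:
def sample_top_k(probs, k, temperature=1.0):
    probs = np.log(np.asarray(probs, dtype='float64') + 1e-10) / temperature
    probs = np.exp(probs) / np.sum(np.exp(probs))
    top_indices = np.argpartition(probs, -k)[-k:]   # ids of the k largest probs
    top_probs = probs[top_indices] / probs[top_indices].sum()
    return int(np.random.choice(top_indices, p=top_probs))
# Drop-in use inside generate_sentence, replacing the np.random.choice line:
# predicted_index = sample_top_k(nn_model.predict(sequence)[0],
#                                min(top_k, len(tokenizer.word_index) + 1), temperature)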

# Ask for a starting word and generate a sentence.
start_word = input("Enter a starting word: ")
print("\nGenerated sentence:", generate_sentence(start_word, sentence_length, temperature=1.0))