Bertug1911 committed on
Commit
1d3cce3
·
verified ·
1 Parent(s): 84f84af

Upload Br-T-1.1.py

Browse files
Files changed (1) hide show
  1. Br-T-1.1.py +101 -0
Br-T-1.1.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import time
4
+ import numpy as np
5
+ from gensim.models import Word2Vec
6
+ from tensorflow.keras.models import Sequential
7
+ from tensorflow.keras.layers import Dense, LSTM, Embedding
8
+ from tensorflow.keras.preprocessing.text import Tokenizer
9
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
10
+
11
# === CONFIGURATION ===
# os.cpu_count() may return None when the CPU count is undeterminable;
# fall back to 1 so the arithmetic below never operates on None.
max_worker = os.cpu_count() or 1
workers = max(1, max_worker)  # at least one Word2Vec worker thread

# Word2Vec hyperparameters
vector_size = 1000   # embedding dimensionality
window_size = 50     # context window size
min_count = 1        # keep every word, even ones seen once

# Sequence / generation settings
context_length = 256
max_length = context_length * 4
sentence_length = 5  # words per generated sentence
top_k = 500
# NOTE(review): temperature is never read below (generate_sentence is called
# with temperature=1.0 explicitly); a value of 0 would divide by zero if it
# were ever passed through — confirm whether this constant is still needed.
temperature = 0
min_probability = 0.5
max_probability = 1
23
+
24
# === READ THE DATASET ===
# Prompts for a path and loads the corpus as a list of lines.
file_path = input("Veri setinin dosya yolunu giriniz: ")
try:
    with open(file_path, "r", encoding="utf-8") as f:
        dataset = f.readlines()
except FileNotFoundError:
    print("Dosya bulunamadı!")
    # raise SystemExit instead of the interactive-only exit() helper, and
    # report a nonzero status so callers/shells can detect the failure.
    raise SystemExit(1)
32
+
33
# === TOKENIZE: lowercase each line and split it into word tokens ===
# Compile the word pattern once instead of re-parsing it per line.
_word_re = re.compile(r'\b\w+\b')
tokenized_sentences = [_word_re.findall(line.lower()) for line in dataset]
35
+
36
# === BUILD THE WORD2VEC EMBEDDING MODEL ===
# NOTE(review): this embedding model is not referenced again in the visible
# script — confirm whether it is actually needed.
w2v_params = dict(
    vector_size=vector_size,
    window=window_size,
    min_count=min_count,
    workers=workers,
)
model = Word2Vec(tokenized_sentences, **w2v_params)
38
+
39
# === TOKENIZATION: integer-encode sentences and build (X, y) training pairs ===
# Join once and reuse — the original recomputed the joined texts for both
# fit_on_texts and texts_to_sequences.
joined_texts = [' '.join(sentence) for sentence in tokenized_sentences]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(joined_texts)
sequences = tokenizer.texts_to_sequences(joined_texts)

# The target y is the final word of each sentence, so the input X must be
# everything BEFORE it. Feeding the full sequence as X (as before) leaks the
# label into the input: pad_sequences truncates from the front by default,
# so the last token always survived into X.
X = pad_sequences([seq[:-1] for seq in sequences], maxlen=context_length, padding='post')
y = np.array([seq[-1] if seq else 0 for seq in sequences])
45
+
46
# === NEURAL NETWORK: embedding + deep LSTM stack + softmax over the vocabulary ===
vocab_size = len(tokenizer.word_index) + 1

nn_model = Sequential()
nn_model.add(Embedding(input_dim=vocab_size, output_dim=vector_size, input_length=context_length))

# Recurrent stack described as (units, consecutive repeats); expanded it is
# 256, 128x3, 64x3, 32x3, 16x3, 8x3, 4x3, 2, 1 — identical to the original
# hand-written 21-layer stack. Every LSTM except the last returns sequences
# so the next layer can consume them.
_lstm_plan = [(256, 1), (128, 3), (64, 3), (32, 3), (16, 3), (8, 3), (4, 3), (2, 1), (1, 1)]
_lstm_units = [units for units, repeats in _lstm_plan for _ in range(repeats)]
for units in _lstm_units[:-1]:
    nn_model.add(LSTM(units, return_sequences=True))
nn_model.add(LSTM(_lstm_units[-1]))
# NOTE(review): the final LSTM has a single unit, compressing each sample to
# one scalar before a vocabulary-sized softmax — a severe information
# bottleneck. Confirm this architecture is intentional.
nn_model.add(Dense(vocab_size, activation='softmax'))

nn_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
nn_model.fit(X, y, epochs=20, batch_size=32)
75
+
76
# === SENTENCE GENERATION (with temperature sampling) ===
def generate_sentence(start_word, sentence_length, temperature=1.0):
    """Sample a sentence of up to ``sentence_length`` words from the model.

    Starting from ``start_word``, repeatedly encodes the sentence so far,
    asks the trained network for next-word probabilities, applies
    temperature scaling, and samples the next word. Generation stops early
    if the sampled index maps to no vocabulary word (e.g. the padding
    index 0).

    Args:
        start_word: first word of the sentence, used verbatim.
        sentence_length: total number of words to aim for.
        temperature: softmax temperature; must be > 0. Lower values make
            sampling greedier, higher values make it more uniform.

    Returns:
        The generated sentence as a single space-joined string.

    Raises:
        ValueError: if ``temperature`` is not strictly positive (the
            original code divided by zero here).
    """
    if temperature <= 0:
        raise ValueError("temperature must be > 0")

    sentence = [start_word]
    for _ in range(sentence_length - 1):
        encoded = tokenizer.texts_to_sequences([' '.join(sentence)])
        encoded = pad_sequences(encoded, maxlen=context_length, padding='post')
        predicted_probs = nn_model.predict(encoded)[0]

        # Temperature scaling in log space. Subtracting the max logit
        # before exponentiating keeps the softmax numerically stable
        # (avoids exp overflow for small temperatures).
        logits = np.log(np.asarray(predicted_probs, dtype='float64') + 1e-10) / temperature
        logits -= np.max(logits)
        probs = np.exp(logits)
        # Renormalize explicitly: np.random.choice raises if p does not
        # sum to 1 within tolerance, which float error can otherwise cause.
        probs /= probs.sum()

        predicted_index = np.random.choice(len(probs), p=probs)
        next_word = tokenizer.index_word.get(int(predicted_index), '')
        if not next_word:
            break  # sampled the padding/unknown index — stop generating
        sentence.append(next_word)

    return ' '.join(sentence)
98
+
99
# Prompt for a seed word, then generate and print one sentence.
start_word = input("Başlangıç kelimesi giriniz: ")
generated = generate_sentence(start_word, sentence_length, temperature=1.0)
print("\nÜretilen Cümle:", generated)