# app.py
# Character-level text generator with PyTorch (RNN/LSTM/GRU) + Gradio UI
import time
import random

import numpy as np
import torch
import torch.nn as nn
import matplotlib
matplotlib.use("Agg")  # headless backend so figures render inside the Gradio server
import matplotlib.pyplot as plt
import gradio as gr

# -----------------------
# Utilities
# -----------------------
def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(42)

def pick_device(force_cpu: bool = False):
    if not force_cpu and torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

def text_from_file(f):
    # Gradio may hand us a filepath string (newer versions) or a file-like
    # object (older versions); handle both and decode bytes defensively.
    if f is None:
        return ""

    def _decode(raw: bytes) -> str:
        try:
            return raw.decode("utf-8")
        except UnicodeDecodeError:
            return raw.decode("latin-1", errors="ignore")

    # Prefer reading from a path: either the value itself or the .name of a tempfile.
    path = f if isinstance(f, str) else getattr(f, "name", None)
    if path:
        try:
            with open(path, "rb") as fh:
                return _decode(fh.read())
        except OSError:
            pass

    # Fallback: a readable file-like object.
    try:
        raw = f.read()
        return _decode(raw) if isinstance(raw, bytes) else str(raw)
    except Exception:
        return ""

def build_vocab(text: str):
    chars = sorted(list(set(text)))
    stoi = {ch: i for i, ch in enumerate(chars)}
    itos = {i: ch for ch, i in stoi.items()}
    return stoi, itos, len(chars)

def encode(text: str, stoi: dict):
    return torch.tensor([stoi[c] for c in text], dtype=torch.long)

def decode(indices, itos: dict):
    return "".join([itos[int(i)] for i in indices])

def make_batches(data_ids: torch.Tensor, seq_len: int, batch_size: int):
    # data_ids: shape [T]
    total_len = data_ids.size(0)
    num_sequences = (total_len - 1) // seq_len
    if num_sequences <= 0:
        return []

    # Trim so we have full sequences
    trimmed = num_sequences * seq_len
    x = data_ids[:trimmed]
    y = data_ids[1:trimmed + 1]

    x = x.view(num_sequences, seq_len)
    y = y.view(num_sequences, seq_len)

    # Shuffle sequence order
    perm = torch.randperm(num_sequences)
    x = x[perm]
    y = y[perm]

    # Batch into mini-batches
    batches = []
    for i in range(0, num_sequences, batch_size):
        xb = x[i:i + batch_size]
        yb = y[i:i + batch_size]
        if xb.size(0) == batch_size:  # keep full batches only for simplicity
            batches.append((xb, yb))
    return batches
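
# Example of the next-character shift (illustrative): with seq_len=4 and
# data_ids = [t0, t1, ..., t8], the first sequence pair is
#   x = [t0, t1, t2, t3]   y = [t1, t2, t3, t4]
# so each position in y is the training target for the same position in x.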

# -----------------------
# Model
# -----------------------
class CharRNN(nn.Module):
    def __init__(self, vocab_size: int, embed_dim: int, hidden_size: int, num_layers: int, cell_type: str = "LSTM", dropout: float = 0.0):
        super().__init__()
        assert cell_type in {"RNN", "LSTM", "GRU"}
        self.vocab_size = vocab_size
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.cell_type = cell_type
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # nn.RNN / nn.GRU / nn.LSTM share the same constructor signature, so
        # pick the class by name; inter-layer dropout only applies when the
        # network is stacked (num_layers > 1).
        rnn_cls = {"RNN": nn.RNN, "GRU": nn.GRU, "LSTM": nn.LSTM}[cell_type]
        self.rnn = rnn_cls(
            embed_dim,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )

        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden=None):
        # x: [B, T]
        x = self.embed(x)  # [B, T, E]
        out, hidden = self.rnn(x, hidden)  # out: [B, T, H]
        logits = self.fc(out)  # [B, T, V]
        return logits, hidden

    def init_hidden(self, batch_size: int, device):
        if self.cell_type == "LSTM":
            h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device)
            c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device)
            return (h0, c0)
        else:
            h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device)
            return h0
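
# Shape sketch (illustrative numbers): with vocab_size=50, embed_dim=64,
# hidden_size=128, num_layers=2 and input x of shape [8, 32], forward(x)
# returns logits of shape [8, 32, 50]; the hidden state is a [2, 8, 128]
# tensor for RNN/GRU, or a tuple of two such tensors for LSTM.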

# -----------------------
# Training & Generation
# -----------------------
def temperature_sample(logits: torch.Tensor, temperature: float):
    # logits: [V]
    if temperature <= 0:
        # argmax
        return int(torch.argmax(logits).item())
    probs = torch.softmax(logits / temperature, dim=-1)
    return int(torch.multinomial(probs, num_samples=1).item())
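
# Temperature rescales the logits before the softmax:
#   p_i = exp(z_i / T) / sum_j exp(z_j / T)
# T < 1 sharpens the distribution toward the most likely characters, T > 1
# flattens it toward uniform, and T -> 0 degenerates to argmax (handled
# explicitly above to avoid dividing by zero).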

def generate_text(model: CharRNN, stoi, itos, prime: str, length: int, temperature: float, device):
    if len(prime) == 0:
        # start from random char if prime is empty
        prime = random.choice(list(stoi.keys()))
    model.eval()
    # Feed the prime through the model to warm up the hidden state; prime
    # characters not in the vocabulary fall back to id 0.
    input_ids = torch.tensor([[stoi.get(ch, 0) for ch in prime]], dtype=torch.long, device=device)
    hidden = model.init_hidden(batch_size=1, device=device)
    with torch.no_grad():
        logits, hidden = model(input_ids, hidden)

    generated = list(prime)
    for _ in range(length):
        with torch.no_grad():
            # take last timestep logits
            last_logits = logits[0, -1, :]  # [V]
            next_id = temperature_sample(last_logits, temperature)
            generated.append(itos[next_id])

            # next step
            inp = torch.tensor([[next_id]], dtype=torch.long, device=device)
            logits, hidden = model(inp, hidden)

    return "".join(generated)

def train_one_run(
    raw_text: str,
    model_type: str = "LSTM",
    embed_dim: int = 64,
    hidden_size: int = 128,
    num_layers: int = 2,
    seq_len: int = 64,
    batch_size: int = 16,
    lr: float = 0.003,
    epochs: int = 3,
    dropout: float = 0.0,
    temperature: float = 0.8,
    generate_n_chars: int = 400,
    prime_text: str = "The ",
    force_cpu: bool = False,
):
    t0 = time.time()

    # Sanity text
    text = raw_text.strip()
    if len(text) < max(100, seq_len + 1):
        # Provide a small default if not enough text
        default = (
            "To be, or not to be, that is the question:\n"
            "Whether 'tis nobler in the mind to suffer\n"
            "The slings and arrows of outrageous fortune,\n"
            "Or to take arms against a sea of troubles\n"
            "And by opposing end them."
        )
        text = (text + "\n" + default).strip()

    stoi, itos, vocab_size = build_vocab(text)
    data_ids = encode(text, stoi)

    device = pick_device(force_cpu=force_cpu)

    model = CharRNN(
        vocab_size=vocab_size,
        embed_dim=embed_dim,
        hidden_size=hidden_size,
        num_layers=num_layers,
        cell_type=model_type,
        dropout=dropout,
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    losses = []
    model.train()
    for ep in range(1, epochs + 1):
        batches = make_batches(data_ids, seq_len=seq_len, batch_size=batch_size)
        if len(batches) == 0:
            raise ValueError("Not enough data to make at least one full batch. Increase text length or reduce seq_len/batch_size.")
        ep_loss = 0.0
        count = 0

        for xb, yb in batches:
            xb = xb.to(device)  # [B, T]
            yb = yb.to(device)  # [B, T]
            hidden = model.init_hidden(batch_size=xb.size(0), device=device)

            optimizer.zero_grad()
            logits, _ = model(xb, hidden)  # [B, T, V]
            # Reshape for CE loss
            B, T, V = logits.shape
            loss = criterion(logits.view(B * T, V), yb.view(B * T))
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            ep_loss += loss.item()
            count += 1

        mean_loss = ep_loss / max(count, 1)
        losses.append(mean_loss)

    train_time = time.time() - t0

    # Generate
    gen = generate_text(
        model=model,
        stoi=stoi,
        itos=itos,
        prime=prime_text,
        length=generate_n_chars,
        temperature=temperature,
        device=device,
    )

    # Plot loss curve
    fig = plt.figure(figsize=(5, 3.2), dpi=140)
    xs = np.arange(1, len(losses) + 1)
    plt.plot(xs, losses, marker="o")
    plt.xlabel("Epoch")
    plt.ylabel("Training Loss")
    plt.title("Loss per Epoch")
    plt.tight_layout()

    # Hyperparameters summary (to show clearly in UI)
    hparams = {
        "model_type": model_type,
        "embed_dim": embed_dim,
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "seq_len": seq_len,
        "batch_size": batch_size,
        "learning_rate": lr,
        "epochs": epochs,
        "dropout": dropout,
        "temperature": temperature,
        "generate_n_chars": generate_n_chars,
        "vocab_size": vocab_size,
        "device": str(device),
        "train_time_sec": round(train_time, 3),
        "num_batches_per_epoch": len(make_batches(data_ids, seq_len, batch_size)),
        "text_len": len(text),
    }

    return gen, fig, hparams
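
# Headless usage sketch (bypasses the Gradio UI; any sufficiently long string works):
#   sample, fig, hp = train_one_run("some long training text ... " * 50, model_type="GRU", epochs=2)
#   print(hp["train_time_sec"], sample[:200])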

# -----------------------
# Gradio Interface
# -----------------------
EXAMPLE_TEXT = (
    "In the beginning God created the heaven and the earth. "
    "And the earth was without form, and void; and darkness was upon the face of the deep. "
    "And the Spirit of God moved upon the face of the waters. "
    "And God said, Let there be light: and there was light."
)

with gr.Blocks(title="PyTorch RNN/LSTM/GRU Text Generator") as demo:
    gr.Markdown(
        """# 🔠 PyTorch Character RNN / LSTM / GRU
Train a tiny character-level model on your text and generate new text.  
Use the controls below to **show & tweak hyperparameters**, provide **input text**, and view **outputs** (generated text + loss curve).
"""
    )
    with gr.Row():
        with gr.Column():
            corpus = gr.Textbox(
                label="Training Text (paste here)",
                value=EXAMPLE_TEXT,
                lines=10,
                placeholder="Paste training text here… (longer is better)"
            )
            upload = gr.File(label="Or upload a .txt file (optional)", file_count="single")
            prime_text = gr.Textbox(label="Prime text (seed for generation)", value="The ", lines=1)

            with gr.Row():
                model_type = gr.Radio(choices=["LSTM", "GRU", "RNN"], value="LSTM", label="Model type")
                force_cpu = gr.Checkbox(value=False, label="Force CPU (uncheck to use GPU if available)")

            with gr.Accordion("Hyperparameters", open=True):
                with gr.Row():
                    embed_dim = gr.Slider(16, 256, value=64, step=1, label="Embedding Dim")
                    hidden_size = gr.Slider(32, 512, value=128, step=1, label="Hidden Size")
                with gr.Row():
                    num_layers = gr.Slider(1, 4, value=2, step=1, label="Layers")
                    dropout = gr.Slider(0.0, 0.6, value=0.0, step=0.05, label="Dropout")
                with gr.Row():
                    seq_len = gr.Slider(16, 256, value=64, step=1, label="Sequence Length")
                    batch_size = gr.Slider(4, 128, value=16, step=1, label="Batch Size")
                with gr.Row():
                    lr = gr.Number(value=0.003, precision=6, label="Learning Rate")
                    epochs = gr.Slider(1, 15, value=3, step=1, label="Epochs")
                with gr.Row():
                    temperature = gr.Slider(0.0, 1.5, value=0.8, step=0.05, label="Temperature (sampling)")
                    generate_n_chars = gr.Slider(50, 1000, value=400, step=10, label="Generate N Chars")

            run_btn = gr.Button("🚀 Train & Generate", variant="primary")

        with gr.Column():
            gen_text = gr.Textbox(label="Generated Text (output)", lines=20)
            loss_plot = gr.Plot(label="Training Loss")
            hparams_json = gr.JSON(label="Hyperparameters (for your records)")

    def run_pipeline(corpus_text, uploaded_file, model_type, embed_dim, hidden_size,
                     num_layers, seq_len, batch_size, lr, epochs, dropout,
                     temperature, generate_n_chars, prime_text, force_cpu):
        # Gradio passes component values positionally, in the order listed in
        # `inputs` below; sliders yield floats, so cast the integer-valued ones.
        merged = (text_from_file(uploaded_file) + "\n" + (corpus_text or "")).strip() if uploaded_file else (corpus_text or "")
        out_text, fig, hp = train_one_run(
            raw_text=merged,
            model_type=model_type,
            embed_dim=int(embed_dim),
            hidden_size=int(hidden_size),
            num_layers=int(num_layers),
            seq_len=int(seq_len),
            batch_size=int(batch_size),
            lr=float(lr),
            epochs=int(epochs),
            dropout=float(dropout),
            temperature=float(temperature),
            generate_n_chars=int(generate_n_chars),
            prime_text=prime_text,
            force_cpu=bool(force_cpu),
        )
        return out_text, fig, hp

    run_btn.click(
        run_pipeline,
        inputs=[
            corpus, upload,
            model_type, embed_dim, hidden_size, num_layers,
            seq_len, batch_size, lr, epochs, dropout,
            temperature, generate_n_chars, prime_text, force_cpu,
        ],
        outputs=[gen_text, loss_plot, hparams_json],
        queue=False,
    )

if __name__ == "__main__":
    demo.launch()