Pro-AI-TC / simple_lm.pth
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import json
# Define a simple LSTM-based language model
class SimpleLM(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(SimpleLM, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)                  # (batch, seq_len, embedding_dim)
        output, hidden = self.lstm(embedded, hidden)  # (batch, seq_len, hidden_dim)
        output = self.linear(output)                  # per-token logits: (batch, seq_len, vocab_size)
        return output, hidden

# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, data_path):
        # Expects a JSON file containing a list of token-ID sequences
        with open(data_path, 'r') as f:
            self.data = json.load(f)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text = self.data[idx]
        return torch.tensor(text, dtype=torch.long)

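# The file is assumed to contain a JSON list of token-ID sequences of equal
# length, so the DataLoader's default collation can stack them into one batch
# tensor, for example:
#
#   [[12, 7, 431, 9, 88, 2],
#    [54, 3, 210, 77, 5, 2],
#    [19, 400, 6, 31, 250, 2]]
#
# The real training_data.json and its tokenization are not shown here.
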
# Define training parameters
vocab_size = 10000 # Example vocabulary size
embedding_dim = 128
hidden_dim = 256
batch_size = 32
num_epochs = 10
# Initialize the LM
lm = SimpleLM(vocab_size, embedding_dim, hidden_dim)
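# Illustration (not part of the original script): run a quick forward pass on
# random token IDs to show the shapes the model produces. Batch size 2 and
# sequence length 16 are arbitrary choices for this sketch.
_demo_input = torch.randint(0, vocab_size, (2, 16))   # token IDs: (batch=2, seq_len=16)
_demo_logits, _demo_hidden = lm(_demo_input, None)    # logits: (2, 16, vocab_size)
assert _demo_logits.shape == (2, 16, vocab_size)
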
# Load data
dataset = CustomDataset('training_data.json')
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(lm.parameters(), lr=0.001)
# Training loop
for epoch in range(num_epochs):
    total_loss = 0
    for batch in dataloader:
        optimizer.zero_grad()
        input_data = batch[:, :-1]  # Input sequence
        target = batch[:, 1:]       # Target sequence shifted by one
        hidden = None               # Start each batch with a fresh hidden state
        output, hidden = lm(input_data, hidden)
        output = output.view(-1, vocab_size)  # Flatten to (batch * seq_len, vocab_size)
        target = target.reshape(-1)           # reshape (not view): the shifted slice is non-contiguous
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f'Epoch {epoch + 1}, Loss: {total_loss / len(dataloader)}')

# Save the trained LM
torch.save(lm.state_dict(), 'simple_lm.pth')
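
# A minimal inference sketch (not part of the original script): reload the
# saved weights and generate tokens greedily. The start token ID (1) and the
# 20-step length are arbitrary placeholders, not values from the dataset.
loaded_lm = SimpleLM(vocab_size, embedding_dim, hidden_dim)
loaded_lm.load_state_dict(torch.load('simple_lm.pth'))
loaded_lm.eval()

tokens = [1]        # hypothetical start-of-sequence token ID
hidden = None
with torch.no_grad():
    for _ in range(20):
        inp = torch.tensor([[tokens[-1]]], dtype=torch.long)  # shape (1, 1)
        logits, hidden = loaded_lm(inp, hidden)
        next_id = int(logits[0, -1].argmax())                 # greedy next token
        tokens.append(next_id)
print(tokens)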