|
import torch
|
|
import torch.nn as nn
|
|
import torch.optim as optim
|
|
from torchvision import transforms
|
|
from PIL import Image
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
class IJEPAModel(nn.Module):
    """Small convolutional classifier: a conv encoder followed by a linear head.

    The encoder is two ``Conv2d -> ReLU -> MaxPool2d`` stages, a flatten, and
    a linear projection to ``feature_dim``. ``classifier`` maps that feature
    vector to ``num_classes`` logits.

    The layer order inside ``encoder`` is part of the checkpoint format
    (state-dict keys are ``encoder.0``, ``encoder.3``, ``encoder.7`` and
    ``classifier``), so it must not be reordered.

    Args:
        feature_dim: Width of the feature vector produced by the encoder.
        in_channels: Number of channels in the input images.
        num_classes: Number of logits produced by the classifier head.
        input_size: Height and width of the (square) input images.

    Raises:
        ValueError: If ``input_size`` is too small to survive both pooling
            stages (smaller than 4).
    """

    def __init__(
        self,
        feature_dim: int = 128,
        *,
        in_channels: int = 1,
        num_classes: int = 10,
        input_size: int = 28,
    ) -> None:
        super().__init__()
        if input_size < 4:
            raise ValueError(
                f"input_size must be at least 4, got {input_size}"
            )
        # The 3x3 convolutions (stride 1, padding 1) keep height/width; each
        # 2x2 max-pool halves them with floor division. 28 -> 14 -> 7.
        pooled_size = input_size // 2 // 2
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Linear(64 * pooled_size * pooled_size, feature_dim),
        )
        self.classifier = nn.Linear(feature_dim, num_classes)

    def forward(self, x):
        """Encode the batch ``x`` and return the classifier logits."""
        features = self.encoder(x)
        return self.classifier(features)
|
|
|
|
|
|
# Build the network and restore its trained weights from disk.
model = IJEPAModel()
# map_location="cpu" lets a checkpoint that was saved from a GPU run load on
# a machine without CUDA; the model itself is never moved off the CPU here.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from
# a trusted source (or pass weights_only=True where the installed PyTorch
# version supports it).
state_dict = torch.load("mnist-i-jepa.pth", map_location="cpu")
model.load_state_dict(state_dict)
# Switch the module to evaluation mode before running inference.
model.eval()
|
|
|
|
|
|
# Preprocessing applied to the input image before it is fed to the model:
# collapse to one grayscale channel, resize to 28x28, convert to a tensor,
# then normalize with mean 0.5 and standard deviation 0.5.
preprocessing_steps = [
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
transform = transforms.Compose(preprocessing_steps)
|
|
|
|
|
|
# Open the image in a context manager so the underlying file handle is
# released as soon as preprocessing has read the pixel data.
with Image.open("test_digit.jpg") as source_image:
    # unsqueeze(0) prepends a batch dimension of size 1 for the model.
    img = transform(source_image).unsqueeze(0)
|
|
|
|
|
|
# Run the forward pass with gradient tracking disabled, then take the index
# of the largest logit along dimension 1 as the predicted class.
with torch.no_grad():
    output = model(img)
    predicted = output.argmax(dim=1)

# .item() converts the single-element tensor into a plain Python number.
predicted_digit = predicted.item()
print(f"Predicted digit: {predicted_digit}")
|
|
|
|
|
|
# Show the preprocessed image with the prediction as the figure title.
# squeeze() drops the size-1 dimensions so imshow receives a 2-D array.
digit_pixels = img.squeeze()
plot_title = f"Predicted digit: {predicted_digit}"
plt.imshow(digit_pixels, cmap='gray')
plt.title(plot_title)
plt.show()
|
|
|