### Imports and class names setup ---------------------------------------------------- ###
import os
from timeit import default_timer as timer
from typing import Dict, Tuple

import gradio as gr
import torch
import torchvision

from model import create_vit

# Setup class names: one label per line. Blank lines are skipped so a stray
# empty line cannot add a bogus "" label (which would also make the label
# count exceed the number of model outputs).
with open("class_names.txt", "r", encoding="utf-8") as f:
    class_names = [line.strip() for line in f if line.strip()]

# Device agnostic code
# NOTE(review): `device` is detected here but the model below is explicitly
# created and loaded on the CPU, so this value is currently informational only.
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

### Model and transforms preparation ---------------------------------------------------- ###
vit_model, vit_transforms = create_vit(
    pretrained_weights=torchvision.models.ViT_B_16_Weights.DEFAULT,
    model=torchvision.models.vit_b_16,
    in_features=768,
    out_features=101,
    device='cpu',
)

# Load saved weights onto the CPU.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
vit_model.load_state_dict(
    torch.load(
        f="pretrained_vit_feature_extractor_food101.pth",
        map_location=torch.device("cpu"),
    )
)


### Predict function ---------------------------------------------------- ###
def predict(img) -> Tuple[Dict[str, float], float]:
    """Classify an image with the ViT model and time the prediction.

    Args:
        img: The image delivered by the Gradio image input (configured with
            ``type="pil"``), or ``None`` when nothing was uploaded.

    Returns:
        A tuple ``(pred_labels_and_probs, pred_time)`` where
        ``pred_labels_and_probs`` maps every entry of ``class_names`` to its
        softmax probability and ``pred_time`` is the elapsed time in seconds,
        rounded to 4 decimal places.

    Raises:
        gr.Error: If no image was provided.
    """
    # Fail with a readable message instead of an opaque transform error.
    if img is None:
        raise gr.Error("Please upload an image before requesting a prediction.")

    # Start a timer
    start_time = timer()

    # Transform the input image for use with the ViT model; unsqueeze adds a
    # batch dimension on the 0th index, e.g. (3, 224, 224) -> (1, 3, 224, 224).
    batch = vit_transforms(img).unsqueeze(0)

    # Put model into eval mode, make prediction
    vit_model.eval()
    with torch.inference_mode():
        # Turn the prediction logits into probabilities over the class axis.
        pred_logits = vit_model(batch)
        pred_probs = torch.softmax(pred_logits, dim=1)

    # Convert the single row of probabilities to Python floats once, then pair
    # each class name with its probability. Indexing (rather than zip) keeps
    # an IndexError if there are more class names than model outputs.
    probs = pred_probs[0].tolist()
    pred_labels_and_probs = {name: probs[i] for i, name in enumerate(class_names)}

    # Calculate pred time
    pred_time = round(timer() - start_time, 4)

    # Return pred dict and pred time
    return pred_labels_and_probs, pred_time


### Gradio interface and launch ------------------------------------------------------------------ ###
# Create title and description
title = "FoodVision: ViT Model"
description = "A ViT model trained on 20% of the Food101 dataset to classify Food images"

# Create example list (sorted so the examples appear in a stable order)
example_list = [
    [os.path.join("examples", example)]
    for example in sorted(os.listdir("examples"))
]

# Create the Gradio demo
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(num_top_classes=5, label="Predictions"),
        gr.Number(label="Prediction time(s)"),
    ],
    title=title,
    description=description,
    examples=example_list,
)

demo.launch()