INFERENCE CODE
pip install "transformers[torch]"
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import time
# Load the tokenizer and the seq2seq model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("Mr-Vicky-01/QnA-248M")
model = AutoModelForSeq2SeqLM.from_pretrained("Mr-Vicky-01/QnA-248M")

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move the model once, up front, so this one-time transfer is not counted in
# the time reported below.
model.to(device)

# Prompt prefix prepended to every question before tokenization.
prefix = "Answer the Question: "
# NOTE(review): YOUR_QUESTION is not defined in the visible code -- presumably
# a placeholder to be replaced with the question string before running.
inp = YOUR_QUESTION

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
# The question is lower-cased before the prefix is prepended.
inputs = tokenizer(prefix + inp.lower(), return_tensors="pt")
inputs = inputs.to(device)  # inputs must be on the same device as the model
# max_length=512 bounds the length of the generated sequence.
outputs = model.generate(**inputs, max_length=512)
# Decode the first (only) generated sequence, dropping special tokens.
answer = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
end = time.perf_counter()  # stop the clock before printing

print(answer)
print(f"Time taken: {end - start}")