import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel
import torch

# Inference device. The base model loads on CPU by default; the model is
# explicitly moved below so changing this to "cuda" keeps model and inputs
# on the same device.
DEVICE = "cpu"


@st.cache_resource
def load_model():
    """Load the FLAN-T5 base model, attach the PEFT/LoRA adapter, and return
    ``(model, tokenizer)``.

    Cached with ``st.cache_resource`` so the (large) model is loaded once per
    Streamlit server process, not on every rerun of the script.
    """
    model_id = "google/flan-t5-large"
    adapter_path = "./Flan-T5-Typosquat-detect"  # Adjust to your saved adapter path

    # Load the tokenizer and base model, then wrap with the fine-tuned adapter.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    model = PeftModel.from_pretrained(model, adapter_path)
    # Keep the model on the same device the inputs are moved to, and switch
    # off dropout/training behavior for inference.
    model.to(DEVICE)
    model.eval()
    return model, tokenizer


model, tokenizer = load_model()

st.title("FLAN-T5 Typosquatting Detection")
st.write("Enter a potential typosquatted domain and a target domain to check if one is a variant of the other.")

# Non-editable prompt part
prompt_prefix = "Is the first domain a typosquat of the second:"

# Display the non-editable prompt with input fields for the rest
st.markdown("### Prompt")
st.text_area("Prompt", prompt_prefix, height=50, disabled=True)

# User inputs for dynamic part of the prompt
potential_typosquat = st.text_input("Potential Typosquatted Domain", value="lonlonsoft.com")
target_domain = st.text_input("Legitimate Domain", value="stiltsoft.net")

# Perform inference when button is clicked
if st.button("Check Typosquatting"):
    if potential_typosquat and target_domain:
        # Build the prompt only after both fields are known to be non-empty.
        full_prompt = f"{prompt_prefix} {potential_typosquat} {target_domain}"

        # Encode and generate the response. no_grad: inference only — avoids
        # building the autograd graph and reduces memory use.
        input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(DEVICE)
        with torch.no_grad():
            outputs = model.generate(input_ids, max_new_tokens=20)

        # Decode the response
        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Display the result
        st.write("**Prediction:**")
        st.write(prediction)
    else:
        st.warning("Please enter both domains to perform the check.")