Spaces:

Tarive
/

protein_fold_hack_nation_ai

Sleeping

App Files Files Community

Tarive committed 23 days ago

Commit

c780aad

verified ·

1 Parent(s): e210218

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -16

app.py CHANGED Viewed

@@ -1,21 +1,34 @@
-# app.py (Updated Version)
 import gradio as gr
 from transformers import pipeline
 import pickle
 # =============================================================================
 # 1. LOAD YOUR MODEL AND THE SAVED LABEL ENCODER
 # =============================================================================
 # Define the path to your model repository
-model_path = "Tarive/esm2_t12_35M_UR50D-finetuned-pfam-1k" # Make sure this is correct
 # Load the classification pipeline
 classifier = pipeline("text-classification", model=model_path)
-# Load the label encoder from the file you uploaded
-with open("label_encoder.pkl", "rb") as f:
     label_encoder = pickle.load(f)
 # =============================================================================
@@ -29,20 +42,25 @@ def predict_family(sequence):
     # The model outputs labels like "LABEL_455". We need to extract the number.
     results = {}
     for p in predictions:
-        # Extract the number from the label string (e.g., "LABEL_455" -> 455)
-        label_index = int(p['label'].split('_')[1])
-        # Use the label_encoder to find the original family name
-        original_label = label_encoder.inverse_transform([label_index])[0]
-        # Store the real name and score
-        results[original_label] = p['score']
     return results
 # =============================================================================
-# 3. CREATE THE GRADIO INTERFACE (No changes here)
 # =============================================================================
 iface = gr.Interface(
     fn=predict_family,
     inputs=gr.Textbox(
@@ -58,10 +76,13 @@ iface = gr.Interface(
     description="This demo uses a fine-tuned ESM-2 model to predict the protein family from its amino acid sequence. Enter a sequence to see the top 5 predictions and their confidence scores.",
     examples=[
         ["MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR"],
-        ["MTEYKLVVVGAGDVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVEVDCQQCMILDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLAARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGGCMS"]
     ],
-    allow_flagging="never"
 )
 # Launch the interface!
 iface.launch()

+# app.py (Final Corrected Version)
 import gradio as gr
 from transformers import pipeline
 import pickle
+from huggingface_hub import hf_hub_download # Import the download function
 # =============================================================================
 # 1. LOAD YOUR MODEL AND THE SAVED LABEL ENCODER
 # =============================================================================
 # Define the path to your model repository
+model_path = "Tarive/esm2_t12_35M_UR50D-finetuned-pfam-1k"
+# --- FIX FOR LFS ---
+# Explicitly download the label_encoder.pkl file from the repo.
+# This ensures the app can find the file even if it's stored with Git LFS.
+print("Downloading label encoder...")
+encoder_path = hf_hub_download(repo_id=model_path, filename="label_encoder.pkl")
+print("Download complete.")
+# --- END FIX ---
 # Load the classification pipeline
+print("Loading classification pipeline...")
 classifier = pipeline("text-classification", model=model_path)
+print("Pipeline loaded.")
+# Load the label encoder from the path where it was downloaded
+print("Loading label encoder...")
+with open(encoder_path, "rb") as f:
     label_encoder = pickle.load(f)
+print("Label encoder loaded.")
 # =============================================================================
     # The model outputs labels like "LABEL_455". We need to extract the number.
     results = {}
     for p in predictions:
+        try:
+            # Extract the number from the label string (e.g., "LABEL_455" -> 455)
+            label_index = int(p['label'].split('_')[1])
+            # Use the label_encoder to find the original family name
+            original_label = label_encoder.inverse_transform([label_index])[0]
+            # Store the real name and score
+            results[original_label] = p['score']
+        except (ValueError, IndexError):
+            # Handle cases where the label format is unexpected
+            results[p['label']] = p['score']
     return results
 # =============================================================================
+# 3. CREATE THE GRADIO INTERFACE
 # =============================================================================
+print("Creating Gradio interface...")
 iface = gr.Interface(
     fn=predict_family,
     inputs=gr.Textbox(
     description="This demo uses a fine-tuned ESM-2 model to predict the protein family from its amino acid sequence. Enter a sequence to see the top 5 predictions and their confidence scores.",
     examples=[
         ["MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR"],
+        ["MTEYKLVVVGAGDVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVEVDCQQCMILDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLAARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGGCMS"],
+        ["MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA"]
     ],
+    allow_flagging="never" # Disables the "Flag" button for a cleaner interface
 )
+print("Interface created.")
 # Launch the interface!
+print("Launching app...")
 iface.launch()