# Scraped file-viewer header (not part of the program):
#   uploader: alidenewade
#   commit:   7a73738 "Update app.py" (verified)
#   size:     21.7 kB
#   page links: raw / history / blame
import gradio as gr
import matplotlib.pyplot as plt
from Bio import SeqIO
from Bio.Seq import Seq # Though not directly used in final logic, good for context
from Bio.Restriction import RestrictionBatch, AllEnzymes, Analysis
import os # For getting filename
# Select the non-interactive 'Agg' backend: figures are only handed to Gradio,
# never shown in a GUI window.
import matplotlib
matplotlib.use('Agg')
# Locations of the example plasmid files used by load_examples_and_auto_process().
# EXAMPLE_DIR is a relative path, so it is resolved against the current working
# directory at run time.
EXAMPLE_DIR = "eg_files"
EXAMPLE_PLASMID1_PATH = os.path.join(EXAMPLE_DIR, "plasmid1_example.gb")
EXAMPLE_PLASMID2_PATH = os.path.join(EXAMPLE_DIR, "plasmid2_example.gb")
# --- Core BioPython and Plotting Functions ---
def _render_gel_error(fig, ax, message, plasmid_label):
    """Turn *ax* into a blank panel showing *message* in red and return *fig*."""
    # Axes coordinates keep the text centred whatever the data limits are.
    ax.text(0.5, 0.5, message, ha='center', va='center', wrap=True, color='red',
            transform=ax.transAxes)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(f"Virtual Gel: {plasmid_label} - Error", fontsize=10)
    fig.tight_layout()
    return fig


def simulate_digest_and_plot_gradio(plasmid_seq_record, enzyme_name, plasmid_label):
    """
    Simulate a single-enzyme restriction digest and draw it as a virtual gel.

    The plasmid is digested as a *circular* molecule (``linear=False``), the
    same topology analyze_plasmids_gradio() uses when it searches for sites,
    so an enzyme with one site yields one full-length fragment, not two.

    Args:
        plasmid_seq_record: Record exposing a ``.seq`` attribute, or None.
        enzyme_name: Name of the enzyme to look up in ``AllEnzymes``.
        plasmid_label: Human-readable label used in titles and messages.

    Returns:
        A matplotlib Figure: the gel lane on success, or a panel carrying a
        red error message when the record is missing, the enzyme cannot be
        loaded, or the digest cannot be simulated.
    """
    fig, ax = plt.subplots(figsize=(6, 8))

    if plasmid_seq_record is None:
        return _render_gel_error(
            fig, ax, f"Error: Plasmid data for '{plasmid_label}' is missing.", plasmid_label)

    try:
        enzyme = AllEnzymes.get(str(enzyme_name))
        if not enzyme:
            raise ValueError(f"Enzyme '{enzyme_name}' not found in Biopython's AllEnzymes.")
    except Exception as e:  # UI boundary: report any lookup failure in the plot
        return _render_gel_error(
            fig, ax, f"Error: Could not load enzyme '{enzyme_name}'.\n{e}", plasmid_label)

    sequence = plasmid_seq_record.seq
    plasmid_length = len(sequence)

    try:
        fragments_seqs = enzyme.catalyse(sequence, linear=False)
    except NotImplementedError as e:
        # NOTE(review): Biopython appears to raise this for enzymes whose cut
        # position is unknown; confirm against the installed version.
        return _render_gel_error(
            fig, ax, f"Error: Cannot simulate a digest with '{enzyme_name}'.\n{e}", plasmid_label)

    # One fragment is ambiguous on a circular molecule: it is either the uncut
    # plasmid or a single cut that linearised it. Only the site search can tell.
    is_uncut = len(fragments_seqs) == 1 and not enzyme.search(sequence, linear=False)

    if is_uncut:
        ax.text(0.5, 0.5, f"Enzyme {enzyme_name} does not cut {plasmid_label}",
                ha='center', va='center', wrap=True, transform=ax.transAxes)
        title = f"Virtual Gel: {plasmid_label} + {enzyme_name} (No Sites)"
        lengths = [plasmid_length]
    else:
        title = f"Virtual Gel: {plasmid_label} digested with {enzyme_name}"
        lengths = sorted((len(fragment) for fragment in fragments_seqs), reverse=True)

    # Log-scaled size axis, padded so the largest band is not drawn on the edge.
    ax.set_yscale("log")
    min_display_size = 10
    plasmid_len_for_scale = max(plasmid_length, min_display_size * 10)
    max_display_size = max(plasmid_len_for_scale * 1.1, min_display_size * 2)
    ax.set_ylim(min_display_size, max_display_size)

    band_width = 0.6
    lane_center = 0.5

    if not lengths:
        ax.text(0.5, 0.5, "No fragments to display.", ha='center', va='center',
                transform=ax.transAxes)
    else:
        for i, size in enumerate(lengths):
            if size < min_display_size:
                # lengths is sorted descending, so everything from here on is
                # below the axis floor; summarise the remainder once and stop.
                ax.text(lane_center, min_display_size * 1.1,
                        f"(+ {len(lengths) - i} small fragments < {min_display_size}bp not shown)",
                        ha='center', va='top', fontsize=7, color='gray')
                break
            ax.plot([lane_center - band_width / 2, lane_center + band_width / 2], [size, size],
                    linewidth=6, color='royalblue', solid_capstyle='butt')
            ax.text(lane_center + band_width / 2 + 0.05, size, f"{size} bp",
                    va='center', ha='left', fontsize=8)

    # Large fragments at the top, as on a real gel.
    ax.invert_yaxis()
    ax.set_title(title, fontsize=10)
    ax.set_ylabel("Fragment Size (bp)", fontsize=9)
    ax.set_xlabel("Lane 1", fontsize=9)
    ax.set_xticks([])
    ax.tick_params(axis='y', labelsize=8)

    # Sketch the loading well at the top of the lane (after inversion the
    # first y-limit is the large-size end of the axis).
    well_top_y = ax.get_ylim()[0]
    well_line_y = well_top_y * 1.01
    well_depth_y = well_top_y * 0.98
    well_left = lane_center - band_width / 1.5
    well_right = lane_center + band_width / 1.5
    ax.plot([well_left, well_right], [well_line_y, well_line_y],
            linewidth=1.5, color='black')
    ax.plot([well_left, well_left], [well_line_y, well_depth_y],
            linewidth=1.5, color='black')
    ax.plot([well_right, well_right], [well_line_y, well_depth_y],
            linewidth=1.5, color='black')

    fig.tight_layout(pad=1.5)
    return fig
def analyze_plasmids_gradio(file1_path, file2_path, current_plasmid_choice_for_plot):
    """
    Compare two plasmid files and report the enzymes that cut only one of them.

    Args:
        file1_path: Path of the first plasmid file (GenBank or FASTA), or None.
        file2_path: Path of the second plasmid file (GenBank or FASTA), or None.
        current_plasmid_choice_for_plot: "Plasmid 1" or "Plasmid 2"; decides
            which list of unique enzymes populates the dropdown update.

    Returns:
        An 8-tuple: status text, message for plasmid 1, message for plasmid 2,
        record 1, record 2, enzyme names unique to plasmid 1, enzyme names
        unique to plasmid 2, and a ``gr.update`` for the enzyme dropdown.
        On failure the records are None, the lists are empty and the dropdown
        update is the locked "Analyze plasmids first" placeholder.
    """
    locked_dropdown = gr.update(choices=["Analyze plasmids first"], value="Analyze plasmids first", interactive=False)

    def failure(message):
        # Every early exit shares this shape; only the status text differs.
        return message, "", "", None, None, [], [], locked_dropdown

    # Requests for the example files get a targeted hint when a file is absent.
    missing_examples = []
    if file1_path == EXAMPLE_PLASMID1_PATH and not os.path.exists(EXAMPLE_PLASMID1_PATH):
        missing_examples.append(f"Example file not found: {EXAMPLE_PLASMID1_PATH}. Please create it in the '{EXAMPLE_DIR}' directory.\n")
    if file2_path == EXAMPLE_PLASMID2_PATH and not os.path.exists(EXAMPLE_PLASMID2_PATH):
        missing_examples.append(f"Example file not found: {EXAMPLE_PLASMID2_PATH}. Please create it in the '{EXAMPLE_DIR}' directory.\n")
    if missing_examples:
        return failure("".join(missing_examples))

    if file1_path is None or file2_path is None:
        return failure("Error: Please upload or load both plasmid files.")

    def read_plasmid(filepath, filename_for_error):
        # GenBank first, FASTA as the fallback; only the FASTA error is reported.
        try:
            return SeqIO.read(filepath, "genbank")
        except Exception:
            pass
        try:
            return SeqIO.read(filepath, "fasta")
        except Exception as e_fasta:
            raise ValueError(f"Could not parse '{filename_for_error}'. Ensure it's a valid GenBank or FASTA file. Last error: {e_fasta}")

    try:
        p1_orig_filename = os.path.basename(file1_path)
        p2_orig_filename = os.path.basename(file2_path)
        plasmid1_seq_rec = read_plasmid(file1_path, p1_orig_filename)
        plasmid2_seq_rec = read_plasmid(file2_path, p2_orig_filename)
    except Exception as parse_error:
        return failure(str(parse_error))

    # Keep enzymes that have a recognition site and, where the object exposes
    # is_restriction(), report themselves as restriction enzymes.
    valid_enzyme_objects = []
    for enz_name in AllEnzymes.elements():
        candidate = AllEnzymes.get(enz_name)
        if not candidate or getattr(candidate, 'site', None) is None:
            continue
        if hasattr(candidate, 'is_restriction') and not candidate.is_restriction():
            continue
        valid_enzyme_objects.append(candidate)

    if not valid_enzyme_objects:
        return failure("Error: Could not load any restriction enzymes from Biopython.")

    enzymes_batch = RestrictionBatch(valid_enzyme_objects)
    # Both sequences are analysed as circular molecules (linear=False).
    analysis1 = Analysis(enzymes_batch, plasmid1_seq_rec.seq, linear=False)
    analysis2 = Analysis(enzymes_batch, plasmid2_seq_rec.seq, linear=False)
    cutters_p1 = set(analysis1.with_sites().keys())
    cutters_p2 = set(analysis2.with_sites().keys())

    unique_to_1_names = [str(enz) for enz in sorted(cutters_p1 - cutters_p2, key=str)]
    unique_to_2_names = [str(enz) for enz in sorted(cutters_p2 - cutters_p1, key=str)]

    p1_display_label = f"Plasmid 1 ({p1_orig_filename})"
    p2_display_label = f"Plasmid 2 ({p2_orig_filename})"

    if unique_to_1_names:
        msg1 = f"Enzymes cutting only {p1_display_label} ({len(unique_to_1_names)}):\n" + ", ".join(unique_to_1_names)
    else:
        msg1 = f"No unique enzymes found for {p1_display_label}."
    if unique_to_2_names:
        msg2 = f"Enzymes cutting only {p2_display_label} ({len(unique_to_2_names)}):\n" + ", ".join(unique_to_2_names)
    else:
        msg2 = f"No unique enzymes found for {p2_display_label}."

    status = "Analysis complete."
    if not (unique_to_1_names or unique_to_2_names):
        status += " No enzymes found that uniquely cut only one of the plasmids."

    # Anything other than "Plasmid 1" is served from the plasmid-2 list.
    if current_plasmid_choice_for_plot == "Plasmid 1":
        selected_names = unique_to_1_names
        fallback_entry = f"No unique enzymes for {p1_display_label}"
    else:
        selected_names = unique_to_2_names
        fallback_entry = f"No unique enzymes for {p2_display_label}"
    dd_choices = selected_names if selected_names else [fallback_entry]

    recognised_choice = current_plasmid_choice_for_plot in ("Plasmid 1", "Plasmid 2")
    if recognised_choice and selected_names:
        dropdown_update = gr.update(choices=["Select an enzyme"] + dd_choices, value="Select an enzyme", interactive=True)
    else:
        first_entry = dd_choices[0]
        dropdown_update = gr.update(choices=dd_choices, value=first_entry, interactive="No unique" not in first_entry)

    return (status, msg1, msg2, plasmid1_seq_rec, plasmid2_seq_rec,
            unique_to_1_names, unique_to_2_names, dropdown_update)
def _message_figure(message, color=None):
    """Return a blank 6x8 figure showing *message* centred, with no ticks."""
    fig, ax = plt.subplots(figsize=(6, 8))
    text_kwargs = {"ha": 'center', "va": 'center', "wrap": True}
    if color is not None:
        text_kwargs["color"] = color
    ax.text(0.5, 0.5, message, **text_kwargs)
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    return fig


def plot_selected_digest_controller(plasmid_choice_label, enzyme_name, p1_data, p2_data):
    """
    Pick the record matching the radio selection and plot its digest.

    A message figure is built only when one is actually returned, so the
    success path creates exactly one figure (the gel itself).

    Args:
        plasmid_choice_label: "Plasmid 1" or "Plasmid 2".
        enzyme_name: Current dropdown value; the placeholder entries
            ("Select an enzyme", "No unique enzymes ...",
            "Analyze plasmids first") are rejected.
        p1_data: Record for plasmid 1, or None if not analysed yet.
        p2_data: Record for plasmid 2, or None if not analysed yet.

    Returns:
        A matplotlib Figure: the virtual gel, or a message panel when the
        selection is not plottable.
    """
    placeholder_markers = ("No unique enzymes", "Analyze plasmids first")
    if (not enzyme_name
            or enzyme_name == "Select an enzyme"
            or any(marker in enzyme_name for marker in placeholder_markers)):
        return _message_figure("Please select a valid plasmid and enzyme after analysis.")

    records_by_label = {"Plasmid 1": p1_data, "Plasmid 2": p2_data}
    if plasmid_choice_label not in records_by_label:
        return _message_figure("Invalid plasmid selection.", color='red')

    target_plasmid_rec = records_by_label[plasmid_choice_label]
    if target_plasmid_rec is None:
        return _message_figure(
            f"{plasmid_choice_label} data not loaded. Please re-analyze.", color='red')

    # Decorate the label with the record's name, falling back to its id.
    target_label = plasmid_choice_label
    identifier = getattr(target_plasmid_rec, 'name', None) or getattr(target_plasmid_rec, 'id', None)
    if identifier:
        target_label += f" ({identifier})"

    return simulate_digest_and_plot_gradio(target_plasmid_rec, enzyme_name, target_label)
def update_enzyme_dropdown_choices_on_radio_change(plasmid_choice_label, p1_enzyme_names, p2_enzyme_names):
    """
    Build the enzyme-dropdown update that matches the selected plasmid.

    Returns a ``gr.update`` that lists the plasmid's unique enzymes behind a
    "Select an enzyme" prompt, a single disabled notice when that plasmid has
    none, or an empty disabled dropdown for an unrecognised selection.
    """
    if plasmid_choice_label == "Plasmid 1":
        enzyme_names, empty_notice = p1_enzyme_names, "No unique enzymes for P1"
    elif plasmid_choice_label == "Plasmid 2":
        enzyme_names, empty_notice = p2_enzyme_names, "No unique enzymes for P2"
    else:
        return gr.update(choices=[], value=None, interactive=False)

    if not enzyme_names:
        return gr.update(choices=[empty_notice], value=empty_notice, interactive=False)

    prompt = "Select an enzyme"
    return gr.update(choices=[prompt] + enzyme_names, value=prompt, interactive=True)
def load_examples_and_auto_process():
    """
    Analyse the example plasmids and auto-plot the first unique enzyme.

    Plasmid 1 is preferred; plasmid 2 is used only when plasmid 1 has no
    unique enzymes. The dropdown update returned by the analysis itself is
    discarded because this function always builds its own.

    Returns:
        A 10-tuple: the first seven values returned by
        analyze_plasmids_gradio() (status, two messages, two records, two
        enzyme-name lists), then a ``gr.update`` for the enzyme dropdown, a
        ``gr.update`` for the plasmid radio, and a matplotlib Figure (gel,
        "no unique enzymes" notice, or error panel).
    """
    status, msg1, msg2, p1_rec, p2_rec, p1_enz_names, p2_enz_names, _ = \
        analyze_plasmids_gradio(EXAMPLE_PLASMID1_PATH, EXAMPLE_PLASMID2_PATH, "Plasmid 1")

    # A missing record means the analysis failed; `status` already carries the
    # reason, so show a generic error panel alongside it.
    if p1_rec is None or p2_rec is None:
        fig_error, ax_error = plt.subplots(figsize=(6, 8))
        ax_error.text(0.5, 0.5, "Error during example analysis.\nCheck file paths and content.",
                      ha='center', va='center', color='red', wrap=True)
        ax_error.set_xticks([])
        ax_error.set_yticks([])
        plt.tight_layout()
        return (status, msg1, msg2, p1_rec, p2_rec, p1_enz_names, p2_enz_names,
                gr.update(choices=["Error"], value="Error", interactive=False),
                gr.update(value="Plasmid 1"), fig_error)

    # Choose the auto-plot target; the radio defaults to "Plasmid 1" when
    # neither plasmid has a unique enzyme.
    if p1_enz_names:
        final_radio_choice, target_rec, target_names = "Plasmid 1", p1_rec, p1_enz_names
    elif p2_enz_names:
        final_radio_choice, target_rec, target_names = "Plasmid 2", p2_rec, p2_enz_names
    else:
        final_radio_choice, target_rec, target_names = "Plasmid 1", None, []

    if target_names:
        auto_enzyme = target_names[0]
        enz_dd_update = gr.update(choices=["Select an enzyme"] + target_names,
                                  value=auto_enzyme, interactive=True)
        gel_fig = simulate_digest_and_plot_gradio(target_rec, auto_enzyme, final_radio_choice)
    else:
        enz_dd_update = gr.update(choices=["No unique enzymes found"],
                                  value="No unique enzymes found", interactive=False)
        gel_fig, ax_placeholder = plt.subplots(figsize=(6, 8))
        ax_placeholder.text(0.5, 0.5, "No unique enzymes found for automatic plotting.",
                            ha='center', va='center', wrap=True)
        ax_placeholder.set_xticks([])
        ax_placeholder.set_yticks([])
        plt.tight_layout()

    return (status, msg1, msg2, p1_rec, p2_rec, p1_enz_names, p2_enz_names,
            enz_dd_update, gr.update(value=final_radio_choice), gel_fig)
# --- Gradio Interface Definition ---
# Build the whole interface inside one Blocks context bound to `demo`, which is
# launched at the bottom of the file. Components are declared in the order
# below; the event wiring follows after the layout.
with gr.Blocks(theme=gr.themes.Default()) as demo:
gr.Markdown("# Plasmid Restriction Digest Analyzer & Virtual Gel")
gr.Markdown(
"**Instructions:**\n"
"1. Upload two plasmid sequence files (GenBank `.gb`/`.gbk` or FASTA `.fasta`/`.fna`/`.fa` format) OR click 'Load Example Files'.\n"
"2. If uploading manually, click `Analyze Plasmids`. Results will show enzymes that uniquely cut one plasmid but not the other.\n"
"3. Select which plasmid's unique enzymes you want to consider for plotting.\n"
"4. Choose a specific enzyme from the dropdown list.\n"
"5. Click `Generate Gel Plot` to visualize the digestion pattern.\n"
f"Note: For 'Load Example Files', ensure `plasmid1_example.gb` and `plasmid2_example.gb` are in a folder named `{EXAMPLE_DIR}` next to this script."
)
# State holders written by the two analysis handlers: the parsed plasmid
# records and the lists of enzyme names unique to each plasmid.
plasmid1_data_state = gr.State()
plasmid2_data_state = gr.State()
p1_unique_enzymes_list_state = gr.State([])
p2_unique_enzymes_list_state = gr.State([])
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### 1. Upload Plasmids & Analyze")
file_p1 = gr.File(label="Plasmid 1 File (e.g., .gb, .fasta)", type="filepath", file_types=[".gb", ".gbk", ".fasta", ".fna", ".fa"])
file_p2 = gr.File(label="Plasmid 2 File (e.g., .gb, .fasta)", type="filepath", file_types=[".gb", ".gbk", ".fasta", ".fna", ".fa"])
# Invisible mirror of the radio selection; it is kept in sync by the first
# radio .change handler and passed to analyze_plasmids_gradio as its third input.
_current_plasmid_choice_for_plot_hidden = gr.Textbox(value="Plasmid 1", visible=False)
analyze_btn = gr.Button("Analyze Uploaded Plasmids", variant="secondary") # Changed variant
example_btn = gr.Button("Load Example Files & Auto-Analyze/Plot", variant="primary", elem_id="example_button")
with gr.Column(scale=2):
gr.Markdown("### Analysis Results")
status_message_txt = gr.Textbox(label="Status", interactive=False, lines=1, max_lines=3) # Increased max_lines for error messages
unique_enzymes_p1_txt = gr.Textbox(label="Enzymes cutting only Plasmid 1", interactive=False, lines=3, max_lines=6)
unique_enzymes_p2_txt = gr.Textbox(label="Enzymes cutting only Plasmid 2", interactive=False, lines=3, max_lines=6)
gr.Markdown("---")
gr.Markdown("### 2. Visualize Digestion on Virtual Gel")
with gr.Row():
with gr.Column(scale=1):
plasmid_to_plot_choice_radio = gr.Radio(
choices=["Plasmid 1", "Plasmid 2"],
label="Select Plasmid for Gel Visualization",
value="Plasmid 1",
interactive=True
)
# Starts disabled with a placeholder entry; the analysis and radio handlers
# replace its choices.
enzyme_for_plot_dropdown = gr.Dropdown(
label="Select Unique Enzyme",
choices=["Analyze plasmids first"],
value="Analyze plasmids first",
interactive=False
)
plot_btn = gr.Button("Generate Gel Plot for Selection", variant="secondary", elem_id="plot_button") # Changed variant
with gr.Column(scale=2):
gel_plot_output = gr.Plot(label="Virtual Agarose Gel")
gr.Markdown("---")
gr.Markdown("Developed using Biopython, Matplotlib, and Gradio.")
gr.Markdown("Note: Large plasmid files or complex analyses might take a few moments.")
# --- Event Handlers ---
# Copy the radio value into the hidden textbox whenever the radio changes.
plasmid_to_plot_choice_radio.change(
fn=lambda x: x,
inputs=[plasmid_to_plot_choice_radio],
outputs=[_current_plasmid_choice_for_plot_hidden]
)
# Manual path: analyse the two uploaded files. The eight outputs are listed in
# the same order as the tuple returned by analyze_plasmids_gradio.
analyze_btn.click(
fn=analyze_plasmids_gradio,
inputs=[file_p1, file_p2, _current_plasmid_choice_for_plot_hidden],
outputs=[
status_message_txt, unique_enzymes_p1_txt, unique_enzymes_p2_txt,
plasmid1_data_state, plasmid2_data_state,
p1_unique_enzymes_list_state, p2_unique_enzymes_list_state,
enzyme_for_plot_dropdown
]
)
# Example path: same first seven outputs, plus dropdown, radio and plot, matching
# the ten values returned by load_examples_and_auto_process.
example_btn.click(
fn=load_examples_and_auto_process,
inputs=[], # No direct inputs, uses hardcoded paths
outputs=[
status_message_txt, unique_enzymes_p1_txt, unique_enzymes_p2_txt,
plasmid1_data_state, plasmid2_data_state,
p1_unique_enzymes_list_state, p2_unique_enzymes_list_state,
enzyme_for_plot_dropdown, # Update dropdown based on auto-selected enzyme
plasmid_to_plot_choice_radio, # Update radio based on auto-selected plasmid
gel_plot_output # Display the auto-generated plot
]
)
# Second handler on the same radio: repopulate the enzyme dropdown from the
# stored name list of the newly selected plasmid.
plasmid_to_plot_choice_radio.change(
fn=update_enzyme_dropdown_choices_on_radio_change,
inputs=[plasmid_to_plot_choice_radio, p1_unique_enzymes_list_state, p2_unique_enzymes_list_state],
outputs=[enzyme_for_plot_dropdown]
)
# Draw the gel for the current radio + dropdown selection using the stored records.
plot_btn.click(
fn=plot_selected_digest_controller,
inputs=[plasmid_to_plot_choice_radio, enzyme_for_plot_dropdown, plasmid1_data_state, plasmid2_data_state],
outputs=[gel_plot_output]
)
# Script entry point: make sure the example directory exists, then start the app.
if __name__ == '__main__':
# Create eg_files directory if it doesn't exist (optional, good for local testing)
if not os.path.exists(EXAMPLE_DIR):
os.makedirs(EXAMPLE_DIR)
# Only the directory is created; the example plasmid files must be added by hand.
print(f"Created directory: {EXAMPLE_DIR}. Please add example plasmid files to it.")
# You might want to add a check here to see if files exist and guide the user
# For Hugging Face Spaces, you'd typically upload the eg_files directory with the files.
# NOTE(review): the original indentation is lost in this view; confirm whether
# launch() belongs inside the __main__ guard or at module level.
demo.launch()