"""Gradio app: compare two plasmids by restriction digest and draw a virtual gel.

The app reads two plasmid files (GenBank or FASTA), lists the enzymes that cut
only one of them, lists the enzymes that cut both with different fragment
sizes, and plots a virtual agarose gel lane for a chosen plasmid/enzyme pair.
Plasmids are treated as circular molecules throughout.
"""

import os  # For getting filename

import gradio as gr
import matplotlib

# Ensure matplotlib uses a non-interactive backend for Gradio. The backend is
# selected before pyplot is imported so no GUI backend is ever initialised.
matplotlib.use('Agg')

import matplotlib.pyplot as plt
from Bio import SeqIO
from Bio.Seq import Seq  # Though not directly used in final logic, good for context
from Bio.Restriction import RestrictionBatch, AllEnzymes, Analysis

# Define paths for example files
EXAMPLE_DIR = "eg_files"
EXAMPLE_PLASMID1_PATH = os.path.join(EXAMPLE_DIR, "plasmid1_example.gb")
EXAMPLE_PLASMID2_PATH = os.path.join(EXAMPLE_DIR, "plasmid2_example.gb")


# --- Small shared helpers ---

def _message_figure(message, color=None, title=None):
    """Return a blank 6x8 figure showing only ``message`` (and optional title).

    Used for every placeholder / error plot so that a figure is created only
    when it is actually going to be returned to Gradio.
    """
    fig, ax = plt.subplots(figsize=(6, 8))
    text_kwargs = {"ha": "center", "va": "center", "wrap": True}
    if color is not None:
        text_kwargs["color"] = color
    # Default axes limits are 0..1, so (0.5, 0.5) is the centre of the plot.
    ax.text(0.5, 0.5, message, **text_kwargs)
    ax.set_xticks([])
    ax.set_yticks([])
    if title is not None:
        ax.set_title(title, fontsize=10)
    plt.tight_layout()
    return fig


def _record_label(base_label, record):
    """Return ``base_label`` suffixed with the record's name, else its id."""
    for attribute in ("name", "id"):
        value = getattr(record, attribute, None)
        if value:
            return f"{base_label} ({value})"
    return base_label


def _enzyme_dropdown_update(enzyme_names, empty_label):
    """Build the dropdown update for a list of selectable enzyme names.

    With names available the dropdown is interactive and starts on the
    "Select an enzyme" prompt; otherwise it shows ``empty_label`` and is
    disabled.
    """
    if enzyme_names:
        return gr.update(choices=["Select an enzyme"] + list(enzyme_names),
                         value="Select an enzyme", interactive=True)
    return gr.update(choices=[empty_label], value=empty_label, interactive=False)


def _read_plasmid(filepath, filename_for_error):
    """Parse a single-record plasmid file, trying GenBank first, then FASTA.

    Raises:
        ValueError: if the file parses as neither format; the FASTA parser's
            error is included in the message and chained as the cause.
    """
    try:
        return SeqIO.read(filepath, "genbank")
    except Exception:
        # Not GenBank (or not readable as such): fall back to FASTA.
        try:
            return SeqIO.read(filepath, "fasta")
        except Exception as e_fasta:
            raise ValueError(
                f"Could not parse '{filename_for_error}'. Ensure it's a valid GenBank or FASTA file. "
                f"Last error: {e_fasta}"
            ) from e_fasta


def _load_candidate_enzymes():
    """Return the enzyme objects from ``AllEnzymes`` that have a recognition site.

    If an enzyme exposes ``is_restriction`` it must return true; enzymes
    without that attribute are accepted as they are.
    """
    candidates = []
    for enz_name in AllEnzymes.elements():
        enzyme_obj = AllEnzymes.get(enz_name)
        if not enzyme_obj or getattr(enzyme_obj, 'site', None) is None:
            continue
        if hasattr(enzyme_obj, 'is_restriction'):
            if enzyme_obj.is_restriction():
                candidates.append(enzyme_obj)
        else:
            candidates.append(enzyme_obj)
    return candidates


# --- Core BioPython and Plotting Functions ---

def simulate_digest_and_plot_gradio(plasmid_seq_record, enzyme_name, plasmid_label):
    """Simulate a restriction digest and plot it as a one-lane virtual gel.

    Args:
        plasmid_seq_record: record whose ``.seq`` is digested; ``None`` yields
            an error figure.
        enzyme_name: name of the enzyme, looked up in ``AllEnzymes``.
        plasmid_label: human-readable plasmid label used in titles/messages.

    Returns:
        A matplotlib figure: the gel lane, or an error figure when the plasmid
        is missing, the enzyme cannot be loaded, or the digest itself fails.
    """
    error_title = f"Virtual Gel: {plasmid_label} - Error"

    if plasmid_seq_record is None:
        return _message_figure(f"Error: Plasmid data for '{plasmid_label}' is missing.",
                               color='red', title=error_title)

    try:
        enzyme = AllEnzymes.get(str(enzyme_name))
        if not enzyme:
            raise ValueError(f"Enzyme '{enzyme_name}' not found in Biopython's AllEnzymes.")
    except Exception as e:
        return _message_figure(f"Error: Could not load enzyme '{enzyme_name}'.\n{e}",
                               color='red', title=error_title)

    sequence = plasmid_seq_record.seq
    try:
        # linear=False: digest as a circular molecule, matching the circular
        # Analysis used to pick the enzymes. With the default (linear) a
        # single-cutter would wrongly produce two bands instead of one.
        fragments_seqs = enzyme.catalyse(sequence, linear=False)
        cut_sites = enzyme.search(sequence, linear=False)
    except Exception as e:
        # UI boundary: show the failure in the plot instead of crashing the
        # Gradio callback.
        return _message_figure(f"Error: Could not digest {plasmid_label} with {enzyme_name}.\n{e}",
                               color='red', title=error_title)

    is_uncut = not cut_sites
    if is_uncut:
        lengths = [len(sequence)]
    else:
        lengths = sorted((len(fragment) for fragment in fragments_seqs), reverse=True)

    fig, ax = plt.subplots(figsize=(6, 8))  # Adjusted size for better readability

    # Log-scaled size axis from 10 bp up to slightly above the plasmid length.
    ax.set_yscale("log")
    min_display_size = 10
    plasmid_len_for_scale = max(len(sequence), min_display_size * 10)
    max_display_size = max(plasmid_len_for_scale * 1.1, min_display_size * 2)
    ax.set_ylim(min_display_size, max_display_size)

    band_width = 0.6
    lane_center = 0.5

    if is_uncut:
        # Axes coordinates keep the note centred; data coordinates (0.5, 0.5)
        # would fall outside the 10..max log axis.
        ax.text(0.5, 0.5, f"Enzyme {enzyme_name} does not cut {plasmid_label}",
                ha='center', va='center', wrap=True, transform=ax.transAxes)

    if not lengths:
        ax.text(0.5, 0.5, "No fragments to display.",
                ha='center', va='center', transform=ax.transAxes)
    else:
        for i, size in enumerate(lengths):
            if size < min_display_size:
                # Lengths are sorted descending, so all remaining fragments
                # are below the axis minimum too: summarise them and stop.
                ax.text(lane_center, min_display_size * 1.1,
                        f"(+ {len(lengths) - i} small fragments < {min_display_size}bp not shown)",
                        ha='center', va='top', fontsize=7, color='gray')
                break
            ax.plot([lane_center - band_width / 2, lane_center + band_width / 2], [size, size],
                    linewidth=6, color='royalblue', solid_capstyle='butt')
            ax.text(lane_center + band_width / 2 + 0.05, size, f"{size} bp",
                    va='center', ha='left', fontsize=8)

    # Largest fragments at the top, like a real gel.
    ax.invert_yaxis()

    if is_uncut:
        ax.set_title(f"Virtual Gel: {plasmid_label} + {enzyme_name} (No Sites)", fontsize=10)
    else:
        ax.set_title(f"Virtual Gel: {plasmid_label} digested with {enzyme_name}", fontsize=10)
    ax.set_ylabel("Fragment Size (bp)", fontsize=9)
    ax.set_xlabel("Lane 1", fontsize=9)
    ax.set_xticks([])
    ax.tick_params(axis='y', labelsize=8)

    # Outline of the loading well, positioned relative to the first y-limit
    # (the large-fragment end once the axis is inverted).
    well_top_y = ax.get_ylim()[0]
    well_line_y = well_top_y * 1.01
    well_depth_y = well_top_y * 0.98
    well_left = lane_center - band_width / 1.5
    well_right = lane_center + band_width / 1.5
    ax.plot([well_left, well_right], [well_line_y, well_line_y], linewidth=1.5, color='black')
    ax.plot([well_left, well_left], [well_line_y, well_depth_y], linewidth=1.5, color='black')
    ax.plot([well_right, well_right], [well_line_y, well_depth_y], linewidth=1.5, color='black')

    plt.tight_layout(pad=1.5)
    return fig


def analyze_plasmids_gradio(file1_path, file2_path, current_plasmid_choice_for_plot):
    """Compare two plasmid files by their restriction enzyme cut patterns.

    Args:
        file1_path: path of the first plasmid file (GenBank or FASTA) or None.
        file2_path: path of the second plasmid file (GenBank or FASTA) or None.
        current_plasmid_choice_for_plot: "Plasmid 1" selects plasmid 1's unique
            enzymes for the dropdown; any other value selects plasmid 2's.

    Returns:
        A 10-tuple: status message, "only plasmid 1" message, "only plasmid 2"
        message, "both but different fragments" message, plasmid 1 record,
        plasmid 2 record, names unique to plasmid 1, names unique to plasmid 2,
        names of enzymes cutting both with different fragment sizes, and the
        enzyme dropdown update. On failure the status carries the error text,
        the messages are empty, the records are None and the lists are empty.
    """
    initial_enzyme_dd_update = gr.update(choices=["Analyze plasmids first"],
                                         value="Analyze plasmids first", interactive=False)
    empty_return_for_error = ["", "", "", None, None, [], [], [], initial_enzyme_dd_update]

    # Check if example files exist if paths match example paths
    example_file_error_msg = ""
    for given_path, example_path in ((file1_path, EXAMPLE_PLASMID1_PATH),
                                     (file2_path, EXAMPLE_PLASMID2_PATH)):
        if given_path == example_path and not os.path.exists(example_path):
            example_file_error_msg += (
                f"Example file not found: {example_path}. "
                f"Please create it in the '{EXAMPLE_DIR}' directory.\n"
            )
    if example_file_error_msg:
        return example_file_error_msg, *empty_return_for_error

    if file1_path is None or file2_path is None:
        return "Error: Please upload or load both plasmid files.", *empty_return_for_error

    p1_orig_filename = os.path.basename(file1_path)
    p2_orig_filename = os.path.basename(file2_path)
    try:
        plasmid1_seq_rec = _read_plasmid(file1_path, p1_orig_filename)
        plasmid2_seq_rec = _read_plasmid(file2_path, p2_orig_filename)
    except Exception as e:
        return str(e), *empty_return_for_error

    valid_enzyme_objects = _load_candidate_enzymes()
    if not valid_enzyme_objects:
        return "Error: Could not load any restriction enzymes from Biopython.", *empty_return_for_error

    # Plasmids are analysed as circular molecules (linear=False).
    enzymes_batch = RestrictionBatch(valid_enzyme_objects)
    analysis1 = Analysis(enzymes_batch, plasmid1_seq_rec.seq, linear=False)
    analysis2 = Analysis(enzymes_batch, plasmid2_seq_rec.seq, linear=False)

    enzymes_cutting_p1_obj = set(analysis1.with_sites().keys())
    enzymes_cutting_p2_obj = set(analysis2.with_sites().keys())

    unique_to_1_names = sorted(str(e) for e in enzymes_cutting_p1_obj - enzymes_cutting_p2_obj)
    unique_to_2_names = sorted(str(e) for e in enzymes_cutting_p2_obj - enzymes_cutting_p1_obj)

    p1_display_label = f"Plasmid 1 ({p1_orig_filename})"
    p2_display_label = f"Plasmid 2 ({p2_orig_filename})"

    if unique_to_1_names:
        msg1 = (f"Enzymes cutting only {p1_display_label} ({len(unique_to_1_names)}):\n"
                + ", ".join(unique_to_1_names))
    else:
        msg1 = f"No unique enzymes found for {p1_display_label}."
    if unique_to_2_names:
        msg2 = (f"Enzymes cutting only {p2_display_label} ({len(unique_to_2_names)}):\n"
                + ", ".join(unique_to_2_names))
    else:
        msg2 = f"No unique enzymes found for {p2_display_label}."

    # Find enzymes cutting both plasmids but yielding different fragment sizes.
    common_diff_names = set()
    for enzyme_obj in enzymes_cutting_p1_obj & enzymes_cutting_p2_obj:
        try:
            # linear=False keeps the digest consistent with the circular
            # Analysis above.
            lengths1 = sorted(len(f) for f in enzyme_obj.catalyse(plasmid1_seq_rec.seq, linear=False))
            lengths2 = sorted(len(f) for f in enzyme_obj.catalyse(plasmid2_seq_rec.seq, linear=False))
        except Exception as e_cat_common:
            # Best effort: an enzyme that cannot be digested in silico is
            # reported on the console and skipped.
            print(f"Warning: Error during catalysis comparison for common enzyme {str(enzyme_obj)}: {e_cat_common}")
            continue
        if lengths1 != lengths2:
            common_diff_names.add(str(enzyme_obj))
    common_diff_fragments_enzymes_names = sorted(common_diff_names)

    if common_diff_fragments_enzymes_names:
        msg_common_diff = (
            f"Enzymes cutting BOTH plasmids with DIFFERENT fragments ({len(common_diff_fragments_enzymes_names)}):\n"
            + ", ".join(common_diff_fragments_enzymes_names)
        )
    else:
        msg_common_diff = "No enzymes found that cut both plasmids with different fragment patterns."

    status = "Analysis complete."
    if not unique_to_1_names and not unique_to_2_names and not common_diff_fragments_enzymes_names:
        status += " No differentiating enzymes found."

    if current_plasmid_choice_for_plot == "Plasmid 1":
        selected_names, selected_label = unique_to_1_names, p1_display_label
    else:
        selected_names, selected_label = unique_to_2_names, p2_display_label
    current_dd_update = _enzyme_dropdown_update(selected_names,
                                                f"No unique enzymes for {selected_label}")

    return (status, msg1, msg2, msg_common_diff, plasmid1_seq_rec, plasmid2_seq_rec,
            unique_to_1_names, unique_to_2_names, common_diff_fragments_enzymes_names,
            current_dd_update)


def plot_selected_digest_controller(plasmid_choice_label, enzyme_name, p1_data, p2_data):
    """Pick the record for the chosen plasmid and plot its digest.

    Args:
        plasmid_choice_label: "Plasmid 1" or "Plasmid 2".
        enzyme_name: dropdown value; placeholder entries are rejected.
        p1_data: parsed record for plasmid 1, or None if not analysed yet.
        p2_data: parsed record for plasmid 2, or None if not analysed yet.

    Returns:
        The gel figure, or a message figure explaining what is missing. Only
        the figure that is returned is ever created.
    """
    placeholder_markers = ("No unique enzymes", "Analyze plasmids first")
    if (not enzyme_name
            or enzyme_name == "Select an enzyme"
            or any(marker in enzyme_name for marker in placeholder_markers)):
        return _message_figure("Please select a valid plasmid and enzyme after analysis.")

    records_by_choice = {"Plasmid 1": p1_data, "Plasmid 2": p2_data}
    if plasmid_choice_label not in records_by_choice:
        return _message_figure("Invalid plasmid selection.", color='red')

    target_plasmid_rec = records_by_choice[plasmid_choice_label]
    if target_plasmid_rec is None:
        return _message_figure(f"{plasmid_choice_label} data not loaded. Please re-analyze.",
                               color='red')

    target_label = _record_label(plasmid_choice_label, target_plasmid_rec)
    return simulate_digest_and_plot_gradio(target_plasmid_rec, enzyme_name, target_label)


def update_enzyme_dropdown_choices_on_radio_change(plasmid_choice_label, p1_enzyme_names, p2_enzyme_names):
    """Refresh the enzyme dropdown when the plasmid radio selection changes.

    Returns the dropdown update for the chosen plasmid's unique enzymes, or an
    empty, disabled dropdown when the choice is neither plasmid.
    """
    options_by_choice = {
        "Plasmid 1": (p1_enzyme_names, "No unique enzymes for P1"),
        "Plasmid 2": (p2_enzyme_names, "No unique enzymes for P2"),
    }
    if plasmid_choice_label not in options_by_choice:
        return gr.update(choices=[], value=None, interactive=False)
    enzyme_names, empty_label = options_by_choice[plasmid_choice_label]
    return _enzyme_dropdown_update(enzyme_names, empty_label)


def load_examples_and_auto_process():
    """Analyse the bundled example plasmids and auto-plot one digest.

    Returns:
        An 11-tuple matching the example button's outputs: status, the three
        result messages, both records, both unique-enzyme lists, the dropdown
        update, the radio update and the gel (or message) figure. The first
        unique enzyme of plasmid 1 is plotted, falling back to plasmid 2.
    """
    # Step 1: Perform analysis with example files
    (status, msg1, msg2, msg_common_diff, p1_rec, p2_rec,
     p1_enz_names, p2_enz_names, _common_diff_list_ignore, _dd_update_ignore) = \
        analyze_plasmids_gradio(EXAMPLE_PLASMID1_PATH, EXAMPLE_PLASMID2_PATH, "Plasmid 1")

    # If analysis failed (e.g., files not found), p1_rec or p2_rec is None
    if p1_rec is None or p2_rec is None:
        fig_error = _message_figure(f"Error during example analysis:\n{status}", color='red')
        return (status, msg1, msg2, msg_common_diff, None, None, [], [],
                gr.update(choices=["Error"], value="Error", interactive=False),
                gr.update(value="Plasmid 1"), fig_error)

    # Step 2: Choose what to auto-plot (plasmid 1 preferred)
    if p1_enz_names:
        chosen_label, chosen_names, chosen_rec = "Plasmid 1", p1_enz_names, p1_rec
    elif p2_enz_names:
        chosen_label, chosen_names, chosen_rec = "Plasmid 2", p2_enz_names, p2_rec
    else:
        # Nothing distinguishes the two plasmids: no plot, disabled dropdown.
        no_enzymes_update = gr.update(choices=["No unique enzymes found"],
                                      value="No unique enzymes found", interactive=False)
        fig_placeholder = _message_figure("No unique enzymes found for automatic plotting.")
        return (status, msg1, msg2, msg_common_diff, p1_rec, p2_rec, p1_enz_names, p2_enz_names,
                no_enzymes_update, gr.update(value="Plasmid 1"), fig_placeholder)

    # Step 3: Generate the plot and pre-select the plotted enzyme
    auto_plot_enzyme_name = chosen_names[0]
    final_enz_dd_update = gr.update(choices=["Select an enzyme"] + chosen_names,
                                    value=auto_plot_enzyme_name, interactive=True)
    gel_fig = simulate_digest_and_plot_gradio(chosen_rec, auto_plot_enzyme_name,
                                              _record_label(chosen_label, chosen_rec))

    # Return all updates
    return (status, msg1, msg2, msg_common_diff, p1_rec, p2_rec, p1_enz_names, p2_enz_names,
            final_enz_dd_update, gr.update(value=chosen_label), gel_fig)


# --- Gradio Interface Definition ---
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# Plasmid Restriction Digest Analyzer & Virtual Gel")
    gr.Markdown(
        "**Instructions:**\n"
        "1. Upload two plasmid sequence files (GenBank `.gb`/`.gbk` or FASTA `.fasta`/`.fna`/`.fa` format) OR click 'Load Example Files'.\n"
        "2. If uploading manually, click `Analyze Uploaded Plasmids`. Results will show:\n"
        " a. Enzymes that uniquely cut only Plasmid 1.\n"
        " b. Enzymes that uniquely cut only Plasmid 2.\n"
        " c. Enzymes that cut **both** plasmids but produce different fragment patterns.\n"
        "3. Select which plasmid's **unique** enzymes you want to consider for plotting.\n"
        "4. Choose a specific enzyme from the dropdown list.\n"
        "5. Click `Generate Gel Plot` to visualize the digestion pattern for the selected plasmid and enzyme.\n"
        f"Note: For 'Load Example Files', ensure `plasmid1_example.gb` and `plasmid2_example.gb` are in a folder named `{EXAMPLE_DIR}` next to this script."
    )

    # Per-session data shared between the callbacks.
    plasmid1_data_state = gr.State()
    plasmid2_data_state = gr.State()
    p1_unique_enzymes_list_state = gr.State([])
    p2_unique_enzymes_list_state = gr.State([])
    # Receives the "cuts both, different fragments" list returned by the
    # analysis; no callback reads it, the list is only displayed as text.
    common_diff_enzymes_list_state = gr.State()

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload Plasmids & Analyze")
            file_p1 = gr.File(label="Plasmid 1 File (e.g., .gb, .fasta)", type="filepath",
                              file_types=[".gb", ".gbk", ".fasta", ".fna", ".fa"])
            file_p2 = gr.File(label="Plasmid 2 File (e.g., .gb, .fasta)", type="filepath",
                              file_types=[".gb", ".gbk", ".fasta", ".fna", ".fa"])
            # Hidden mirror of the radio choice, passed to the analysis so it
            # can populate the dropdown for the currently selected plasmid.
            _current_plasmid_choice_for_plot_hidden = gr.Textbox(value="Plasmid 1", visible=False)
            analyze_btn = gr.Button("Analyze Uploaded Plasmids", variant="secondary")
            example_btn = gr.Button("Load Example Files & Auto-Analyze/Plot", variant="primary",
                                    elem_id="example_button")
        with gr.Column(scale=2):
            gr.Markdown("### Analysis Results")
            status_message_txt = gr.Textbox(label="Status", interactive=False, lines=1, max_lines=3)
            unique_enzymes_p1_txt = gr.Textbox(label="Enzymes cutting only Plasmid 1",
                                               interactive=False, lines=3, max_lines=6)
            unique_enzymes_p2_txt = gr.Textbox(label="Enzymes cutting only Plasmid 2",
                                               interactive=False, lines=3, max_lines=6)
            common_diff_enzymes_txt = gr.Textbox(label="Enzymes cutting BOTH plasmids (different fragments)",
                                                 interactive=False, lines=3, max_lines=6)

    gr.Markdown("---")
    gr.Markdown("### 2. Visualize Digestion on Virtual Gel")
    with gr.Row():
        with gr.Column(scale=1):
            plasmid_to_plot_choice_radio = gr.Radio(
                choices=["Plasmid 1", "Plasmid 2"],
                label="Select Plasmid for Gel Visualization (of its unique enzymes)",
                value="Plasmid 1",
                interactive=True
            )
            enzyme_for_plot_dropdown = gr.Dropdown(
                label="Select Unique Enzyme for Chosen Plasmid",
                choices=["Analyze plasmids first"],
                value="Analyze plasmids first",
                interactive=False
            )
            plot_btn = gr.Button("Generate Gel Plot for Selection", variant="secondary",
                                 elem_id="plot_button")
        with gr.Column(scale=2):
            gel_plot_output = gr.Plot(label="Virtual Agarose Gel")

    gr.Markdown("---")
    gr.Markdown("Developed using Biopython, Matplotlib, and Gradio.")
    gr.Markdown("Note: Large plasmid files or complex analyses might take a few moments.")

    # --- Event Handlers ---
    # Keep the hidden textbox in sync with the radio so the analysis callback
    # knows which plasmid's enzymes to put into the dropdown.
    plasmid_to_plot_choice_radio.change(
        fn=lambda x: x,
        inputs=[plasmid_to_plot_choice_radio],
        outputs=[_current_plasmid_choice_for_plot_hidden]
    )

    analyze_btn.click(
        fn=analyze_plasmids_gradio,
        inputs=[file_p1, file_p2, _current_plasmid_choice_for_plot_hidden],
        outputs=[
            status_message_txt, unique_enzymes_p1_txt, unique_enzymes_p2_txt,
            common_diff_enzymes_txt,
            plasmid1_data_state, plasmid2_data_state,
            p1_unique_enzymes_list_state, p2_unique_enzymes_list_state,
            common_diff_enzymes_list_state,
            enzyme_for_plot_dropdown
        ]
    )

    example_btn.click(
        fn=load_examples_and_auto_process,
        inputs=[],
        outputs=[
            status_message_txt, unique_enzymes_p1_txt, unique_enzymes_p2_txt,
            common_diff_enzymes_txt,
            plasmid1_data_state, plasmid2_data_state,
            p1_unique_enzymes_list_state, p2_unique_enzymes_list_state,
            enzyme_for_plot_dropdown,
            plasmid_to_plot_choice_radio,
            gel_plot_output
        ]
    )

    plasmid_to_plot_choice_radio.change(
        fn=update_enzyme_dropdown_choices_on_radio_change,
        inputs=[plasmid_to_plot_choice_radio, p1_unique_enzymes_list_state, p2_unique_enzymes_list_state],
        outputs=[enzyme_for_plot_dropdown]
    )

    plot_btn.click(
        fn=plot_selected_digest_controller,
        inputs=[plasmid_to_plot_choice_radio, enzyme_for_plot_dropdown,
                plasmid1_data_state, plasmid2_data_state],
        outputs=[gel_plot_output]
    )


if __name__ == '__main__':
    if not os.path.exists(EXAMPLE_DIR):
        os.makedirs(EXAMPLE_DIR)
        print(f"Created directory: {EXAMPLE_DIR}. Please add example plasmid files (plasmid1_example.gb, plasmid2_example.gb) to it for the example button to work.")

    # Check for example files and print a message if they are missing
    if not os.path.exists(EXAMPLE_PLASMID1_PATH) or not os.path.exists(EXAMPLE_PLASMID2_PATH):
        print(f"Warning: Example files (plasmid1_example.gb, plasmid2_example.gb) not found in '{EXAMPLE_DIR}'. The 'Load Example Files' button might not work as expected.")
        print("You can create dummy GenBank or FASTA files with these names for testing if needed.")

    demo.launch()