Spaces:

alidenewade
/

drug-discovery-app

Sleeping

App Files Files Community

alidenewade committed on Jun 9

Commit

89eae1b

verified ·

1 Parent(s): e7bd3ae

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -15

app.py CHANGED Viewed

@@ -68,26 +68,27 @@ sns.set_theme(style='whitegrid')
 # --- FINGERPRINT CONFIGURATION ---
 # Create a dummy PubChem.xml if no XML files are found, to ensure fp_config is populated
-# Check if the 'padel_descriptors' directory exists, create it if not
-if not os.path.exists('padel_descriptors'):
-    os.makedirs('padel_descriptors')
 # Check for XML files within the 'padel_descriptors' folder
-xml_files = sorted(glob.glob('padel_descriptors/*.xml'))
 if not xml_files:
-    # If no XML files found in the directory, try to create a dummy one.
     try:
-        with open('padel_descriptors/PubChem.xml', 'w') as f:
             f.write('')
-        xml_files = sorted(glob.glob('padel_descriptors/*.xml')) # Re-check after creating
     except IOError:
-        warnings.warn("Could not create a dummy 'PubChem.xml' file in 'padel_descriptors' folder. Fingerprint calculation might fail if no .xml files are present.")
 if not xml_files:
     warnings.warn(
-        "No descriptor .xml files found in the 'padel_descriptors' folder. Fingerprint calculation will not be possible. "
-        "Please place descriptor XML files in the 'padel_descriptors' folder."
     )
 fp_config = {os.path.splitext(os.path.basename(file))[0]: file for file in xml_files}
 FP_list = sorted(list(fp_config.keys()))
@@ -207,7 +208,7 @@ def mannwhitney_test(df, descriptor):
 # ==============================================================================
 # === STEP 2: FEATURE ENGINEERING FUNCTIONS ===
 # ==============================================================================
 def create_molecule_grid_html(df, smiles_col='canonical_smiles', max_mols=20):
     html_parts = ['<div style="display: flex; flex-wrap: wrap; gap: 10px;">']
     for idx, row in df.head(max_mols).iterrows():
@@ -326,6 +327,7 @@ def run_regression_suite(df: pd.DataFrame, progress=gr.Progress()):
     model_choices = results_df['Model'].tolist()
     yield "✅ Model training & evaluation complete.", model_run_results, gr.Dropdown(choices=model_choices, interactive=True)
 def create_prediction_grid_html(df, smiles_col='canonical_smiles', pred_col='predicted_pIC50', max_mols=20):
     html_parts = ['<div style="display: flex; flex-wrap: wrap; gap: 10px;">']
     for idx, row in df.head(max_mols).iterrows():
@@ -392,9 +394,6 @@ def predict_on_upload(uploaded_file, model_name, current_state, progress=gr.Prog
         progress(0.9, desc="Generating visualization..."); yield "Generating visualization...", None, None
-        # DEBUG FIX: The main fix for the KeyError.
-        # Create a copy, rename the column *before* calling mols2grid.
-        # This is more robust than relying on the library's 'rename' parameter.
         df_grid_view = df_results.dropna(subset=['predicted_pIC50']).copy()
         mols_html = "<h3>No molecules with successful predictions to display.</h3>"
         if not df_grid_view.empty:
@@ -480,6 +479,8 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="sky"),
                     gr.Markdown("Upload a CSV with a `canonical_smiles` column to predict pIC50.")
                     with gr.Row():
                         upload_predict_file = gr.File(label="Upload CSV for Prediction", file_types=[".csv"])
                         predict_btn_s3 = gr.Button("Run Prediction", variant="primary")
                     status_step3_predict = gr.Textbox(label="Status", interactive=False)
                     prediction_results_df = gr.DataFrame(label="Prediction Results")
@@ -517,6 +518,22 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="sky"),
         model_results = current_state.get('model_results')
         if not model_results or not model_name: return None, None
         plotter = model_results.plotter; validation_fig = plotter.plot_validation(model_name); feature_fig = plotter.plot_feature_importance(model_name, int(feature_count)); plt.close('all'); return validation_fig, feature_fig
     fetch_btn.click(fn=get_target_chembl_id, inputs=query_input, outputs=[target_id_table, selected_target_dropdown, status_step1_fetch], show_progress="minimal")
     selected_target_dropdown.change(fn=enable_process_button, inputs=selected_target_dropdown, outputs=process_btn, show_progress="hidden")
@@ -531,6 +548,9 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="sky"),
     train_models_btn.click(fn=handle_model_training, inputs=[app_state], outputs=[status_step3_train, model_results_df, model_selector_s3, app_state])
     for listener in [model_selector_s3.change, feature_count_s3.change]: listener(fn=update_analysis_plots, inputs=[model_selector_s3, feature_count_s3, app_state], outputs=[validation_plot_s3, feature_plot_s3], show_progress="minimal")
     predict_btn_s3.click(fn=predict_on_upload, inputs=[upload_predict_file, model_selector_s3, app_state], outputs=[status_step3_predict, prediction_results_df, prediction_mols_grid])
 if __name__ == "__main__":
     demo.launch(debug=True)

 # --- FINGERPRINT CONFIGURATION ---
 # Create a dummy PubChem.xml if no XML files are found, to ensure fp_config is populated
+# Updated path for XML files to 'padel_descriptors/*.xml'
+padel_descriptors_dir = 'padel_descriptors'
+if not os.path.exists(padel_descriptors_dir):
+    os.makedirs(padel_descriptors_dir)
 # Check for XML files within the 'padel_descriptors' folder
+xml_files = sorted(glob.glob(os.path.join(padel_descriptors_dir, '*.xml')))
 if not xml_files:
     try:
+        # Create a dummy PubChem.xml inside 'padel_descriptors' if no XML files are found
+        with open(os.path.join(padel_descriptors_dir, 'PubChem.xml'), 'w') as f:
             f.write('')
+        xml_files = sorted(glob.glob(os.path.join(padel_descriptors_dir, '*.xml'))) # Re-scan after creating dummy
     except IOError:
+        warnings.warn("Could not create a dummy 'PubChem.xml' file in 'padel_descriptors'. Fingerprint calculation might fail if no .xml files are present.")
 if not xml_files:
     warnings.warn(
+        "No descriptor .xml files found in 'padel_descriptors' directory. "
+        "Fingerprint calculation will not be possible. "
+        "Please place descriptor XML files in the 'padel_descriptors' directory."
     )
 fp_config = {os.path.splitext(os.path.basename(file))[0]: file for file in xml_files}
 FP_list = sorted(list(fp_config.keys()))
 # ==============================================================================
 # === STEP 2: FEATURE ENGINEERING FUNCTIONS ===
 # ==============================================================================
+# Replacement for mols2grid.display in Step 2
 def create_molecule_grid_html(df, smiles_col='canonical_smiles', max_mols=20):
     html_parts = ['<div style="display: flex; flex-wrap: wrap; gap: 10px;">']
     for idx, row in df.head(max_mols).iterrows():
     model_choices = results_df['Model'].tolist()
     yield "✅ Model training & evaluation complete.", model_run_results, gr.Dropdown(choices=model_choices, interactive=True)
+# Replacement for mols2grid.display in Step 3
 def create_prediction_grid_html(df, smiles_col='canonical_smiles', pred_col='predicted_pIC50', max_mols=20):
     html_parts = ['<div style="display: flex; flex-wrap: wrap; gap: 10px;">']
     for idx, row in df.head(max_mols).iterrows():
         progress(0.9, desc="Generating visualization..."); yield "Generating visualization...", None, None
         df_grid_view = df_results.dropna(subset=['predicted_pIC50']).copy()
         mols_html = "<h3>No molecules with successful predictions to display.</h3>"
         if not df_grid_view.empty:
                     gr.Markdown("Upload a CSV with a `canonical_smiles` column to predict pIC50.")
                     with gr.Row():
                         upload_predict_file = gr.File(label="Upload CSV for Prediction", file_types=[".csv"])
+                        # Add the example data button
+                        load_example_btn = gr.Button("Load Example Data (example_data.csv)", variant="secondary")
                         predict_btn_s3 = gr.Button("Run Prediction", variant="primary")
                     status_step3_predict = gr.Textbox(label="Status", interactive=False)
                     prediction_results_df = gr.DataFrame(label="Prediction Results")
         model_results = current_state.get('model_results')
         if not model_results or not model_name: return None, None
         plotter = model_results.plotter; validation_fig = plotter.plot_validation(model_name); feature_fig = plotter.plot_feature_importance(model_name, int(feature_count)); plt.close('all'); return validation_fig, feature_fig
+    # New function to load example data
+    def load_example_data():
+        example_file_path = "example_data.csv"
+        # Create a dummy example_data.csv if it doesn't exist for demonstration
+        if not os.path.exists(example_file_path):
+            dummy_data = pd.DataFrame({
+                'canonical_smiles': [
+                    'CCO',
+                    'CC(=O)Oc1ccccc1C(=O)O',
+                    'Cc1ccc(cc1)C(C)C(=O)O'
+                ]
+            })
+            dummy_data.to_csv(example_file_path, index=False)
+        return gr.File(value=example_file_path, interactive=True)
     fetch_btn.click(fn=get_target_chembl_id, inputs=query_input, outputs=[target_id_table, selected_target_dropdown, status_step1_fetch], show_progress="minimal")
     selected_target_dropdown.change(fn=enable_process_button, inputs=selected_target_dropdown, outputs=process_btn, show_progress="hidden")
     train_models_btn.click(fn=handle_model_training, inputs=[app_state], outputs=[status_step3_train, model_results_df, model_selector_s3, app_state])
     for listener in [model_selector_s3.change, feature_count_s3.change]: listener(fn=update_analysis_plots, inputs=[model_selector_s3, feature_count_s3, app_state], outputs=[validation_plot_s3, feature_plot_s3], show_progress="minimal")
     predict_btn_s3.click(fn=predict_on_upload, inputs=[upload_predict_file, model_selector_s3, app_state], outputs=[status_step3_predict, prediction_results_df, prediction_mols_grid])
+    # New event handler for the example data button
+    load_example_btn.click(fn=load_example_data, outputs=upload_predict_file, show_progress="hidden")
 if __name__ == "__main__":
     demo.launch(debug=True)