ProteinMPNN

Sleeping

App Files Community

simonduerr committed on Oct 4, 2023

Commit

3055c36

1 Parent(s): 2aa7536

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -14

app.py CHANGED Viewed

@@ -157,7 +157,7 @@ def make_tied_positions_for_homomers(pdb_dict_list):
     return my_dict
-def align_structures(pdb1, pdb2, lenRes, index):
     """Take two structure and superimpose pdb1 on pdb2"""
     import Bio.PDB
     import subprocess
@@ -173,13 +173,13 @@ def align_structures(pdb1, pdb2, lenRes, index):
     io = Bio.PDB.PDBIO()
     io.set_structure(ref_structure)
-    io.save(f"outputs/reference.pdb")
     io.set_structure(sample_structure)
-    io.save(f"outputs/out_{index}_aligned.pdb")
     # Doing this to get around biopython CEALIGN bug
     # subprocess.call("pymol -c -Q -r cealign.pml", shell=True)
-    return aligner.rms, "outputs/reference.pdb", f"outputs/out_{index}_aligned.pdb"
 def save_pdb(outs, filename, LEN):
@@ -198,7 +198,7 @@ def save_pdb(outs, filename, LEN):
 @ray.remote(num_gpus=1, max_calls=1)
-def run_alphafold(sequences, num_recycles):
     recycles = int(num_recycles)
     RUNNER, OPT = setup_af(sequences[0])
     plddts = []
@@ -232,7 +232,8 @@ def run_alphafold(sequences, num_recycles):
                 outs, f"/home/duerr/phd/08_Code/ProteinMPNN/outputs/out_{i}.pdb", LEN
             )
         else:
-            save_pdb(outs, f"/home/user/app/outputs/out_{i}.pdb", LEN)
     return plddts, paes, LEN
@@ -320,8 +321,10 @@ def preprocess_mol(pdb_code="", filepath=""):
         os.system(f"wget -qnc https://files.rcsb.org/view/{pdb_code}.pdb")
         print(os.getcwd())
         print(os.listdir())
         mol = Molecule(f"{pdb_code}.pdb")
-    mol.write("original.pdb")
     # clean messy files and only include protein itself
     mol.filter("protein")
     # renumber using moleculekit 0...len(protein)
@@ -334,8 +337,10 @@ def preprocess_mol(pdb_code="", filepath=""):
             indexes.append(j)
             j += 1
     df["proteinMPNN_index"] = indexes
-    mol.write("cleaned.pdb")
-    return "cleaned.pdb", df
 def assign_sasa(mol):
@@ -822,13 +827,14 @@ def update_AF(seq_dict, pdb, num_recycles, selectedResidues):
             plt.figure(),
             plt.figure(),
         )
-    plddts, paes, num_res = ray.get(run_alphafold.remote(allSeqs, num_recycles))
     sequences = {}
     for i in range(lenSeqs):
         rms, input_pdb, aligned_pdb = align_structures(
-            pdb, f"outputs/out_{i}.pdb", num_res, i
         )
         sequences[i] = {
             "Seq": i,
@@ -896,6 +902,7 @@ def update_AF(seq_dict, pdb, num_recycles, selectedResidues):
             selectedResidues,
             allSeqs,
             sequences,
         ),
         plotAF_plddt,
         pae_plots,
@@ -913,10 +920,10 @@ def read_mol(molpath):
 def molecule(
-    input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs, sequences
 ):
-    mol = read_mol("outputs/reference.pdb")
     options = ""
     pred_mol = "["
     seqdata = "{"
@@ -937,7 +944,7 @@ def molecule(
             + '"}'
         )
         options += f'<option {selected} value="{i}">sequence {i} </option>'  # RMSD {sequences[i]["RMSD"]}, score {sequences[i]["Score"]}, recovery {sequences[i]["Recovery"]} pLDDT {sequences[i]["Mean pLDDT"]}
-        p = f"outputs/out_{i}_aligned.pdb"
         pred_mol += f"`{read_mol(p)}`"
         selected = ""
         if i != lenSeqs - 1:

     return my_dict
+def align_structures(pdb1, pdb2, lenRes, index, random_dir):
     """Take two structure and superimpose pdb1 on pdb2"""
     import Bio.PDB
     import subprocess
     io = Bio.PDB.PDBIO()
     io.set_structure(ref_structure)
+    io.save(f"{random_dir}/outputs/reference.pdb")
     io.set_structure(sample_structure)
+    io.save(f"{random_dir}/outputs/out_{index}_aligned.pdb")
     # Doing this to get around biopython CEALIGN bug
     # subprocess.call("pymol -c -Q -r cealign.pml", shell=True)
+    return aligner.rms, f"{random_dir}/outputs/reference.pdb", f"{random_dir}/outputs/out_{index}_aligned.pdb"
 def save_pdb(outs, filename, LEN):
 @ray.remote(num_gpus=1, max_calls=1)
+def run_alphafold(sequences, num_recycles, random_dir):
     recycles = int(num_recycles)
     RUNNER, OPT = setup_af(sequences[0])
     plddts = []
                 outs, f"/home/duerr/phd/08_Code/ProteinMPNN/outputs/out_{i}.pdb", LEN
             )
         else:
+            print(f"saving to {random_dir.name}")
+            save_pdb(outs, f"{random_dir.name}/outputs/out_{i}.pdb", LEN)
     return plddts, paes, LEN
         os.system(f"wget -qnc https://files.rcsb.org/view/{pdb_code}.pdb")
         print(os.getcwd())
         print(os.listdir())
+        print(os.system(f"cat {pdb_code}.pdb"))
         mol = Molecule(f"{pdb_code}.pdb")
+    tf_original = tempfile.NamedTemporaryFile(delete=False)
+    mol.write(tf_original.name)
     # clean messy files and only include protein itself
     mol.filter("protein")
     # renumber using moleculekit 0...len(protein)
             indexes.append(j)
             j += 1
     df["proteinMPNN_index"] = indexes
+    tf_cleaned = tempfile.NamedTemporaryFile(delete=False)
+    mol.write(tf_cleaned.name)
+    return tf_cleaned.name, df
 def assign_sasa(mol):
             plt.figure(),
             plt.figure(),
         )
+    random_dir = tempfile.NamedTemporaryDir(delete=False)
+    plddts, paes, num_res = ray.get(run_alphafold.remote(allSeqs, num_recycles, random_dir ))
     sequences = {}
     for i in range(lenSeqs):
         rms, input_pdb, aligned_pdb = align_structures(
+            pdb, f"{random_dir}/outputs/out_{i}.pdb", num_res, i, random_dir.name
         )
         sequences[i] = {
             "Seq": i,
             selectedResidues,
             allSeqs,
             sequences,
+            random_dir.name
         ),
         plotAF_plddt,
         pae_plots,
 def molecule(
+    input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs, sequences, random_dir
 ):
+    mol = read_mol(f"{random_dir}/outputs/reference.pdb")
     options = ""
     pred_mol = "["
     seqdata = "{"
             + '"}'
         )
         options += f'<option {selected} value="{i}">sequence {i} </option>'  # RMSD {sequences[i]["RMSD"]}, score {sequences[i]["Score"]}, recovery {sequences[i]["Recovery"]} pLDDT {sequences[i]["Mean pLDDT"]}
+        p = f"{random_dir}/outputs/out_{i}_aligned.pdb"
         pred_mol += f"`{read_mol(p)}`"
         selected = ""
         if i != lenSeqs - 1: