Commit
·
3055c36
1
Parent(s):
2aa7536
Update app.py
Browse files
app.py
CHANGED
|
@@ -157,7 +157,7 @@ def make_tied_positions_for_homomers(pdb_dict_list):
|
|
| 157 |
return my_dict
|
| 158 |
|
| 159 |
|
| 160 |
-
def align_structures(pdb1, pdb2, lenRes, index):
|
| 161 |
"""Take two structures and superimpose pdb1 on pdb2"""
|
| 162 |
import Bio.PDB
|
| 163 |
import subprocess
|
|
@@ -173,13 +173,13 @@ def align_structures(pdb1, pdb2, lenRes, index):
|
|
| 173 |
|
| 174 |
io = Bio.PDB.PDBIO()
|
| 175 |
io.set_structure(ref_structure)
|
| 176 |
-
io.save(f"outputs/reference.pdb")
|
| 177 |
io.set_structure(sample_structure)
|
| 178 |
-
io.save(f"outputs/out_{index}_aligned.pdb")
|
| 179 |
# Doing this to get around biopython CEALIGN bug
|
| 180 |
# subprocess.call("pymol -c -Q -r cealign.pml", shell=True)
|
| 181 |
|
| 182 |
-
return aligner.rms, "outputs/reference.pdb", f"outputs/out_{index}_aligned.pdb"
|
| 183 |
|
| 184 |
|
| 185 |
def save_pdb(outs, filename, LEN):
|
|
@@ -198,7 +198,7 @@ def save_pdb(outs, filename, LEN):
|
|
| 198 |
|
| 199 |
|
| 200 |
@ray.remote(num_gpus=1, max_calls=1)
|
| 201 |
-
def run_alphafold(sequences, num_recycles):
|
| 202 |
recycles = int(num_recycles)
|
| 203 |
RUNNER, OPT = setup_af(sequences[0])
|
| 204 |
plddts = []
|
|
@@ -232,7 +232,8 @@ def run_alphafold(sequences, num_recycles):
|
|
| 232 |
outs, f"/home/duerr/phd/08_Code/ProteinMPNN/outputs/out_{i}.pdb", LEN
|
| 233 |
)
|
| 234 |
else:
|
| 235 |
-
|
|
|
|
| 236 |
return plddts, paes, LEN
|
| 237 |
|
| 238 |
|
|
@@ -320,8 +321,10 @@ def preprocess_mol(pdb_code="", filepath=""):
|
|
| 320 |
os.system(f"wget -qnc https://files.rcsb.org/view/{pdb_code}.pdb")
|
| 321 |
print(os.getcwd())
|
| 322 |
print(os.listdir())
|
|
|
|
| 323 |
mol = Molecule(f"{pdb_code}.pdb")
|
| 324 |
-
|
|
|
|
| 325 |
# clean messy files and only include protein itself
|
| 326 |
mol.filter("protein")
|
| 327 |
# renumber using moleculekit 0...len(protein)
|
|
@@ -334,8 +337,10 @@ def preprocess_mol(pdb_code="", filepath=""):
|
|
| 334 |
indexes.append(j)
|
| 335 |
j += 1
|
| 336 |
df["proteinMPNN_index"] = indexes
|
| 337 |
-
|
| 338 |
-
|
|
|
|
|
|
|
| 339 |
|
| 340 |
|
| 341 |
def assign_sasa(mol):
|
|
@@ -822,13 +827,14 @@ def update_AF(seq_dict, pdb, num_recycles, selectedResidues):
|
|
| 822 |
plt.figure(),
|
| 823 |
plt.figure(),
|
| 824 |
)
|
|
|
|
| 825 |
|
| 826 |
-
plddts, paes, num_res = ray.get(run_alphafold.remote(allSeqs, num_recycles))
|
| 827 |
|
| 828 |
sequences = {}
|
| 829 |
for i in range(lenSeqs):
|
| 830 |
rms, input_pdb, aligned_pdb = align_structures(
|
| 831 |
-
pdb, f"outputs/out_{i}.pdb", num_res, i
|
| 832 |
)
|
| 833 |
sequences[i] = {
|
| 834 |
"Seq": i,
|
|
@@ -896,6 +902,7 @@ def update_AF(seq_dict, pdb, num_recycles, selectedResidues):
|
|
| 896 |
selectedResidues,
|
| 897 |
allSeqs,
|
| 898 |
sequences,
|
|
|
|
| 899 |
),
|
| 900 |
plotAF_plddt,
|
| 901 |
pae_plots,
|
|
@@ -913,10 +920,10 @@ def read_mol(molpath):
|
|
| 913 |
|
| 914 |
|
| 915 |
def molecule(
|
| 916 |
-
input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs, sequences
|
| 917 |
):
|
| 918 |
|
| 919 |
-
mol = read_mol("outputs/reference.pdb")
|
| 920 |
options = ""
|
| 921 |
pred_mol = "["
|
| 922 |
seqdata = "{"
|
|
@@ -937,7 +944,7 @@ def molecule(
|
|
| 937 |
+ '"}'
|
| 938 |
)
|
| 939 |
options += f'<option {selected} value="{i}">sequence {i} </option>' # RMSD {sequences[i]["RMSD"]}, score {sequences[i]["Score"]}, recovery {sequences[i]["Recovery"]} pLDDT {sequences[i]["Mean pLDDT"]}
|
| 940 |
-
p = f"outputs/out_{i}_aligned.pdb"
|
| 941 |
pred_mol += f"`{read_mol(p)}`"
|
| 942 |
selected = ""
|
| 943 |
if i != lenSeqs - 1:
|
|
|
|
| 157 |
return my_dict
|
| 158 |
|
| 159 |
|
| 160 |
+
def align_structures(pdb1, pdb2, lenRes, index, random_dir):
|
| 161 |
"""Take two structures and superimpose pdb1 on pdb2"""
|
| 162 |
import Bio.PDB
|
| 163 |
import subprocess
|
|
|
|
| 173 |
|
| 174 |
io = Bio.PDB.PDBIO()
|
| 175 |
io.set_structure(ref_structure)
|
| 176 |
+
io.save(f"{random_dir}/outputs/reference.pdb")
|
| 177 |
io.set_structure(sample_structure)
|
| 178 |
+
io.save(f"{random_dir}/outputs/out_{index}_aligned.pdb")
|
| 179 |
# Doing this to get around biopython CEALIGN bug
|
| 180 |
# subprocess.call("pymol -c -Q -r cealign.pml", shell=True)
|
| 181 |
|
| 182 |
+
return aligner.rms, f"{random_dir}/outputs/reference.pdb", f"{random_dir}/outputs/out_{index}_aligned.pdb"
|
| 183 |
|
| 184 |
|
| 185 |
def save_pdb(outs, filename, LEN):
|
|
|
|
| 198 |
|
| 199 |
|
| 200 |
@ray.remote(num_gpus=1, max_calls=1)
|
| 201 |
+
def run_alphafold(sequences, num_recycles, random_dir):
|
| 202 |
recycles = int(num_recycles)
|
| 203 |
RUNNER, OPT = setup_af(sequences[0])
|
| 204 |
plddts = []
|
|
|
|
| 232 |
outs, f"/home/duerr/phd/08_Code/ProteinMPNN/outputs/out_{i}.pdb", LEN
|
| 233 |
)
|
| 234 |
else:
|
| 235 |
+
print(f"saving to {random_dir.name}")
|
| 236 |
+
save_pdb(outs, f"{random_dir.name}/outputs/out_{i}.pdb", LEN)
|
| 237 |
return plddts, paes, LEN
|
| 238 |
|
| 239 |
|
|
|
|
| 321 |
os.system(f"wget -qnc https://files.rcsb.org/view/{pdb_code}.pdb")
|
| 322 |
print(os.getcwd())
|
| 323 |
print(os.listdir())
|
| 324 |
+
print(os.system(f"cat {pdb_code}.pdb"))
|
| 325 |
mol = Molecule(f"{pdb_code}.pdb")
|
| 326 |
+
tf_original = tempfile.NamedTemporaryFile(delete=False)
|
| 327 |
+
mol.write(tf_original.name)
|
| 328 |
# clean messy files and only include protein itself
|
| 329 |
mol.filter("protein")
|
| 330 |
# renumber using moleculekit 0...len(protein)
|
|
|
|
| 337 |
indexes.append(j)
|
| 338 |
j += 1
|
| 339 |
df["proteinMPNN_index"] = indexes
|
| 340 |
+
tf_cleaned = tempfile.NamedTemporaryFile(delete=False)
|
| 341 |
+
|
| 342 |
+
mol.write(tf_cleaned.name)
|
| 343 |
+
return tf_cleaned.name, df
|
| 344 |
|
| 345 |
|
| 346 |
def assign_sasa(mol):
|
|
|
|
| 827 |
plt.figure(),
|
| 828 |
plt.figure(),
|
| 829 |
)
|
| 830 |
+
random_dir = tempfile.NamedTemporaryDir(delete=False)
|
| 831 |
|
| 832 |
+
plddts, paes, num_res = ray.get(run_alphafold.remote(allSeqs, num_recycles, random_dir ))
|
| 833 |
|
| 834 |
sequences = {}
|
| 835 |
for i in range(lenSeqs):
|
| 836 |
rms, input_pdb, aligned_pdb = align_structures(
|
| 837 |
+
pdb, f"{random_dir}/outputs/out_{i}.pdb", num_res, i, random_dir.name
|
| 838 |
)
|
| 839 |
sequences[i] = {
|
| 840 |
"Seq": i,
|
|
|
|
| 902 |
selectedResidues,
|
| 903 |
allSeqs,
|
| 904 |
sequences,
|
| 905 |
+
random_dir.name
|
| 906 |
),
|
| 907 |
plotAF_plddt,
|
| 908 |
pae_plots,
|
|
|
|
| 920 |
|
| 921 |
|
| 922 |
def molecule(
|
| 923 |
+
input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs, sequences, random_dir
|
| 924 |
):
|
| 925 |
|
| 926 |
+
mol = read_mol(f"{random_dir}/outputs/reference.pdb")
|
| 927 |
options = ""
|
| 928 |
pred_mol = "["
|
| 929 |
seqdata = "{"
|
|
|
|
| 944 |
+ '"}'
|
| 945 |
)
|
| 946 |
options += f'<option {selected} value="{i}">sequence {i} </option>' # RMSD {sequences[i]["RMSD"]}, score {sequences[i]["Score"]}, recovery {sequences[i]["Recovery"]} pLDDT {sequences[i]["Mean pLDDT"]}
|
| 947 |
+
p = f"{random_dir}/outputs/out_{i}_aligned.pdb"
|
| 948 |
pred_mol += f"`{read_mol(p)}`"
|
| 949 |
selected = ""
|
| 950 |
if i != lenSeqs - 1:
|