Upload 3 files
Browse files
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,73 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import streamlit as st
         | 
| 2 | 
            +
            import pandas as pd
         | 
| 3 | 
            +
            import rdkit
         | 
| 4 | 
            +
            import streamlit_ketcher
         | 
| 5 | 
            +
            from streamlit_ketcher import st_ketcher
         | 
| 6 | 
            +
            import run
         | 
| 7 | 
            +
            import screen
         | 
| 8 | 
            +
             | 
# Page setup
st.set_page_config(page_title="DeepDAP", page_icon="🔋", layout="wide")
st.title("🔋DeepDAP")
st.subheader('', divider='rainbow')

# Load the "dap" sheet of the public Google Sheet through its CSV export URL.
# Every column is read as text so the .str search below works on all of them.
url1 = r"https://docs.google.com/spreadsheets/d/1AKkZS04VF3osFT36aNHIb4iUbV8D1uNfsldcpHXogj0/gviz/tq?tqx=out:csv&sheet=dap"
df1 = pd.read_csv(url1, dtype=str, encoding='utf-8')

col1, col2 = st.columns(2)
with col1:
    st.header("🔍**Search papers or molecules**")
    text_search = st.text_input(label="_", value="", label_visibility="hidden")
    # regex=False: the query is matched literally, so characters that are
    # common in molecule names and SMILES ("(", "[", "+") cannot raise a
    # regex error.  na=False: empty cells count as "no match" instead of NaN.
    m1 = df1["Donor_Name"].str.contains(text_search, regex=False, na=False)
    m2 = df1["reference"].str.contains(text_search, regex=False, na=False)
    m3 = df1["Acceptor_Name"].str.contains(text_search, regex=False, na=False)
    df_search = df1[m1 | m2 | m3]
with col2:
    st.link_button(":black[📝**DATABASE**]", r"https://docs.google.com/spreadsheets/d/1AKkZS04VF3osFT36aNHIb4iUbV8D1uNfsldcpHXogj0")
    st.caption(':black[👆If you want to update the origin database, click the button.]')

# Search results are only shown once the user has typed something.
if text_search:
    st.write(df_search)
    st.download_button("⬇️Download edited files as .csv", df_search.to_csv(), "df_search.csv", use_container_width=True)

# Editable copy of the whole table; edits live only in this session and in
# the downloaded CSV.
edited_df = st.data_editor(df1, num_rows="dynamic")

st.download_button(
    "⬇️ Download edited files as .csv", edited_df.to_csv(), "edited_df.csv", use_container_width=True
)

# The selected molecule type is drawn/edited in the Ketcher widget; its
# partner molecule is entered as plain SMILES text.
st.subheader("👇 :red[***Select the type of active layer...***]")
option = st.radio(
    "👇 :red[**Select the type of active layer...**]",
    [":black[**Donor**]", ":black[**Acceptor**]"], label_visibility="hidden"
)

acceptor = None
donor = None
if option == ":black[**Acceptor**]":
    st.subheader("👨🔬**Input the SMILES of Acceptor Molecule**")
    molecule = st.text_input("👨🔬**Input the SMILES of Acceptor Molecule**", label_visibility="hidden")
    acceptor = st_ketcher(molecule)
    st.subheader(f"🏆**New SMILES of edited acceptor molecules**: {acceptor}")
    st.subheader(":black[**🧡Input the SMILES of Donor Molecule**]")
    donor = st.text_input(":black[**🧡Input the SMILES of Donor Molecule**]", label_visibility="hidden")
elif option == ":black[**Donor**]":
    st.subheader("👨🔬**Input the SMILES of Donor Molecule**")
    do = st.text_input("👨🔬**Input the SMILES of Donor Molecule**", label_visibility="hidden")
    donor = st_ketcher(do)
    st.subheader(f"🏆**New SMILES of edited donor molecules**: {donor}")
    st.subheader(":black[**🧡Input the SMILES of Acceptor Molecule**]")
    acceptor = st.text_input(":black[**🧡Input the SMILES of Acceptor Molecule**]", label_visibility="hidden")

# Single-pair prediction.  The model is only called when both molecules have
# been entered; any failure is shown as "None" rather than crashing the page.
pce = None
if acceptor and donor:
    try:
        result = run.smiles_aas_test(str(acceptor), str(donor))
    except Exception:
        result = None
    # run.marker_prediction reports failures as {'Error_message': exc};
    # that is not a PCE value and must not be displayed as one.
    if not isinstance(result, dict):
        pce = result

if pce is None:
    st.subheader(f"⚡**PCE**:  None  ")
else:
    st.subheader(f"⚡**PCE**: ``{pce}``")

# Batch screening from an uploaded CSV file.
st.subheader(":black[**🧡Batch screening for high-performance D/A pairs**]")
uploaded_files = st.file_uploader("Choose a CSV file")
st.write("🎈upload a csv file containing ['donor' ] and ['acceptor']")
if st.button("📑PREDICT"):
    if uploaded_files is not None:
        st.markdown(":red[Predictions are being made... Please wait...]")
        st.progress(100, text=None)
        x = screen.smiles_aas_test(uploaded_files)
        if isinstance(x, dict):
            # screen.smiles_aas_test returns {'Error_message': exc} on
            # failure; pd.DataFrame() cannot be built from that, so the
            # error is reported instead.
            st.error(f"Prediction failed: {x.get('Error_message')}")
        else:
            x = pd.DataFrame(x)
            st.download_button("⬇️Download  the predicted files as .csv", x.to_csv(), "predict results.csv", use_container_width=True)
    else:
        st.markdown(":red[Please upload the file first!]")
    	
        run.py
    ADDED
    
    | @@ -0,0 +1,124 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import os
         | 
| 2 | 
            +
            import pandas as pd
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            import torch
         | 
| 5 | 
            +
            from torch.nn import functional as F
         | 
| 6 | 
            +
            from transformers import AutoTokenizer
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            from util.utils import *
         | 
| 9 | 
            +
            from rdkit import Chem
         | 
| 10 | 
            +
            from tqdm import tqdm
         | 
| 11 | 
            +
            from train import markerModel
         | 
| 12 | 
            +
             | 
# Order GPUs by PCI bus id and expose only the first one.  The value is
# written without the stray trailing space the previous literal ('0 ') had.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

device_count = torch.cuda.device_count()
device_biomarker = torch.device('cuda' if torch.cuda.is_available() else "cpu")

# Tokenized inputs are moved to this device in marker_prediction.
device = torch.device('cpu')
a_model_name = 'DeepChem/ChemBERTa-10M-MLM'
d_model_name = 'DeepChem/ChemBERTa-10M-MTR'

# One tokenizer per input: `tokenizer` for the first (acceptor) SMILES,
# `d_tokenizer` for the second (donor) SMILES.
tokenizer = AutoTokenizer.from_pretrained(a_model_name)
d_tokenizer = AutoTokenizer.from_pretrained(d_model_name)

# -- Prediction model --
# Hyper-parameters come from config/predict.json; load_hparams and DictX are
# not defined in this file (presumably provided by `from util.utils import *`).
config = load_hparams('config/predict.json')
config = DictX(config)
# NOTE(review): this instance is replaced by the checkpoint load on the next
# statement; it is kept only because the constructor's side effects are not
# visible from this file.
model = markerModel(config.d_model_name, config.p_model_name,
                    config.lr, config.dropout, config.layer_features,
                    config.loss_fn, config.layer_limit,
                    config.pretrained['chem'], config.pretrained['prot'])

model = markerModel.load_from_checkpoint(config.load_checkpoint, strict=False)
model.eval()
model.freeze()

if device_biomarker.type == 'cuda':
    # DataParallel requires the wrapped module's parameters and buffers to be
    # on the first CUDA device, so the model is moved there before wrapping.
    model = torch.nn.DataParallel(model.to(device_biomarker))
def get_marker(drug_inputs, prot_inputs):
    """Run the module-level ``model`` on one tokenized batch.

    Args:
        drug_inputs: first model input; marker_prediction passes a dict with
            'input_ids' and 'attention_mask' tensors.
        prot_inputs: second model input, same structure.

    Returns:
        The model output with all size-1 dimensions squeezed out, converted
        with ``tolist()``.  When the squeezed output is zero-dimensional
        (e.g. a single pair) this is a bare Python number, not a list.
    """
    raw_output = model(drug_inputs, prot_inputs)
    return raw_output.squeeze().tolist()
| 50 | 
            +
             | 
| 51 | 
            +
             | 
def marker_prediction(smiles, aas):
    """Tokenize a batch of molecule pairs and return the model's predictions.

    Args:
        smiles: list of SMILES strings for the first model input, tokenized
            as-is with ``tokenizer``.
        aas: list of SMILES strings for the second model input.  Each string
            is split into single characters joined by spaces before being
            tokenized with ``d_tokenizer``.

    Returns:
        Whatever get_marker returns for the batch, or
        ``{'Error_message': exc}`` if anything raised (the exception is also
        printed).
    """
    encode_options = dict(padding='max_length', max_length=510,
                          truncation=True, return_tensors="pt")
    wanted_keys = ('input_ids', 'attention_mask')
    try:
        # "CCO" -> "C C O": one character per whitespace-separated piece.
        spaced_sequences = [' '.join(sequence) for sequence in aas]

        encoded_first = tokenizer(smiles, **encode_options)
        a_inputs = {key: encoded_first[key].to(device) for key in wanted_keys}

        encoded_second = d_tokenizer(spaced_sequences, **encode_options)
        d_inputs = {key: encoded_second[key].to(device) for key in wanted_keys}

        return get_marker(a_inputs, d_inputs)
    except Exception as e:
        print(e)
        return {'Error_message': e}
| 78 | 
            +
             | 
| 79 | 
            +
             | 
def smiles_aas_test(smile_acc, smile_don):
    """Predict the model output for a single acceptor/donor pair.

    Both SMILES strings are canonicalized with RDKit and then passed to
    marker_prediction as a batch of one pair.

    Args:
        smile_acc: SMILES string of the acceptor molecule.
        smile_don: SMILES string of the donor molecule.

    Returns:
        The value marker_prediction returns for the one-pair batch: the
        prediction produced by get_marker, or ``{'Error_message': exc}`` if
        tokenization or the model call failed.

    Raises:
        ValueError: if RDKit cannot parse one of the SMILES strings.
            (Previously the ``None`` molecule was handed to MolToSmiles,
            which failed with an opaque argument error.)
    """
    canonical = []
    for role, raw_smiles in (("acceptor", smile_acc), ("donor", smile_don)):
        mol = Chem.MolFromSmiles(raw_smiles)
        # MolFromSmiles signals a parse failure by returning None.
        if mol is None:
            raise ValueError(f"Invalid {role} SMILES: {raw_smiles!r}")
        canonical.append(Chem.MolToSmiles(mol, canonical=True))

    acceptor_smiles, donor_smiles = canonical
    # There is exactly one pair, so no batching loop is needed.
    return marker_prediction([acceptor_smiles], [donor_smiles])
| 118 | 
            +
                    
         | 
| 119 | 
            +
             
         | 
| 120 | 
            +
             | 
if __name__ == '__main__':
    # Smoke test on one acceptor/donor pair.  Raw strings are used because
    # these SMILES contain backslashes (cis/trans bond markers), which are
    # invalid escape sequences in an ordinary string literal.
    a = smiles_aas_test(
        r'CCCCCCCCCCCC1=C(/C=C2\C(=O)C3=C(C=C(F)C(F)=C3)C2=C(C#N)C#N)SC2=C1SC1=C2N(CC(CC)CCCC)C2=C1C1=NSN=C1C1=C2N(CC(CC)CCCC)C2=C1SC1=C2SC(/C=C2\C(=O)C3=C(C=C(F)C(F)=C3)C2=C(C#N)C#N)=C1CCCCCCCCCCC',
        r'CCCCCCC(CCCC)CC1=C(C)SC(C2=CC3=C(S2)C2=C(C=C(C4=CC(CC(CCCC)CCCCCC)=C(C5=CC6=C(C7=CC=C(CC(CC)CCCC)S7)C7=C(C=C(C)S7)C(C7=CC=C(CC(CC)CCCC)S7)=C6S5)S4)S2)C2=NSN=C23)=C1',
    )
    # Show the result; previously it was computed and silently discarded.
    print(a)
    	
        screen.py
    ADDED
    
    | @@ -0,0 +1,120 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import os
         | 
| 2 | 
            +
            import pandas as pd
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            import torch
         | 
| 5 | 
            +
            from torch.nn import functional as F
         | 
| 6 | 
            +
            from transformers import AutoTokenizer
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            from util.utils import *
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            from tqdm import tqdm
         | 
| 11 | 
            +
            from train import markerModel
         | 
| 12 | 
            +
             | 
# Order GPUs by PCI bus id and expose only the first one.  The value is
# written without the stray trailing space the previous literal ('0 ') had.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

device_count = torch.cuda.device_count()
device_biomarker = torch.device('cuda' if torch.cuda.is_available() else "cpu")

# Tokenized inputs are moved to this device in marker_prediction.
device = torch.device('cpu')
a_model_name = 'DeepChem/ChemBERTa-10M-MLM'
d_model_name = 'DeepChem/ChemBERTa-10M-MTR'

# One tokenizer per input: `tokenizer` for the acceptor SMILES,
# `d_tokenizer` for the donor SMILES.
tokenizer = AutoTokenizer.from_pretrained(a_model_name)
d_tokenizer = AutoTokenizer.from_pretrained(d_model_name)

# -- Prediction model --
# Hyper-parameters come from config/predict.json; load_hparams and DictX are
# not defined in this file (presumably provided by `from util.utils import *`).
config = load_hparams('config/predict.json')
config = DictX(config)
# NOTE(review): this instance is replaced by the checkpoint load on the next
# statement; it is kept only because the constructor's side effects are not
# visible from this file.
model = markerModel(config.d_model_name, config.p_model_name,
                    config.lr, config.dropout, config.layer_features,
                    config.loss_fn, config.layer_limit,
                    config.pretrained['chem'], config.pretrained['prot'])

model = markerModel.load_from_checkpoint(config.load_checkpoint, strict=False)
model.eval()
model.freeze()

if device_biomarker.type == 'cuda':
    # DataParallel requires the wrapped module's parameters and buffers to be
    # on the first CUDA device, so the model is moved there before wrapping.
    model = torch.nn.DataParallel(model.to(device_biomarker))
def get_marker(drug_inputs, prot_inputs):
    """Run the module-level ``model`` on one tokenized batch.

    Args:
        drug_inputs: first model input; marker_prediction passes a dict with
            'input_ids' and 'attention_mask' tensors.
        prot_inputs: second model input, same structure.

    Returns:
        list: the squeezed model output converted with ``tolist()``.  A
        zero-dimensional result (a batch holding a single pair) is wrapped
        in a one-element list so callers can always index it per row.
    """
    output_preds = model(drug_inputs, prot_inputs)
    predict = torch.squeeze(output_preds).tolist()

    # squeeze() collapses a one-row batch to a 0-d tensor, whose tolist() is
    # a bare number; marker_prediction indexes the result row by row.
    if not isinstance(predict, list):
        predict = [predict]

    return predict
| 50 | 
            +
             | 
| 51 | 
            +
             | 
def marker_prediction(smiles, aas):
    """Tokenize a batch of acceptor/donor pairs and predict one value per pair.

    Args:
        smiles: list of acceptor SMILES strings, tokenized as-is with
            ``tokenizer``.
        aas: list of donor SMILES strings.  Each string is split into single
            characters joined by spaces before being tokenized with
            ``d_tokenizer``.

    Returns:
        list[dict]: one ``{'acceptor', 'donor', 'predict'}`` record per pair,
        or ``{'Error_message': exc}`` if anything raised (the exception is
        also printed).
    """
    try:
        # "CCO" -> "C C O": one character per whitespace-separated piece.
        aas_input = [' '.join(sequence) for sequence in aas]

        a_inputs = tokenizer(smiles, padding='max_length', max_length=510, truncation=True, return_tensors="pt")
        a_inputs = {
            'input_ids': a_inputs['input_ids'].to(device),
            'attention_mask': a_inputs['attention_mask'].to(device),
        }

        d_inputs = d_tokenizer(aas_input, padding='max_length', max_length=510, truncation=True, return_tensors="pt")
        d_inputs = {
            'input_ids': d_inputs['input_ids'].to(device),
            'attention_mask': d_inputs['attention_mask'].to(device),
        }

        output_predict = get_marker(a_inputs, d_inputs)
        # A batch with a single pair can come back as a bare number instead
        # of a list; normalize it so the per-row pairing below never fails.
        if not isinstance(output_predict, list):
            output_predict = [output_predict]

        return [
            {'acceptor': acceptor, 'donor': donor, 'predict': value}
            for acceptor, donor, value in zip(smiles, aas, output_predict)
        ]

    except Exception as e:
        print(e)
        return {'Error_message': e}
| 79 | 
            +
             | 
| 80 | 
            +
             | 
def smiles_aas_test(file):
    """Predict a value for every acceptor/donor pair listed in a CSV file.

    Columns named 'acceptor' and 'donor' are used when both are present;
    otherwise the third column is read as the acceptor and the second as the
    donor (the original positional layout).  Each SMILES is canonicalized
    with RDKit, and the pairs are sent to marker_prediction in batches.

    Args:
        file: path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        pandas.DataFrame built from the records marker_prediction returns
        (one row per pair), or ``{'Error_message': exc}`` if reading,
        parsing, or prediction failed (the exception is also printed).
    """
    batch_size = 80
    try:
        # Imported locally: this module's own import lines do not import
        # Chem.  Kept inside the try so a missing RDKit is reported through
        # the same error dict as every other failure.
        from rdkit import Chem

        def canonical_smiles(raw, role, row_number):
            """Return the canonical form of one SMILES or raise ValueError."""
            mol = Chem.MolFromSmiles(raw) if isinstance(raw, str) else None
            # MolFromSmiles signals a parse failure by returning None.
            if mol is None:
                raise ValueError(f"Invalid {role} SMILES in row {row_number}: {raw!r}")
            return Chem.MolToSmiles(mol, canonical=True)

        smiles_aas = pd.read_csv(file)

        if {'acceptor', 'donor'} <= set(smiles_aas.columns):
            acceptor_column = smiles_aas['acceptor']
            donor_column = smiles_aas['donor']
        else:
            acceptor_column = smiles_aas.iloc[:, 2]
            donor_column = smiles_aas.iloc[:, 1]

        ## -- 1 to 1 pair predict check -- ##
        pairs = [
            [canonical_smiles(acc, 'acceptor', row), canonical_smiles(don, 'donor', row)]
            for row, (acc, don) in enumerate(zip(acceptor_column, donor_column))
        ]
        marker_list = [pairs[start:start + batch_size]
                       for start in range(0, len(pairs), batch_size)]

        datas = []
        for marker_datas in tqdm(marker_list, total=len(marker_list)):
            acceptors, donors = zip(*marker_datas)
            output_pred = marker_prediction(list(acceptors), list(donors))
            # marker_prediction reports a failure as a dict; return it
            # unchanged instead of failing on `list + dict` and losing the
            # real error.
            if isinstance(output_pred, dict):
                return output_pred
            datas = datas + output_pred

        return pd.DataFrame(datas)

    except Exception as e:
        print(e)
        return {'Error_message': e}
| 120 | 
            +
             |