"""Export the train/valid/test splits of two TDC DTI datasets to CSV.

Loads the ``BindingDB_Kd`` and ``DAVIS`` datasets through ``tdc.multi_pred.DTI``,
applies the preprocessing calls below, and writes one CSV file per dataset and
per split into ``../dataset_kd/`` (relative to the current working directory).
"""
import os

import numpy as np
import pandas as pd
from tdc.multi_pred import DTI

if __name__ == '__main__':
    # Folder that receives the six CSV files. DataFrame.to_csv does not create
    # missing parent directories, so make sure it exists up front; this also
    # makes a bad/unwritable path fail before any dataset work is done.
    output_dir = "../dataset_kd"
    os.makedirs(output_dir, exist_ok=True)

    bindingDB_data = DTI(name='BindingDB_Kd')
    davis_data = DTI(name='DAVIS')

    # Only the BindingDB data is harmonized; DAVIS is used as loaded.
    # NOTE(review): 'max_affinity' presumably collapses repeated measurements
    # of the same pair to a single value -- confirm against the TDC docs.
    bindingDB_data.harmonize_affinities(mode='max_affinity')

    # Going by the method name, this rewrites the Y labels on a log scale
    # (TODO confirm exact transform in the TDC docs).
    bindingDB_data.convert_to_log(form='binding')
    davis_data.convert_to_log(form='binding')

    # get_split() is indexed below by the split names "train"/"valid"/"test".
    split_bindingDB = bindingDB_data.get_split()
    split_davis = davis_data.get_split()

    dataset_list = ["train", "valid", "test"]
    for dataset_type in dataset_list:
        df_bindingDB = pd.DataFrame(split_bindingDB[dataset_type])
        df_davis = pd.DataFrame(split_davis[dataset_type])

        df_bindingDB.to_csv(f"{output_dir}/bindingDB_{dataset_type}.csv", index=False)
        df_davis.to_csv(f"{output_dir}/davis_{dataset_type}.csv", index=False)

        # Label columns as plain NumPy arrays. They are overwritten on every
        # iteration, so after the loop they hold the "test" split values.
        Y_bindingDB = np.array(df_bindingDB.Y)
        Y_davis = np.array(df_davis.Y)

        # NOTE(review): convert_to_log() was already applied to the DAVIS data
        # above, so this looks like a second log10 on top of it, and the
        # result is wrapped in a one-element list. Kept unchanged because the
        # intent (and any later use of this name) is not visible here --
        # confirm whether this is wanted.
        Y_davis_log = [np.log10(Y_davis)]