| import numpy as np | |
| import pandas as pd | |
def build_external_dataset(smiles_data, aas_data, cyp_types):
    """Return the cross product of SMILES entries and (aas, CYP type) pairs.

    Args:
        smiles_data: Iterable of values from the ``smiles`` column.
        aas_data: Sequence of values from the ``aas`` column.
        cyp_types: Sequence of values from the ``CYP_type`` column, aligned
            element-by-element with ``aas_data``.

    Returns:
        A list of ``[smiles, aas, cyp_type]`` rows. The SMILES entry varies
        slowest: every (aas, CYP type) pair is emitted for the first SMILES
        entry before moving on to the next one.
    """
    return [
        [smi, aas, cyp]
        for smi in smiles_data
        # aas and CYP type come from the same table row, so walk them together.
        for aas, cyp in zip(aas_data, cyp_types)
    ]


def main(
    smiles_path="../dataset/external_smiles.csv",
    aas_path="../dataset/external_aas.csv",
    output_path='../dataset/external_dataset.csv',
):
    """Build the external dataset CSV from the SMILES and aas CSV files.

    Reads the ``smiles`` column of ``smiles_path`` and the ``aas`` and
    ``CYP_type`` columns of ``aas_path``, pairs every SMILES entry with every
    (aas, CYP type) row, and writes the result to ``output_path`` with the
    columns ``smiles``, ``aas`` and ``CYP_type``. The default paths are
    relative, so they resolve against the current working directory.

    Args:
        smiles_path: CSV file providing the ``smiles`` column.
        aas_path: CSV file providing the ``aas`` and ``CYP_type`` columns.
        output_path: Destination CSV file (written without the index).
    """
    smiles = pd.read_csv(smiles_path)
    ass = pd.read_csv(aas_path)

    rows = build_external_dataset(
        smiles['smiles'].tolist(),
        ass['aas'].tolist(),
        ass['CYP_type'].tolist(),
    )

    df = pd.DataFrame(rows, columns=['smiles', 'aas', 'CYP_type'])
    df.to_csv(output_path, index=False)

    # Echo the first entry of each input table as a quick sanity check.
    print(smiles['smiles'][0])
    print(ass['CYP_type'][0])


if __name__ == '__main__':
    main()