| import os |
|
|
| import molbloom |
| import pandas as pd |
| import requests |
| from langchain.tools import BaseTool |
|
|
| from utils import is_smiles |
|
|
|
|
class ChemSpace:
    """Small client for the Chemspace (api.chem-space.com) search API.

    An access token is requested when the object is created and stored on
    ``self.chemspace_token``; it is requested again when a search reports
    invalid credentials.
    """

    # Seconds to wait for any Chemspace HTTP response. Without a timeout,
    # ``requests`` may block indefinitely on a stalled connection. Override
    # on the class or an instance if searches need longer.
    request_timeout = 60

    def __init__(self, chemspace_api_key=None):
        """Store the API key and immediately fetch an access token.

        Note: this performs a network request (see ``_renew_token``).
        """
        self.chemspace_api_key = chemspace_api_key
        self._renew_token()

    def _renew_token(self):
        """Fetch a fresh access token and store it on ``self.chemspace_token``.

        Raises:
            KeyError: if the JSON response has no ``"access_token"`` entry.
        """
        response = requests.get(
            url="https://api.chem-space.com/auth/token",
            headers={
                "Accept": "application/json",
                "Authorization": f"Bearer {self.chemspace_api_key}",
            },
            timeout=self.request_timeout,
        )
        self.chemspace_token = response.json()["access_token"]

    def _make_api_request(
        self,
        query,
        request_type,
        count,
        categories,
    ):
        """
        Make a generic request to chem-space API.

        Categories request.
            CSCS: Custom Request: Could be useful for requesting whole synthesis
            CSMB: Make-On-Demand Building Blocks
            CSSB: In-Stock Building Blocks
            CSSS: In-stock Screening Compounds
            CSMS: Make-On-Demand Screening Compounds

        Args:
            query: value sent as the ``SMILES`` form field.
            request_type: search endpoint name placed in the URL path.
            count: maximum number of results requested.
            categories: comma-separated category codes (see above).

        Returns:
            The decoded JSON response.
        """

        def _do_request():
            return requests.request(
                "POST",
                url=f"https://api.chem-space.com/v3/search/{request_type}?count={count}&page=1&categories={categories}",
                headers={
                    "Accept": "application/json; version=3.1",
                    "Authorization": f"Bearer {self.chemspace_token}",
                },
                data={"SMILES": f"{query}"},
                timeout=self.request_timeout,
            ).json()

        data = _do_request()

        # Retry exactly once, and only when the token was rejected; repeating
        # the search unconditionally would double every API call.
        if data.get("message") == "Your request was made with invalid credentials.":
            self._renew_token()
            data = _do_request()
        return data

    def _convert_single(self, query, search_type: str):
        """Do an exact-match query for a single molecule.

        Returns the ``search_type`` field of the first hit, or a fixed
        "no data" message when the search reports zero results.
        """
        data = self._make_api_request(query, "exact", 1, "CSCS,CSMB,CSSB")
        if data["count"] > 0:
            return data["items"][0][search_type]
        return "No data was found for this compound."

    def convert_mol_rep(self, query, search_type: str = "smiles"):
        """Look up ``search_type`` for one or several molecules.

        Args:
            query: a single molecule identifier, or several separated by
                ``", "``.
            search_type: name of the result field to report.

        Returns:
            One ``"<molecule>'s <search_type> is: <value>"`` line per
            molecule (newline-separated), or an error message if any lookup
            fails.
        """
        try:
            # Each line names its own molecule (not the whole query string).
            return "\n".join(
                f"{q}'s {search_type} is: {self._convert_single(q, search_type)}"
                for q in query.split(", ")
            )
        except Exception:
            # Deliberately broad: the result is consumed as plain text, so
            # any failure (bad input, network, unexpected JSON) becomes a message.
            return "The input provided is wrong. Input either a single molecule, or multiple molecules separated by a ', '"

    def _is_purchasable(self, s):
        """Return True if ``molbloom.buy`` reports the molecule as purchasable.

        Input that ``is_smiles`` rejects is first resolved to a SMILES string
        through an exact Chemspace lookup. Any failure yields ``False``.
        """
        if not is_smiles(s):
            try:
                # Use the raw field value; ``convert_mol_rep`` returns a
                # human-readable sentence that is not a SMILES string.
                s = self._convert_single(s, "smiles")
            except Exception:
                return False
        try:
            return bool(molbloom.buy(s, canonicalize=True))
        except Exception:
            print("invalid smiles")
            return False

    @staticmethod
    def _cheapest_offer(items):
        """Return the cheapest price row across all offers, or ``None``.

        The returned row (a pandas Series) carries ``quantity``,
        ``priceUsd`` and ``vendorName``. ``None`` means no offer had a
        price that could be read as a number.
        """
        frames = []
        for item in items:
            for offer in item["offers"]:
                frame = pd.DataFrame(offer["prices"])
                if frame.empty:
                    continue
                frame["vendorName"] = offer["vendorName"]
                frames.append(frame)
        if not frames:
            return None

        df = pd.concat(frames).reset_index(drop=True)
        df["quantity"] = df["pack"].astype(str) + df["uom"]

        # Coerce instead of ``str.isnumeric`` so decimal prices are kept and
        # non-numeric entries are discarded.
        prices = pd.to_numeric(df["priceUsd"], errors="coerce").dropna()
        if prices.empty:
            return None
        # ``idxmin`` yields an index *label*, so select with ``.loc``.
        return df.loc[prices.idxmin()]

    def buy_mol(
        self,
        smiles,
        request_type="exact",
        count=1,
    ):
        """
        Get data about purchasing compounds.

        smiles: smiles string of the molecule you want to buy
        request_type: one of "exact", "sim" (search by similarity), "sub" (search by substructure).
        count: retrieve data for this many substances max.

        Returns a sentence describing the cheapest offer found, or a short
        message when nothing is found or the response has no ``count``.

        Raises:
            ValueError: if ``request_type`` is not one of the three values above.
        """
        categories_by_type = {
            "exact": "CSMB,CSSB",
            "sim": "CSSS,CSMS",
            "sub": "CSSS,CSMS",
        }
        if request_type not in categories_by_type:
            raise ValueError(
                f"Unknown request_type {request_type!r}; expected 'exact', 'sim' or 'sub'."
            )
        categories = categories_by_type[request_type]

        purchasable = self._is_purchasable(smiles)

        # NOTE(review): the search is sent with the caller's input as-is, even
        # when it was not recognised as SMILES - confirm this is intended.
        data = self._make_api_request(smiles, request_type, count, categories)

        try:
            found = data["count"]
        except KeyError:
            return "Invalid query, try something else. "

        if found == 0:
            if purchasable:
                return "Compound is purchasable, but price is unknown."
            return "Compound is not purchasable."

        print(f"Obtaining data for {found} substances.")

        cheapest = self._cheapest_offer(data["items"])
        if cheapest is None:
            # The compound is listed, but no offer carries a usable price.
            return "Compound is purchasable, but price is unknown."
        return f"{cheapest['quantity']} of this molecule cost {cheapest['priceUsd']} USD and can be purchased at {cheapest['vendorName']}."
|
|
|
|
class GetMoleculePrice(BaseTool):
    """LangChain tool that reports the cheapest price found for a molecule.

    The lookup is delegated to ``ChemSpace.buy_mol``; a Chemspace API key is
    required for the tool to do anything.
    """

    name: str = "GetMoleculePrice"
    description: str = "Get the cheapest available price of a molecule."
    # Chemspace API key; the tool refuses to run while this is unset.
    chemspace_api_key: str = None
    # PubChem URL template assigned in ``__init__``; not read anywhere in this class.
    url: str = None

    def __init__(self, chemspace_api_key: str = None):
        """Initialise the base tool, then record the URL template and API key."""
        super().__init__()
        self.url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/{}"
        self.chemspace_api_key = chemspace_api_key

    def _run(self, query: str) -> str:
        """Return the price sentence for ``query``, or an explanatory message.

        Any exception raised while contacting Chemspace or processing its
        response is converted to its string form and returned, not raised.
        """
        if self.chemspace_api_key:
            try:
                client = ChemSpace(self.chemspace_api_key)
                return client.buy_mol(query)
            except Exception as err:
                return str(err)
        return "No Chemspace API key found. This tool may not be used without a Chemspace API key."

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously (not implemented)."""
        raise NotImplementedError()
|
|