Spaces:
Build error
Build error
| from mp_api.client import MPRester | |
| from emmet.core.summary import HasProps | |
| import openai | |
| import langchain | |
| from langchain import OpenAI | |
| from langchain import agents | |
| from langchain.agents import initialize_agent | |
| from langchain.agents import Tool, tool | |
| from langchain import LLMMathChain, SerpAPIWrapper | |
| from gpt_index import GPTListIndex, GPTIndexMemory | |
| from langchain import SerpAPIWrapper | |
| from langchain.prompts.few_shot import FewShotPromptTemplate | |
| from langchain.prompts.prompt import PromptTemplate | |
| from langchain.vectorstores import FAISS, Chroma | |
| from langchain.embeddings import OpenAIEmbeddings | |
| from langchain.prompts.example_selector import (MaxMarginalRelevanceExampleSelector, | |
| SemanticSimilarityExampleSelector) | |
| import requests | |
| from rdkit import Chem | |
| import pandas as pd | |
| import os | |
class MAPITools:
    """Base toolkit that exposes Materials Project look-ups and an LLM call as LangChain tools.

    The class is abstract in practice: subclasses must set ``self.prop`` (the
    summary field requested from the API) and ``self.prop_name`` (the name used
    in tool names/descriptions), and must implement ``check_prop_by_formula``
    and ``create_context_prompt``.
    """

    def __init__(self):
        self.model = 'text-ada-001'  # maybe change to gpt-4 when ready
        # Passed as ``k`` to the example selector built by the subclasses.
        self.k = 10

    def get_material_atoms(self, formula):
        """Return the distinct element symbols of ``formula`` as a comma-separated string.

        Symbols are reported once each, in order of first appearance, so
        ``"CH3COOH"`` yields ``"C,H,O"``. Stoichiometric counts are ignored.
        A formula without any capitalised symbol yields ``""``.
        """
        import re

        # An uppercase letter followed by lowercase letters is one symbol;
        # digits between symbols are simply skipped by the pattern.
        symbols = re.findall(r"[A-Z][a-z]*", formula)
        # dict.fromkeys removes repeats while preserving insertion order.
        return ",".join(dict.fromkeys(symbols))

    def check_prop_by_formula(self, formula):
        """Look up the property for ``formula``; must be provided by subclasses."""
        raise NotImplementedError('Should be implemented in children classes')

    def search_similars_by_atom(self, atoms):
        """Search the Materials Project summary endpoint by element list.

        ``atoms`` is a comma-separated string of element symbols (spaces are
        removed before splitting). Returns whatever ``summary.search`` returns
        when asked for the ``formula_pretty`` and ``self.prop`` fields.
        """
        element_list = atoms.replace(" ", "").split(',')
        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
            docs = mpr.summary.search(
                elements=element_list,
                fields=["formula_pretty", self.prop],
            )
        return docs

    def create_context_prompt(self, formula):
        """Build the few-shot prompt for ``formula``; must be provided by subclasses."""
        raise NotImplementedError('Should be implemented in children classes')

    def LLM_predict(self, prompt):
        """Send ``prompt`` to the configured OpenAI completion model and return the text.

        The prompt is expected to come from ``create_context_prompt``. Only the
        text of the first generation is returned.
        """
        llm = OpenAI(
            model_name=self.model,
            temperature=0.7,
            n=1,
            best_of=5,
            top_p=1.0,
            stop=["\n\n", "###", "#", "##"],
            # model_kwargs=kwargs,
        )
        return llm.generate([prompt]).generations[0][0].text

    def get_tools(self):
        """Return the LangChain ``Tool`` objects wrapping this instance's methods.

        Requires ``self.prop_name`` to be set (done by the subclasses), because
        it is interpolated into tool names and descriptions.
        """
        # The context tool is described differently for classification
        # (MAPI_class_tools) and for every other kind of toolkit.
        if isinstance(self, MAPI_class_tools):
            context_description = (
                f"This function received a material formula as input and create a prompt to be inputed in the LLM_predict tool to predict if the material is {self.prop_name}."
            )
        else:
            context_description = (
                f"This function received a material formula as input and create a prompt to be inputed in the LLM_predict tool to predict the {self.prop_name} of a material."
            )

        return [
            Tool(
                name="Get atoms in material",
                func=self.get_material_atoms,
                description=(
                    "Receives a material formula and returns the atoms symbols present in it separated by comma."
                ),
            ),
            Tool(
                name=f"Checks if material is {self.prop_name} by formula",
                func=self.check_prop_by_formula,
                description=(
                    f"This functions searches in the material project's API for the formula and returns if it is {self.prop_name} or not."
                ),
            ),
            # Tool(
            #     name = "Search similar materials by atom",
            #     func = self.search_similars_by_atom,
            #     description = (
            #         "This function receives a string with the atoms separated by comma as input and returns a list of similar materials."
            #     )
            # ),
            Tool(
                name=f"Create {self.prop_name} context to LLM search",
                func=self.create_context_prompt,
                description=context_description,
            ),
            Tool(
                # NOTE: the spelling of this tool name is kept exactly as it
                # was, since the name is a runtime identifier.
                name="LLM predictiom",
                func=self.LLM_predict,
                description=(
                    "This function receives a prompt generate with context by the create_context_prompt tool and request a completion to a language model. Then returns the completion"
                ),
            ),
        ]
class MAPI_class_tools(MAPITools):
    """Toolkit for a yes/no property of a material (classification).

    Args:
        prop: Name of the summary field requested from the Materials Project API.
        prop_name: Name of the property used in prompts and tool descriptions.
        p_label: Label used when the property value is truthy.
        n_label: Label used when the property value is falsy.
    """

    def __init__(self, prop, prop_name, p_label, n_label):
        super().__init__()
        self.prop = prop
        self.prop_name = prop_name
        self.p_label = p_label
        self.n_label = n_label

    def check_prop_by_formula(self, formula):
        """Search the Materials Project API for ``formula`` and report the property label.

        Returns ``self.p_label`` or ``self.n_label`` depending on the
        truthiness of the property of the first hit, provided that hit's
        ``formula_pretty`` equals ``formula`` exactly. Otherwise returns a
        "could not find" message.
        """
        # A plain docstring is used above: an f-string in that position is
        # not stored as __doc__ and would be re-evaluated on every call.
        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
            docs = mpr.summary.search(formula=formula, fields=["formula_pretty", self.prop])
            if docs and docs[0].formula_pretty == formula:
                return self.p_label if docs[0].dict()[self.prop] else self.n_label
        return f"Could not find any material while searching {formula}"

    def create_context_prompt(self, formula):
        """Build a few-shot classification prompt for ``formula``.

        Materials sharing the elements of ``formula`` are fetched and labelled
        with ``p_label``/``n_label``; examples are then chosen by a
        max-marginal-relevance selector and rendered into the prompt that is
        meant to be passed to ``LLM_predict``.
        """
        elements = self.get_material_atoms(formula)
        labelled = []
        for ex in self.search_similars_by_atom(elements):
            label = self.p_label if ex.dict()[self.prop] else self.n_label
            labelled.append({'formula': ex.formula_pretty, 'prop': label})
        examples = pd.DataFrame(labelled).drop_duplicates().to_dict(orient="records")

        prefix = (
            f'You are a bot who can predict if a material is {self.prop_name}.\n'
            f'Given this list of known materials and the information if they are {self.p_label} or {self.n_label}, \n'
            f'you need to answer the question if the last material is {self.prop_name}:'
        )
        prompt_template = PromptTemplate(
            input_variables=["formula", "prop"],
            template=f"Is {{formula}} a {self.prop_name} material?@@@\n{{prop}}###",
        )
        suffix = f"Is {{formula}} a {self.prop_name} material?@@@\n"

        if examples:
            example_source = {
                "example_selector": MaxMarginalRelevanceExampleSelector.from_examples(
                    examples,
                    OpenAIEmbeddings(),
                    FAISS,
                    k=self.k,
                )
            }
        else:
            # NOTE(review): building the vector-store selector from zero
            # examples is expected to fail, so fall back to a prompt with no
            # examples instead of raising inside the agent — confirm.
            example_source = {"examples": []}

        prompt = FewShotPromptTemplate(
            example_prompt=prompt_template,
            prefix=prefix,
            suffix=suffix,
            input_variables=["formula"],
            **example_source,
        )
        return prompt.format(formula=formula)
class MAPI_reg_tools(MAPITools):
    """Toolkit for a numeric property of a material (regression).

    Args:
        prop: Name of the summary field requested from the Materials Project API.
        prop_name: Name of the property used in prompts and tool descriptions.
    """

    # TODO: deal with units
    def __init__(self, prop, prop_name):
        super().__init__()
        self.prop = prop
        self.prop_name = prop_name

    def check_prop_by_formula(self, formula):
        """Search the Materials Project API for ``formula`` and return the property value.

        If the first hit's ``formula_pretty`` equals ``formula`` exactly, its
        stored value is returned, or a "no record" message when that value is
        ``None``. In every other case a "could not find" message is returned.
        """
        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
            docs = mpr.summary.search(formula=formula, fields=["formula_pretty", self.prop])
            if docs and docs[0].formula_pretty == formula:
                value = docs[0].dict()[self.prop]
                # The missing-value message belongs to the *matching*
                # material; previously a match with no value returned None.
                if value is None:
                    return f"There is no record of {self.prop_name} for {formula}"
                return value
        return f"Could not find any material while searching {formula}"

    def create_context_prompt(self, formula):
        """Build a few-shot regression prompt for ``formula``.

        Materials sharing the elements of ``formula`` are fetched, entries
        without a value are dropped, and the remaining values are rendered
        with two decimals. Examples are chosen by a max-marginal-relevance
        selector and rendered into the prompt meant for ``LLM_predict``.
        """
        elements = self.get_material_atoms(formula)
        measured = []
        for ex in self.search_similars_by_atom(elements):
            value = ex.dict()[self.prop]
            measured.append({
                'formula': ex.formula_pretty,
                # ".2f" = two decimals; the former "2f" was a field width of
                # 2 with the default six decimals.
                'prop': f"{value:.2f}" if value is not None else None,
            })
        examples = pd.DataFrame(measured).drop_duplicates().dropna().to_dict(orient="records")

        prefix = (
            f'You are a bot who can predict the {self.prop_name} of a material .\n'
            f'Given this list of known materials and the measurement of their {self.prop_name}, \n'
            f'you need to answer the what is the {self.prop_name} of the material:'
            'The answer should be numeric and finish with ###'
        )
        prompt_template = PromptTemplate(
            input_variables=["formula", "prop"],
            template=f"What is the {self.prop_name} for {{formula}}?@@@\n{{prop}}###",
        )
        suffix = f"What is the {self.prop_name} for {{formula}}?@@@\n"

        if examples:
            example_source = {
                "example_selector": MaxMarginalRelevanceExampleSelector.from_examples(
                    examples,
                    OpenAIEmbeddings(),
                    FAISS,
                    k=self.k,
                )
            }
        else:
            # NOTE(review): building the vector-store selector from zero
            # examples is expected to fail, so fall back to a prompt with no
            # examples instead of raising inside the agent — confirm.
            example_source = {"examples": []}

        prompt = FewShotPromptTemplate(
            example_prompt=prompt_template,
            prefix=prefix,
            suffix=suffix,
            input_variables=["formula"],
            **example_source,
        )
        return prompt.format(formula=formula)