from typing import Any, Dict, Iterator, List, Optional

from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk

from models.business_logic_utils.business_logic import process_app_request


class CustomDatabricksLLM(LLM):
    """LangChain-compatible LLM wrapper around a Databricks model-serving endpoint."""

    endpoint_url: str
    bearer_token: str
    issue: str
    language: str
    temperature: float
    texter_name: str = ""
    """Optional texter name forwarded to the serving endpoint."""

    def generate_databricks_request(self, prompt: str) -> Dict[str, Any]:
        """Build the JSON payload expected by the Databricks serving endpoint."""
        return {
            "inputs": {
                "conversation_id": [""],
                "prompt": [prompt],
                "issue": [self.issue],
                "language": [self.language],
                "temperature": [self.temperature],
                "max_tokens": [128],
                "texter_name": [self.texter_name],
            }
        }

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Send the prompt to the endpoint and return the generated text."""
        if stop is not None:
            # The serving endpoint does not support stop sequences.
            raise ValueError("stop kwargs are not permitted.")
        request = self.generate_databricks_request(prompt)
        output = process_app_request(request, self.endpoint_url, self.bearer_token)
        return output["predictions"][0]["generated_text"]

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Simulate streaming by emitting the completed response one character
        at a time; the underlying endpoint call itself is not streamed."""
        output = self._call(prompt, stop, run_manager, **kwargs)
        for char in output:
            chunk = GenerationChunk(text=char)
            if run_manager:
                run_manager.on_llm_new_token(chunk.text, chunk=chunk)
            yield chunk

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dictionary of identifying parameters."""
        return {
            # The model name allows users to specify custom token counting
            # rules in LLM monitoring applications (e.g., in LangSmith users
            # can provide per-token pricing for their model and monitor
            # costs for the given LLM).
            "model_name": "CustomDatabricksLLM",
        }

    @property
    def _llm_type(self) -> str:
        """Return the type of language model. Used for logging purposes only."""
        return "custom"
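

# --- Usage sketch (illustrative, not part of the class) ---------------------
# A minimal example of how this LLM might be instantiated and invoked via the
# standard LangChain Runnable interface. The endpoint URL, bearer token, and
# field values below are placeholders, not a confirmed endpoint or credentials.
if __name__ == "__main__":
    llm = CustomDatabricksLLM(
        endpoint_url="https://<workspace>/serving-endpoints/<endpoint>/invocations",  # placeholder
        bearer_token="<DATABRICKS_TOKEN>",  # placeholder; read from an env var in practice
        issue="<issue>",  # placeholder; endpoint-specific routing value
        language="en",
        temperature=0.7,
        texter_name="Alex",  # placeholder
    )
    # invoke() dispatches to _call(); stream() dispatches to _stream(),
    # which here yields the already-complete response character by character.
    print(llm.invoke("Hello, how are you feeling today?"))
    for token in llm.stream("Hello"):
        print(token, end="", flush=True)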