import base64
from typing import Dict, List, Union

from langchain.chains import TransformChain
from langchain import globals
from langchain_core.messages import HumanMessage
from langchain_core.runnables import chain
from langchain_openai import ChatOpenAI


def _encode_image(image_path: str) -> str:
    """Read the file at *image_path* and return its base64-encoded contents."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def load_image(inputs: dict) -> dict:
    """Load an image from file and encode it as base64.

    Args:
        inputs: Mapping with an ``image_path`` key naming the file to read.

    Returns:
        ``{"image": <base64 string>}`` for the downstream model step.
    """
    # Helper hoisted to module level so it is not re-defined on every call.
    return {"image": _encode_image(inputs["image_path"])}


# Chain step that transforms {"image_path": ...} into {"image": <base64>}.
load_image_chain = TransformChain(
    input_variables=["image_path"],
    output_variables=["image"],
    transform=load_image,
)


@chain
def image_model(inputs: dict) -> Union[str, List[str], dict]:
    """Invoke the vision model with the prompt, format instructions and image.

    Args:
        inputs: Mapping with ``prompt`` (str), ``parser`` (an output parser
            exposing ``get_format_instructions()``) and ``image`` (base64 str).

    Returns:
        The raw model response content, to be consumed by the output parser.
    """
    model = ChatOpenAI(temperature=0.1, model="gpt-4o", max_tokens=1024)
    parser = inputs["parser"]
    # NOTE(review): the data URL hard-codes image/jpeg — confirm callers only
    # supply JPEG files, or derive the MIME type from the file extension.
    msg = model.invoke(
        [
            HumanMessage(
                content=[
                    {"type": "text", "text": inputs["prompt"]},
                    {"type": "text", "text": parser.get_format_instructions()},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{inputs['image']}"
                        },
                    },
                ]
            )
        ]
    )
    return msg.content


def get_image_informations(image_path: str, prompt, parser) -> dict:
    """Run the full pipeline: load the image, call the model, parse the output.

    Args:
        image_path: Path to the image file on disk.
        prompt: Text prompt sent to the model alongside the image.
        parser: Output parser; supplies format instructions and parses the
            model's raw response.

    Returns:
        The parsed model output.
    """
    vision_chain = load_image_chain | image_model | parser
    # image_path is already a str — the former f'{image_path}' wrapper was
    # redundant and has been removed.
    return vision_chain.invoke(
        {"image_path": image_path, "prompt": prompt, "parser": parser}
    )