import base64
from typing import List, Union

from langchain.chains import TransformChain
from langchain_core.messages import HumanMessage
from langchain_core.runnables import chain
from langchain_openai import ChatOpenAI
def load_image(inputs: dict) -> dict:
    """Load image from file and encode it as base64."""
    image_path = inputs["image_path"]

    def encode_image(image_path):
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    image_base64 = encode_image(image_path)
    return {"image": image_base64}
# Wrap the loader in a TransformChain so it can be piped into the vision step.
# Extra input keys such as "prompt" and "parser" are passed through alongside
# the "image" output, which is why image_model can read them from its inputs.
load_image_chain = TransformChain(
    input_variables=["image_path"],
    output_variables=["image"],
    transform=load_image,
)
@chain
def image_model(inputs: dict) -> Union[str, List[str], dict]:
    """Invoke the multimodal model with the image and prompt."""
    model = ChatOpenAI(temperature=0.1, model="gpt-4o", max_tokens=1024)
    parser = inputs["parser"]
    msg = model.invoke(
        [
            HumanMessage(
                content=[
                    {"type": "text", "text": inputs["prompt"]},
                    {"type": "text", "text": parser.get_format_instructions()},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}"},
                    },
                ]
            )
        ]
    )
    return msg.content
def get_image_informations(image_path: str, prompt: str, parser) -> dict:
    """Run the full pipeline: load the image, query the model, parse the output."""
    vision_chain = load_image_chain | image_model | parser
    return vision_chain.invoke(
        {"image_path": image_path, "prompt": prompt, "parser": parser}
    )
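

# Illustrative usage sketch, not part of the original listing: the schema,
# parser choice, prompt, and file path below are assumptions showing how
# get_image_informations can be called end to end (requires OPENAI_API_KEY).
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field


class ImageInformation(BaseModel):
    """Hypothetical schema describing the fields we ask the model to extract."""

    image_description: str = Field(description="a short description of the image")
    main_objects: List[str] = Field(description="list of the main objects in the picture")


if __name__ == "__main__":
    parser = JsonOutputParser(pydantic_object=ImageInformation)
    prompt = (
        "Given the image, provide a short description and a list of the main "
        "objects in the picture."
    )
    # "photo.jpg" is a placeholder path; replace it with a real image file.
    result = get_image_informations("photo.jpg", prompt, parser)
    print(result)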