# data_extraction_demo/functions.py
from langchain.chains import TransformChain
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langchain import globals
from langchain_core.runnables import chain
import base64
from typing import Dict,List,Union
def load_image(inputs: dict) -> dict:
    """Load an image from disk and return it base64-encoded.

    Args:
        inputs: Mapping with key ``"image_path"`` pointing to an image
            file on disk.

    Returns:
        Dict with key ``"image"`` holding the UTF-8 base64 string of
        the file's raw bytes.

    Raises:
        KeyError: If ``"image_path"`` is missing from ``inputs``.
        OSError: If the file cannot be opened or read.
    """
    image_path = inputs["image_path"]
    # Read as raw bytes; base64 makes the payload safe to embed in a
    # data URL for the vision model downstream.
    with open(image_path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")
    return {"image": encoded}
# Runnable step mapping {"image_path": ...} -> {"image": <base64 str>}
# by delegating to load_image above.
load_image_chain = TransformChain(
    input_variables=["image_path"],
    output_variables=["image"],
    transform=load_image,
)
@chain
def image_model(inputs: dict) -> Union[str, List[str], dict]:
    """Send prompt, format instructions, and image to the vision model.

    Expects ``inputs`` to carry ``"prompt"`` (text instruction),
    ``"parser"`` (object providing ``get_format_instructions()``), and
    ``"image"`` (base64-encoded JPEG string).

    Returns:
        The raw ``content`` of the model's reply message.
    """
    parser = inputs["parser"]
    # Assemble the multimodal message: prompt text, the parser's output
    # format instructions, then the image as a base64 data URL.
    parts = [
        {"type": "text", "text": inputs["prompt"]},
        {"type": "text", "text": parser.get_format_instructions()},
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}"},
        },
    ]
    # NOTE(review): a fresh client is built on every invocation; callers
    # may rely on this picking up current env credentials.
    llm = ChatOpenAI(temperature=0.1, model="gpt-4o", max_tokens=1024)
    response = llm.invoke([HumanMessage(content=parts)])
    return response.content
def get_image_informations(image_path: str, prompt, parser) -> dict:
    """Run the full vision pipeline: load image -> model -> parse.

    Args:
        image_path: Path to the image file on disk.
        prompt: Text instruction sent alongside the image.
        parser: Output parser; supplies format instructions for the
            model and parses its reply into structured data.

    Returns:
        The parser's structured result (a dict).
    """
    # Compose the three stages into a single runnable pipeline.
    pipeline = load_image_chain | image_model | parser
    payload = {
        "image_path": f"{image_path}",
        "prompt": prompt,
        "parser": parser,
    }
    return pipeline.invoke(payload)