# talk_to_pdf/app/embed.py
import numpy as np
from PIL import Image
import torch
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def get_image_embedding(image_path):
    """
    Get an image embedding using a local CLIP model.

    Args:
        image_path: Path to the image file.

    Returns:
        Embedding as a numpy array, or None if embedding failed.
    """
    try:
        # Import here to avoid an immediate dependency on CLIP
        import clip

        # Load the CLIP model on GPU if available, otherwise CPU
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model, preprocess = clip.load("ViT-B/32", device=device)

        # Load and preprocess the image
        image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)

        # Generate the embedding without tracking gradients
        with torch.no_grad():
            image_features = model.encode_image(image)

        # Convert to numpy and L2-normalize
        embedding = image_features.cpu().numpy()[0]
        embedding = embedding / np.linalg.norm(embedding)

        return embedding
    except ImportError:
        print("CLIP not installed. Falling back to simpler embedding method.")
        return get_simple_embedding(image_path)
    except Exception as e:
        print(f"[CLIP] Error: {e}")
        return None
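

# Note: since get_image_embedding returns an L2-normalized vector, the dot product
# of two such embeddings equals their cosine similarity. The helper below is a
# minimal comparison sketch added for illustration; it is not part of the original
# module and its name is hypothetical.
def cosine_similarity(a, b):
    """Cosine similarity between two 1-D numpy vectors (assumed non-zero)."""
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))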


def get_simple_embedding(image_path):
    """
    A simpler fallback embedding method using image histograms.

    This is much less sophisticated than CLIP but can work as a backup.

    Args:
        image_path: Path to the image file.

    Returns:
        Embedding as a numpy array, or None if embedding failed.
    """
    try:
        # Open the image and convert to grayscale
        image = Image.open(image_path).convert('L')

        # Resize for consistency
        image = image.resize((224, 224))

        # Convert to a numpy array
        img_array = np.array(image)

        # Create a simple histogram-based feature (64 intensity bins)
        hist, _ = np.histogram(img_array, bins=64, range=(0, 256))

        # Normalize the histogram
        embedding = hist / np.linalg.norm(hist)

        return embedding
    except Exception as e:
        print(f"[Simple Embedding] Error: {e}")
        return None
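

# Minimal usage sketch (hypothetical image path, for illustration only; not part
# of the original module): embed a sample image, falling back to the histogram
# method automatically if CLIP is not installed.
if __name__ == "__main__":
    sample_path = "sample.png"  # hypothetical file used only to demonstrate the call
    vec = get_image_embedding(sample_path)
    if vec is not None:
        print(f"Embedding of length {len(vec)}, L2 norm {np.linalg.norm(vec):.3f}")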