# talk_to_pdf/app/embed.py
import numpy as np
from PIL import Image
import torch
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def get_image_embedding(image_path):
    """
    Get an image embedding using a local CLIP model.

    Args:
        image_path: Path to the image file.

    Returns:
        Embedding as a numpy array, or None if embedding failed.
    """
    try:
        # Import here to avoid an immediate dependency on CLIP
        import clip

        # Load the CLIP model on GPU if available, otherwise CPU
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model, preprocess = clip.load("ViT-B/32", device=device)

        # Load and preprocess the image
        image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)

        # Generate the embedding without tracking gradients
        with torch.no_grad():
            image_features = model.encode_image(image)

        # Convert to numpy and L2-normalize
        embedding = image_features.cpu().numpy()[0]
        embedding = embedding / np.linalg.norm(embedding)

        return embedding
    except ImportError:
        print("CLIP not installed. Falling back to simpler embedding method.")
        return get_simple_embedding(image_path)
    except Exception as e:
        print(f"[CLIP] Error: {e}")
        return None
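

# Note: since get_image_embedding returns an L2-normalized vector, the dot product
# of two such embeddings equals their cosine similarity. The helper below is a
# minimal comparison sketch added for illustration; it is not part of the original
# module and its name is hypothetical.
def cosine_similarity(a, b):
    """Cosine similarity between two 1-D numpy vectors (assumed non-zero)."""
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))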


def get_simple_embedding(image_path):
    """
    A simpler fallback embedding method using image histograms.

    This is much less sophisticated than CLIP but can work as a backup.

    Args:
        image_path: Path to the image file.

    Returns:
        Embedding as a numpy array, or None if embedding failed.
    """
    try:
        # Open the image and convert to grayscale
        image = Image.open(image_path).convert('L')

        # Resize for consistency
        image = image.resize((224, 224))

        # Convert to a numpy array
        img_array = np.array(image)

        # Create a simple histogram-based feature (64 intensity bins)
        hist, _ = np.histogram(img_array, bins=64, range=(0, 256))

        # Normalize the histogram
        embedding = hist / np.linalg.norm(hist)

        return embedding
    except Exception as e:
        print(f"[Simple Embedding] Error: {e}")
        return None
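

# Minimal usage sketch (hypothetical image path, for illustration only; not part
# of the original module): embed a sample image, falling back to the histogram
# method automatically if CLIP is not installed.
if __name__ == "__main__":
    sample_path = "sample.png"  # hypothetical file used only to demonstrate the call
    vec = get_image_embedding(sample_path)
    if vec is not None:
        print(f"Embedding of length {len(vec)}, L2 norm {np.linalg.norm(vec):.3f}")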