multimodal-chat-MBTI-ISTP

Running on Zero

App Files Files Community

multimodal-chat-MBTI-ISTP / app.py

ginipick

Update app.py

500ab0b verified about 2 months ago

raw

history blame

40 kB

	#!/usr/bin/env python

	import os
	import re
	import tempfile
	import gc # Added garbage collector
	from collections.abc import Iterator
	from threading import Thread
	import json
	import requests
	import cv2
	import base64
	import logging
	import time
	from urllib.parse import quote # For URL encoding

	import gradio as gr
	import spaces
	import torch
	from loguru import logger
	from PIL import Image
	from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer

	# CSV/TXT/PDF analysis
	import pandas as pd
	import PyPDF2

	# =============================================================================
	# (New) Image API related functions
	# =============================================================================
	from gradio_client import Client

	# Base URL of the remote image-generation service. It can be overridden with
	# the IMAGE_API_URL environment variable (same pattern as MODEL_ID and
	# MAX_NUM_IMAGES); the previous hard-coded endpoint remains the default.
	API_URL = os.getenv("IMAGE_API_URL", "http://211.233.58.201:7896")

	# Root logger setup for the standard-library `logging` calls in this file.
	logging.basicConfig(
	    level=logging.DEBUG,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	)

	# =============================================================================
	# Load MBTI setting from mbti.json and map to full description.
	# =============================================================================
	# Read the persona key from mbti.json. The file is expected to hold a single
	# JSON string such as "entj"; anything else falls back to "intp".
	try:
	    with open("mbti.json", "r", encoding="utf-8") as mbti_file:
	        loaded_mbti = json.load(mbti_file)
	except Exception as e:
	    logging.error(f"Error reading mbti.json: {e}")
	    mbti_key = "intp"  # default
	else:
	    if isinstance(loaded_mbti, str):
	        mbti_key = loaded_mbti.strip().lower()
	    else:
	        mbti_key = "intp"

	# Lower-case MBTI code -> full persona description inserted into the prompt.
	mbti_mapping = {
	    "intj": "INTJ (The Architect) - Future-oriented with innovative strategies and thorough analysis. Example: [Dana Scully](https://en.wikipedia.org/wiki/Dana_Scully)",
	    "intp": "INTP (The Thinker) - Excels at theoretical analysis and creative problem solving. Example: [Velma Dinkley](https://en.wikipedia.org/wiki/Velma_Dinkley)",
	    "entj": "ENTJ (The Commander) - Strong leadership and clear goals with efficient strategic planning. Example: [Miranda Priestly](https://en.wikipedia.org/wiki/Miranda_Priestly)",
	    "entp": "ENTP (The Debater) - Innovative, challenge-seeking, and enjoys exploring new possibilities. Example: [Harley Quinn](https://en.wikipedia.org/wiki/Harley_Quinn)",
	    "infj": "INFJ (The Advocate) - Insightful, idealistic and morally driven. Example: [Wonder Woman](https://en.wikipedia.org/wiki/Wonder_Woman)",
	    "infp": "INFP (The Mediator) - Passionate and idealistic, pursuing core values with creativity. Example: [Amélie Poulain](https://en.wikipedia.org/wiki/Am%C3%A9lie)",
	    "enfj": "ENFJ (The Protagonist) - Empathetic and dedicated to social harmony. Example: [Mulan](https://en.wikipedia.org/wiki/Mulan_(Disney))",
	    "enfp": "ENFP (The Campaigner) - Inspiring and constantly sharing creative ideas. Example: [Elle Woods](https://en.wikipedia.org/wiki/Legally_Blonde)",
	    "istj": "ISTJ (The Logistician) - Systematic, dependable, and values tradition and rules. Example: [Clarice Starling](https://en.wikipedia.org/wiki/Clarice_Starling)",
	    "isfj": "ISFJ (The Defender) - Compassionate and attentive to others’ needs. Example: [Molly Weasley](https://en.wikipedia.org/wiki/Molly_Weasley)",
	    "estj": "ESTJ (The Executive) - Organized, practical, and demonstrates clear execution skills. Example: [Monica Geller](https://en.wikipedia.org/wiki/Monica_Geller)",
	    "esfj": "ESFJ (The Consul) - Outgoing, cooperative, and an effective communicator. Example: [Rachel Green](https://en.wikipedia.org/wiki/Rachel_Green)",
	    "istp": "ISTP (The Virtuoso) - Analytical and resourceful, solving problems with quick thinking. Example: [Black Widow (Natasha Romanoff)](https://en.wikipedia.org/wiki/Black_Widow_(Marvel_Comics))",
	    "isfp": "ISFP (The Adventurer) - Creative, sensitive, and appreciates artistic expression. Example: [Arwen](https://en.wikipedia.org/wiki/Arwen)",
	    "estp": "ESTP (The Entrepreneur) - Bold and action-oriented, thriving on challenges. Example: [Lara Croft](https://en.wikipedia.org/wiki/Lara_Croft)",
	    "esfp": "ESFP (The Entertainer) - Energetic, spontaneous, and radiates positive energy. Example: [Phoebe Buffay](https://en.wikipedia.org/wiki/Phoebe_Buffay)",
	}

	# Resolve the description once; unknown keys use the INTP entry.
	if mbti_key in mbti_mapping:
	    fixed_mbti = mbti_mapping[mbti_key]
	else:
	    fixed_mbti = mbti_mapping["intp"]

	# =============================================================================
	# Test API Connection function
	# =============================================================================
	def test_api_connection() -> str:
	    """Check that a client for the image API can be constructed.

	    Returns:
	        A human-readable status string; on failure it includes the error.
	    """
	    try:
	        # Only construction is attempted; no prediction call is made.
	        Client(API_URL)
	    except Exception as e:
	        logging.error(f"API connection test failed: {e}")
	        return f"API connection failed: {e}"
	    return "API connection successful: Operating normally"

	# =============================================================================
	# Image Generation function
	# =============================================================================
	def generate_image(prompt: str, width: float, height: float, guidance: float, inference_steps: float, seed: float):
	    """Request one image from the remote `/generate_image` endpoint.

	    Numeric arguments are coerced to the int/float types sent to the API.

	    Returns:
	        A pair ``(image, info)``. ``image`` is whatever the API returned as
	        its first element (or the raw result when it is not a non-empty
	        list/tuple), or ``None`` on failure. ``info`` is the second element
	        of the result, "Unknown seed", or an "Error: ..." message.
	    """
	    if not prompt:
	        return None, "Error: A prompt is required."
	    try:
	        logging.info(f"Calling image generation API with prompt: {prompt}")
	        api = Client(API_URL)
	        request_args = dict(
	            prompt=prompt,
	            width=int(width),
	            height=int(height),
	            guidance=float(guidance),
	            inference_steps=int(inference_steps),
	            seed=int(seed),
	            do_img2img=False,
	            init_image=None,
	            image2image_strength=0.8,
	            resize_img=True,
	            api_name="/generate_image",
	        )
	        result = api.predict(**request_args)
	        is_sequence = isinstance(result, (list, tuple))
	        length = len(result) if is_sequence else 'unknown'
	        logging.info(f"Image generation result: {type(result)}, length: {length}")
	        if not (is_sequence and len(result) > 0):
	            return result, "Unknown seed"
	        seed_info = result[1] if len(result) > 1 else "Unknown seed"
	        return result[0], seed_info
	    except Exception as e:
	        logging.error(f"Image generation failed: {e}")
	        return None, f"Error: {e}"

	# Base64 padding fix function
	def fix_base64_padding(data):
	    """Return the Base64 payload of *data* padded to a multiple of four.

	    Bytes input is decoded as UTF-8, and everything up to and including the
	    first "base64," marker (as found in data URLs) is dropped.
	    """
	    text = data.decode('utf-8') if isinstance(data, bytes) else data
	    _, marker, payload = text.partition("base64,")
	    if marker:
	        text = payload
	    # -len % 4 is 0 for already-aligned input, otherwise the missing count.
	    return text + '=' * (-len(text) % 4)

	# =============================================================================
	# Memory cleanup function
	# =============================================================================
	def clear_cuda_cache():
	    """Run the garbage collector, then release cached CUDA memory.

	    Collection happens first so that memory held by unreachable objects is
	    already freed when the CUDA cache is emptied. The collector runs even
	    when CUDA is not available.
	    """
	    gc.collect()
	    if torch.cuda.is_available():
	        torch.cuda.empty_cache()

	# =============================================================================
	# SerpHouse API functions
	# =============================================================================
	# Bearer token for the SerpHouse API; empty string when the variable is unset.
	SERPHOUSE_API_KEY = os.environ.get("SERPHOUSE_API_KEY", "")

	def extract_keywords(text: str, top_k: int = 5) -> str:
	    """Build a short search query from the first words of *text*.

	    Every character that is not an ASCII letter, a digit, a Hangul syllable
	    or whitespace is removed; the first ``top_k`` whitespace-separated
	    tokens are then joined with single spaces.
	    """
	    allowed_only = re.compile(r"[^a-zA-Z0-9가-힣\s]").sub("", text)
	    leading_tokens = allowed_only.split()[:top_k]
	    return " ".join(leading_tokens)

	def _find_organic_results(data):
	    """Return the first non-empty "organic" list in a SerpHouse payload, else None.

	    Checked in order: ``data["results"]["organic"]``,
	    ``data["results"]["results"]["organic"]`` and ``data["organic"]``.
	    """
	    if not isinstance(data, dict):
	        return None
	    candidates = []
	    results = data.get("results", {})
	    if isinstance(results, dict):
	        candidates.append(results.get("organic"))
	        nested = results.get("results")
	        if isinstance(nested, dict):
	            candidates.append(nested.get("organic"))
	    candidates.append(data.get("organic"))
	    return next((candidate for candidate in candidates if candidate), None)


	def do_web_search(query: str) -> str:
	    """Call the SerpHouse LIVE API and return Markdown-formatted search results.

	    Args:
	        query: Search terms sent as the ``q`` parameter.

	    Returns:
	        An instruction header followed by up to 20 formatted results, a
	        "no results" notice when no organic results are found, or a
	        "Web search failed: ..." message when anything raises.
	    """
	    try:
	        url = "https://api.serphouse.com/serp/live"
	        params = {
	            "q": query,
	            "domain": "google.com",
	            "serp_type": "web",
	            "device": "desktop",
	            "lang": "en",
	            "num": "20"
	        }
	        headers = {"Authorization": f"Bearer {SERPHOUSE_API_KEY}"}
	        logger.info(f"Calling SerpHouse API with query: {query}")
	        response = requests.get(url, headers=headers, params=params, timeout=60)
	        response.raise_for_status()
	        organic = _find_organic_results(response.json())
	        if not organic:
	            logger.warning("Organic results not found in response.")
	            return "No web search results available or the API response structure is unexpected."
	        # Keep at most 20 entries and skip anything that is not a mapping so
	        # one malformed entry does not discard the whole result set.
	        entries = [item for item in organic[:20] if isinstance(item, dict)]
	        summary_lines = []
	        for idx, item in enumerate(entries, start=1):
	            title = item.get("title", "No Title")
	            link = item.get("link", "#")
	            snippet = item.get("snippet", "No Description")
	            displayed_link = item.get("displayed_link", link)
	            summary_lines.append(
	                f"### Result {idx}: {title}\n\n"
	                f"{snippet}\n\n"
	                f"Source: [{displayed_link}]({link})\n\n"
	                f"---\n"
	            )
	        instructions = """
	# Web Search Results
	Below are the search results. Use this information to answer the query:
	1. Refer to each result's title, description, and source link.
	2. In your answer, explicitly cite the source of any used information (e.g., "[Source Title](link)").
	3. Include the actual source links in your response.
	4. Synthesize information from multiple sources.
	5. At the end, add a "References:" section listing the main source links.
	"""
	        return instructions + "\n".join(summary_lines)
	    except Exception as e:
	        # Best-effort: the caller embeds this text into the prompt as-is.
	        logger.error(f"Web search failed: {e}")
	        return f"Web search failed: {str(e)}"

	# =============================================================================
	# Model and processor loading
	# =============================================================================
	# Size limits used by the file-analysis helpers and by the inference function.
	MAX_CONTENT_CHARS = 2000
	MAX_INPUT_LENGTH = 2096

	# Checkpoint to load; overridable through the MODEL_ID environment variable.
	model_id = os.environ.get("MODEL_ID", "VIDraft/Gemma-3-R1984-4B")
	processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
	_model_load_options = dict(
	    device_map="auto",
	    torch_dtype=torch.bfloat16,
	    attn_implementation="eager",
	)
	model = Gemma3ForConditionalGeneration.from_pretrained(model_id, **_model_load_options)

	# Upper bound on images per conversation, enforced during media validation.
	MAX_NUM_IMAGES = int(os.environ.get("MAX_NUM_IMAGES", "5"))

	# =============================================================================
	# CSV, TXT, PDF analysis functions
	# =============================================================================
	def analyze_csv_file(path: str) -> str:
	    """Render a preview of a CSV file as plain text.

	    At most the first 50 rows and 10 columns are rendered, and the rendered
	    table is cut to MAX_CONTENT_CHARS characters. Any failure is reported
	    in the returned string instead of being raised.
	    """
	    try:
	        # Slicing is a no-op for frames already within the 50x10 window.
	        preview = pd.read_csv(path).iloc[:50, :10]
	        rendered = preview.to_string()
	        if len(rendered) > MAX_CONTENT_CHARS:
	            rendered = rendered[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
	        return f"[CSV File: {os.path.basename(path)}]\n\n{rendered}"
	    except Exception as e:
	        return f"CSV file read failed ({os.path.basename(path)}): {e}"

	def analyze_txt_file(path: str, max_chars=None) -> str:
	    """Return the (possibly truncated) contents of a UTF-8 text file.

	    Args:
	        path: File to read.
	        max_chars: Maximum number of characters to keep. ``None`` (the
	            default) uses the module-level MAX_CONTENT_CHARS.

	    Returns:
	        The file name header followed by the text, with a truncation marker
	        when the file is longer than the limit; or a "TXT file read failed"
	        message when the file cannot be read.
	    """
	    limit = MAX_CONTENT_CHARS if max_chars is None else max_chars
	    try:
	        with open(path, "r", encoding="utf-8") as f:
	            # One character past the limit is enough to detect overflow,
	            # so large files are never loaded in full.
	            text = f.read(limit + 1)
	        if len(text) > limit:
	            text = text[:limit] + "\n...(truncated)..."
	        return f"[TXT File: {os.path.basename(path)}]\n\n{text}"
	    except Exception as e:
	        return f"TXT file read failed ({os.path.basename(path)}): {str(e)}"

	def pdf_to_markdown(pdf_path: str) -> str:
	    """Extract text from the first pages of a PDF as Markdown sections.

	    Up to five pages are read. Each page gets an equal share of
	    MAX_CONTENT_CHARS and the combined text is capped at MAX_CONTENT_CHARS
	    again. Any failure is reported in the returned string.
	    """
	    sections = []
	    try:
	        with open(pdf_path, "rb") as pdf_file:
	            reader = PyPDF2.PdfReader(pdf_file)
	            total_pages = len(reader.pages)
	            max_pages = min(5, total_pages)
	            for page_num in range(max_pages):
	                page_text = (reader.pages[page_num].extract_text() or "").strip()
	                if not page_text:
	                    continue
	                # Computed inside the loop so an empty PDF never divides by zero.
	                per_page_limit = MAX_CONTENT_CHARS // max_pages
	                if len(page_text) > per_page_limit:
	                    page_text = page_text[:per_page_limit] + "...(truncated)"
	                sections.append(f"## Page {page_num+1}\n\n{page_text}\n")
	            if total_pages > max_pages:
	                sections.append(f"\n...(Displaying only {max_pages} out of {total_pages} pages)...")
	    except Exception as e:
	        return f"PDF file read failed ({os.path.basename(pdf_path)}): {e}"
	    combined = "\n".join(sections)
	    if len(combined) > MAX_CONTENT_CHARS:
	        combined = combined[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
	    return f"[PDF File: {os.path.basename(pdf_path)}]\n\n{combined}"

	# =============================================================================
	# Check media file limits
	# =============================================================================
	def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
	    """Count image and video files among *paths*.

	    A path ending in ".mp4" is a video; otherwise a path ending in png, jpg,
	    jpeg, gif or webp (case-insensitive) is an image.

	    Returns:
	        ``(image_count, video_count)``.
	    """
	    # Unescaped `|` is alternation; the previous `\|` only matched a literal
	    # pipe character, so no image extension was ever recognised.
	    image_pattern = re.compile(r"\.(png|jpg|jpeg|gif|webp)$", re.IGNORECASE)
	    image_count = 0
	    video_count = 0
	    for path in paths:
	        if path.endswith(".mp4"):
	            video_count += 1
	        elif image_pattern.search(path):
	            image_count += 1
	    return image_count, video_count

	def count_files_in_history(history: list[dict]) -> tuple[int, int]:
	    """Count image and video files previously sent by the user.

	    Only user entries whose content is a non-empty list are inspected, and
	    only the first element of that list is treated as the file path.

	    Returns:
	        ``(image_count, video_count)``.
	    """
	    # Unescaped `|` is alternation; the previous `\|` only matched a literal
	    # pipe character, so history images were never counted.
	    image_pattern = re.compile(r"\.(png|jpg|jpeg|gif|webp)$", re.IGNORECASE)
	    image_count = 0
	    video_count = 0
	    for item in history:
	        if item["role"] != "user" or isinstance(item["content"], str):
	            continue
	        if isinstance(item["content"], list) and len(item["content"]) > 0:
	            file_path = item["content"][0]
	            if isinstance(file_path, str):
	                if file_path.endswith(".mp4"):
	                    video_count += 1
	                elif image_pattern.search(file_path):
	                    image_count += 1
	    return image_count, video_count

	def validate_media_constraints(message: dict, history: list[dict]) -> bool:
	    """Check the media attached to *message* against the app's limits.

	    Rules, counted over the new message plus the history:
	      * at most one video;
	      * a video cannot be combined with images or with "<image>" tags;
	      * without a video, at most MAX_NUM_IMAGES images;
	      * when the text contains "<image>" tags, their number must equal the
	        number of image files in the new message.

	    A ``gr.Warning`` is raised in the UI for the first violated rule.

	    Returns:
	        True when the message is acceptable, False otherwise.
	    """
	    # Unescaped `|` is alternation; the previous `\|` only matched a literal
	    # pipe character, so image files were never recognised here.
	    image_pattern = re.compile(r"\.(png|jpg|jpeg|gif|webp)$", re.IGNORECASE)
	    media_files = [f for f in message["files"] if image_pattern.search(f) or f.endswith(".mp4")]
	    new_image_count, new_video_count = count_files_in_new_message(media_files)
	    history_image_count, history_video_count = count_files_in_history(history)
	    image_count = history_image_count + new_image_count
	    video_count = history_video_count + new_video_count
	    if video_count > 1:
	        gr.Warning("Only one video file is supported.")
	        return False
	    if video_count == 1:
	        if image_count > 0:
	            gr.Warning("Mixing images and a video is not allowed.")
	            return False
	        if "<image>" in message["text"]:
	            gr.Warning("The <image> tag cannot be used together with a video file.")
	            return False
	    if video_count == 0 and image_count > MAX_NUM_IMAGES:
	        gr.Warning(f"You can upload a maximum of {MAX_NUM_IMAGES} images.")
	        return False
	    if "<image>" in message["text"]:
	        image_files = [f for f in message["files"] if image_pattern.search(f)]
	        image_tag_count = message["text"].count("<image>")
	        if image_tag_count != len(image_files):
	            gr.Warning("The number of <image> tags does not match the number of image files provided.")
	            return False
	    return True

	# =============================================================================
	# Video processing functions
	# =============================================================================
	def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
	    """Sample up to five half-size RGB frames from a video.

	    Frames are taken every ``max(1, int(fps), total_frames // 10)`` frames.

	    Returns:
	        A list of ``(frame, timestamp_seconds)`` pairs. The timestamp is 0.0
	        when the container reports no usable frame rate.
	    """
	    vidcap = cv2.VideoCapture(video_path)
	    try:
	        fps = vidcap.get(cv2.CAP_PROP_FPS)
	        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
	        # A missing or zero frame rate must not produce a zero step
	        # (range() would raise) or a division by zero below.
	        has_fps = fps > 0
	        frame_interval = max(1, int(fps) if has_fps else 0, total_frames // 10)
	        frames = []
	        for i in range(0, total_frames, frame_interval):
	            vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
	            success, image = vidcap.read()
	            if not success:
	                continue
	            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
	            image = cv2.resize(image, (0, 0), fx=0.5, fy=0.5)
	            pil_image = Image.fromarray(image)
	            timestamp = round(i / fps, 2) if has_fps else 0.0
	            frames.append((pil_image, timestamp))
	            if len(frames) >= 5:
	                break
	        return frames
	    finally:
	        # Release the capture even when decoding raises.
	        vidcap.release()

	def process_video(video_path: str) -> tuple[list[dict], list[str]]:
	    """Convert a video into alternating caption/image content entries.

	    Each sampled frame is saved as a PNG temp file that is NOT deleted here;
	    the paths are returned so the caller can remove them later.

	    Returns:
	        ``(content, temp_files)``: the chat content entries and the list of
	        temp file paths that were created.
	    """
	    content = []
	    temp_files = []
	    for pil_image, timestamp in downsample_video(video_path):
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
	            frame_path = temp_file.name
	            pil_image.save(frame_path)
	        temp_files.append(frame_path)
	        content.extend([
	            {"type": "text", "text": f"Frame {timestamp}:"},
	            {"type": "image", "url": frame_path},
	        ])
	    return content, temp_files

	# =============================================================================
	# Interleaved <image> processing function
	# =============================================================================
	def process_interleaved_images(message: dict) -> list[dict]:
	    """Split the message text on "<image>" tags and interleave image files.

	    Each "<image>" tag is replaced, in order, by the next image file from
	    ``message["files"]``. A tag with no image left is kept as literal text.
	    Other text parts are stripped; whitespace-only parts are kept as-is.

	    Returns:
	        Content entries of type "text" and "image" in reading order.
	    """
	    parts = re.split(r"(<image>)", message["text"])
	    content = []
	    # Unescaped `|` is alternation; the previous `\|` only matched a literal
	    # pipe character, so the image list was always empty.
	    image_pattern = re.compile(r"\.(png|jpg|jpeg|gif|webp)$", re.IGNORECASE)
	    image_files = [f for f in message["files"] if image_pattern.search(f)]
	    image_index = 0
	    for part in parts:
	        if part == "<image>" and image_index < len(image_files):
	            content.append({"type": "image", "url": image_files[image_index]})
	            image_index += 1
	        elif part.strip():
	            content.append({"type": "text", "text": part.strip()})
	        else:
	            if isinstance(part, str) and part != "<image>":
	                content.append({"type": "text", "text": part})
	    return content

	# =============================================================================
	# File processing -> content creation
	# =============================================================================
	def is_image_file(file_path: str) -> bool:
	    """Return True when *file_path* ends in png, jpg, jpeg, gif or webp (any case)."""
	    # Unescaped `|` is alternation; the previous `\|` only matched a literal
	    # pipe character, so this function returned False for every real image.
	    return bool(re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE))

	def is_video_file(file_path: str) -> bool:
	    """Return True when *file_path* ends with the lower-case ".mp4" suffix."""
	    video_suffix = ".mp4"
	    return file_path.endswith(video_suffix)

	def is_document_file(file_path: str) -> bool:
	    """Return True for .pdf, .csv or .txt paths, ignoring case."""
	    document_suffixes = (".pdf", ".csv", ".txt")
	    return file_path.lower().endswith(document_suffixes)

	def process_new_user_message(message: dict) -> tuple[list[dict], list[str]]:
	    """Turn the incoming chat message into model content entries.

	    Order of the produced content: the message text, then CSV, TXT and PDF
	    summaries, then either the frames of the first video, the interleaved
	    "<image>" layout, or the images appended one by one.

	    Returns:
	        ``(content, temp_files)`` where ``temp_files`` lists temp files
	        created for video frames.
	    """
	    temp_files = []
	    files = message["files"]
	    if not files:
	        return [{"type": "text", "text": message["text"]}], temp_files

	    video_files = [f for f in files if is_video_file(f)]
	    image_files = [f for f in files if is_image_file(f)]
	    content_list = [{"type": "text", "text": message["text"]}]

	    # Documents are grouped by kind: every CSV first, then TXT, then PDF.
	    document_handlers = (
	        (".csv", analyze_csv_file),
	        (".txt", analyze_txt_file),
	        (".pdf", pdf_to_markdown),
	    )
	    for suffix, summarize in document_handlers:
	        for doc_path in files:
	            if doc_path.lower().endswith(suffix):
	                content_list.append({"type": "text", "text": summarize(doc_path)})

	    if video_files:
	        # Only the first video is used; images are ignored in this case.
	        video_content, video_temp_files = process_video(video_files[0])
	        content_list += video_content
	        temp_files.extend(video_temp_files)
	        return content_list, temp_files

	    if "<image>" in message["text"] and image_files:
	        interleaved_content = process_interleaved_images({"text": message["text"], "files": image_files})
	        # The leading text entry is already represented in the interleaved parts.
	        return interleaved_content + content_list[1:], temp_files

	    content_list.extend({"type": "image", "url": img_path} for img_path in image_files)
	    return content_list, temp_files

	# =============================================================================
	# Convert history to LLM messages
	# =============================================================================
	def process_history(history: list[dict]) -> list[dict]:
	    """Convert chat history into the message list used for the chat template.

	    Consecutive user entries are merged into one user message. A user entry
	    whose content is a non-empty list is treated as a file: images become
	    image entries, anything else a "[File: name]" text placeholder.
	    """
	    messages = []
	    pending_user_parts = []
	    for item in history:
	        if item["role"] == "assistant":
	            # Flush the user parts collected so far before the reply.
	            if pending_user_parts:
	                messages.append({"role": "user", "content": pending_user_parts})
	                pending_user_parts = []
	            messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
	            continue
	        content = item["content"]
	        if isinstance(content, str):
	            pending_user_parts.append({"type": "text", "text": content})
	        elif isinstance(content, list) and len(content) > 0:
	            file_path = content[0]
	            if is_image_file(file_path):
	                entry = {"type": "image", "url": file_path}
	            else:
	                entry = {"type": "text", "text": f"[File: {os.path.basename(file_path)}]"}
	            pending_user_parts.append(entry)
	    if pending_user_parts:
	        messages.append({"role": "user", "content": pending_user_parts})
	    return messages

	# =============================================================================
	# Model generation function (with OOM catching)
	# =============================================================================
	def _model_gen_with_oom_catch(**kwargs):
	    """Run ``model.generate`` and always clear the CUDA cache afterwards.

	    Raises:
	        RuntimeError: when generation runs out of GPU memory.
	    """
	    oom_message = "[OutOfMemoryError] Insufficient GPU memory."
	    try:
	        model.generate(**kwargs)
	    except torch.cuda.OutOfMemoryError:
	        raise RuntimeError(oom_message)
	    finally:
	        clear_cuda_cache()

	# =============================================================================
	# Main inference function
	# =============================================================================
	@spaces.GPU(duration=120)
	def run(
	    message: dict,
	    history: list[dict],
	    system_prompt: str = "",
	    max_new_tokens: int = 512,
	    use_web_search: bool = False,
	    web_search_query: str = "",
	    age_group: str = "20s",
	    mbti_personality: str = "",  # Will be supplied as fixed_mbti
	    sexual_openness: int = 2,
	    image_gen: bool = False  # "Image Gen" checkbox status
	) -> Iterator[str]:
	    """Stream the model's reply to *message*.

	    Builds a system message from the persona settings (plus web-search
	    results when enabled), appends the converted history and the new user
	    content, then generates in a background thread and yields the
	    accumulated text after every streamed chunk.

	    ``web_search_query`` and ``image_gen`` are accepted for interface
	    compatibility but are not read here.

	    Yields:
	        The reply text so far; an empty string when media validation fails;
	        an apology message when any exception is raised.
	    """
	    if not validate_media_constraints(message, history):
	        yield ""
	        return
	    temp_files = []
	    # Pre-bound so the cleanup below never depends on how far the try got.
	    inputs = None
	    streamer = None
	    try:
	        # Append persona information (including fixed MBTI info)
	        persona = (
	            f"{system_prompt.strip()}\n\n"
	            f"Gender: Female\n"
	            f"Age Group: {age_group}\n"
	            f"MBTI Persona: {mbti_personality}\n"
	            f"Sexual Openness (1-5): {sexual_openness}\n"
	        )
	        combined_system_msg = f"[System Prompt]\n{persona.strip()}\n\n"

	        if use_web_search:
	            # The search query is derived from the user's own text.
	            ws_query = extract_keywords(message["text"])
	            if ws_query.strip():
	                logger.info(f"[Auto web search keywords] {ws_query!r}")
	                ws_result = do_web_search(ws_query)
	                combined_system_msg += f"[Search Results (Top 20 Items)]\n{ws_result}\n\n"
	                combined_system_msg += (
	                    "[Note: In your answer, cite the above search result links as sources]\n"
	                    "[Important Instructions]\n"
	                    "1. Include a citation in the format \"[Source Title](link)\" for any information from the search results.\n"
	                    "2. Synthesize information from multiple sources when answering.\n"
	                    "3. At the end, add a \"References:\" section listing the main source links.\n"
	                )
	            else:
	                combined_system_msg += "[No valid keywords found; skipping web search]\n\n"
	        messages = []
	        if combined_system_msg.strip():
	            messages.append({"role": "system", "content": [{"type": "text", "text": combined_system_msg.strip()}]})
	        messages.extend(process_history(history))
	        user_content, user_temp_files = process_new_user_message(message)
	        temp_files.extend(user_temp_files)
	        # Cap every text entry of the new message individually.
	        for item in user_content:
	            if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
	                item["text"] = item["text"][:MAX_CONTENT_CHARS] + "\n...(truncated)..."
	        messages.append({"role": "user", "content": user_content})
	        inputs = processor.apply_chat_template(
	            messages,
	            add_generation_prompt=True,
	            tokenize=True,
	            return_dict=True,
	            return_tensors="pt",
	        ).to(device=model.device, dtype=torch.bfloat16)
	        # NOTE(review): the truncation below assigns attributes on the
	        # processor output. Confirm that these assignments reach the mapping
	        # expanded into gen_kwargs, and that cutting only input_ids and
	        # attention_mask keeps any other sequence-shaped tensors consistent.
	        if inputs.input_ids.shape[1] > MAX_INPUT_LENGTH:
	            inputs.input_ids = inputs.input_ids[:, -MAX_INPUT_LENGTH:]
	            if 'attention_mask' in inputs:
	                inputs.attention_mask = inputs.attention_mask[:, -MAX_INPUT_LENGTH:]
	        streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
	        gen_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
	        t = Thread(target=_model_gen_with_oom_catch, kwargs=gen_kwargs)
	        t.start()
	        output_so_far = ""
	        for new_text in streamer:
	            output_so_far += new_text
	            yield output_so_far
	        # The stream has ended, so generation is finishing; wait for the
	        # worker so its own cleanup completes before ours runs.
	        t.join()

	    except Exception as e:
	        logger.error(f"Error in run function: {str(e)}")
	        yield f"Sorry, an error occurred: {str(e)}"
	    finally:
	        for tmp in temp_files:
	            try:
	                if os.path.exists(tmp):
	                    os.unlink(tmp)
	                    logger.info(f"Temporary file deleted: {tmp}")
	            except Exception as ee:
	                logger.warning(f"Failed to delete temporary file {tmp}: {ee}")
	        # Drop the tensor and streamer references before clearing the cache.
	        inputs = None
	        streamer = None
	        clear_cuda_cache()

	# =============================================================================
	# Modified model run function - fixed MBTI from file is used
	# =============================================================================
	def _save_image_bytes(image_bytes):
	    """Write raw image bytes to a persistent ".webp" temp file and return its path."""
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".webp") as temp_file:
	        temp_file.write(image_bytes)
	        return temp_file.name


	def _decode_base64_image(data):
	    """Decode a bare Base64 string or a "data:...;base64," URL into bytes."""
	    if data.startswith('data:'):
	        _, separator, data = data.partition(';base64,')
	        if not separator:
	            raise ValueError("data URL does not contain a ';base64,' payload")
	    # Restore any stripped '=' padding so b64decode accepts the payload.
	    return base64.b64decode(data + '=' * (-len(data) % 4))


	def _resolve_generated_image(image_result, prompt):
	    """Turn an image-API result into ``(local_path_or_None, note_to_append)``.

	    Handled forms, in order: Base64 / data-URL strings, existing local
	    paths, remote "/tmp/" paths (re-requested as Base64), http(s) URLs and
	    objects with a ``save`` method. Anything else is reported as
	    unsupported.
	    """
	    ready_note = "\n\nImage generated and displayed in the gallery below."
	    if isinstance(image_result, str):
	        if image_result.startswith('data:') or (len(image_result) > 100 and '/' not in image_result):
	            try:
	                return _save_image_bytes(_decode_base64_image(image_result)), ready_note
	            except Exception as e:
	                logger.error(f"Error processing Base64 image: {e}")
	                return None, f"\n\n(Error processing image: {e})"
	        if os.path.exists(image_result):
	            return image_result, ready_note
	        if '/tmp/' in image_result:
	            # The path is not readable locally; ask the API for Base64 instead.
	            try:
	                client = Client(API_URL)
	                result = client.predict(
	                    prompt=prompt,
	                    api_name="/generate_base64_image"
	                )
	                if isinstance(result, str) and (result.startswith('data:') or len(result) > 100):
	                    return _save_image_bytes(_decode_base64_image(result)), ready_note
	                return None, "\n\n(Image generation failed: Invalid format)"
	            except Exception as e:
	                logger.error(f"Error calling alternative API: {e}")
	                return None, f"\n\n(Image generation failed: {e})"
	        if image_result.startswith(('http://', 'https://')):
	            try:
	                response = requests.get(image_result, timeout=10)
	                response.raise_for_status()
	                return _save_image_bytes(response.content), ready_note
	            except Exception as e:
	                logger.error(f"URL image download error: {e}")
	                return None, f"\n\n(Error downloading image: {e})"
	    if hasattr(image_result, 'save'):
	        try:
	            with tempfile.NamedTemporaryFile(delete=False, suffix=".webp") as temp_file:
	                image_result.save(temp_file.name)
	                return temp_file.name, ready_note
	        except Exception as e:
	            logger.error(f"Error saving image object: {e}")
	            return None, f"\n\n(Error saving image object: {e})"
	    return None, f"\n\n(Unsupported image format: {type(image_result)})"


	def modified_run(message, history, system_prompt, max_new_tokens, use_web_search, web_search_query,
	                 age_group, sexual_openness, image_gen):
	    """Chat handler: stream the text reply, then optionally generate an image.

	    The MBTI persona always comes from the module-level ``fixed_mbti``.

	    Yields:
	        ``(text, gallery_update)`` pairs. The gallery stays hidden until an
	        image has been produced; the final pair appends a status note about
	        the image to the text when ``image_gen`` is enabled and the message
	        text is not blank.
	    """
	    # Initialize gallery component and hide it initially
	    output_so_far = ""
	    gallery_update = gr.Gallery(visible=False, value=[])
	    yield output_so_far, gallery_update

	    # Call the main run() function with the fixed MBTI value
	    text_generator = run(message, history, system_prompt, max_new_tokens, use_web_search,
	                         web_search_query, age_group, fixed_mbti, sexual_openness, image_gen)
	    for text_chunk in text_generator:
	        output_so_far = text_chunk
	        yield output_so_far, gallery_update

	    if not (image_gen and message["text"].strip()):
	        return

	    # Image generation with fixed size, guidance, step count and seed.
	    prompt = message["text"].strip()
	    try:
	        logger.info(f"Calling image generation for gallery with prompt: {message['text']}")
	        image_result, seed_info = generate_image(
	            prompt=prompt,
	            width=512,
	            height=512,
	            guidance=7.5,
	            inference_steps=30,
	            seed=42
	        )
	        if image_result:
	            image_path, note = _resolve_generated_image(image_result, prompt)
	        else:
	            image_path, note = None, f"\n\n(Image generation failed: {seed_info})"
	    except Exception as e:
	        logger.error(f"Error during gallery image generation: {e}")
	        image_path, note = None, f"\n\n(Image generation error: {e})"
	    if image_path is not None:
	        gallery_update = gr.Gallery(visible=True, value=[image_path])
	    yield output_so_far + note, gallery_update

	# =============================================================================
	# Examples: 12 image/video examples + additional examples
	# =============================================================================
	# Example prompts for the chat interface. Each entry is a one-element list
	# holding the multimodal textbox value ({"text": ..., "files": [...]}).
	examples = [
	    [
	        {
	            "text": "Compare the contents of two PDF files.",
	            "files": [
	                "assets/additional-examples/before.pdf",
	                "assets/additional-examples/after.pdf",
	            ],
	        }
	    ],
	    [
	        {
	            "text": "Summarize and analyze the contents of the CSV file.",
	            "files": ["assets/additional-examples/sample-csv.csv"],
	        }
	    ],
	    [
	        {
	            "text": "Act as a kind and understanding girlfriend. Explain this video.",
	            "files": ["assets/additional-examples/tmp.mp4"],
	        }
	    ],
	    [
	        {
	            "text": "Describe the cover and read the text on it.",
	            "files": ["assets/additional-examples/maz.jpg"],
	        }
	    ],
	    [
	        {
	            # One <image> tag per attached image: media validation rejects
	            # messages whose tag count differs from the image count.
	            "text": "<image> I already have this supplement and <image> I plan to purchase this product as well. Are there any precautions when taking them together?",
	            "files": [
	                "assets/additional-examples/pill1.png",
	                "assets/additional-examples/pill2.png"
	            ],
	        }
	    ],
	    [
	        {
	            "text": "Solve this integration problem.",
	            "files": ["assets/additional-examples/4.png"],
	        }
	    ],
	    [
	        {
	            "text": "When was this ticket issued and what is its price?",
	            "files": ["assets/additional-examples/2.png"],
	        }
	    ],
	    [
	        {
	            "text": "Based on the order of these images, create a short story.",
	            "files": [
	                "assets/sample-images/09-1.png",
	                "assets/sample-images/09-2.png",
	                "assets/sample-images/09-3.png",
	                "assets/sample-images/09-4.png",
	                "assets/sample-images/09-5.png",
	            ],
	        }
	    ],
	    [
	        {
	            "text": "Write Python code using matplotlib to draw a bar chart corresponding to this image.",
	            "files": ["assets/additional-examples/barchart.png"],
	        }
	    ],
	    [
	        {
	            "text": "Read the text from the image and format it in Markdown.",
	            "files": ["assets/additional-examples/3.png"],
	        }
	    ],
	    [
	        {
	            # NOTE(review): the prompt mentions two images but only one file
	            # is attached - confirm whether a second asset is missing.
	            "text": "Compare the two images and describe their similarities and differences.",
	            "files": ["assets/sample-images/03.png"],
	        }
	    ],
	    [
	        {
	            # Text-only prompt (no "files" key).
	            "text": "A cute Persian cat is smiling while holding a cover with 'I LOVE YOU' written on it.",
	        }
	    ],
	]

	# =============================================================================
	# Gradio UI (Blocks) configuration
	# =============================================================================

	# Stylesheet handed to gr.Blocks(css=...) when the UI is built below.
	# It styles the `.gradio-container` element: translucent white background,
	# padding/margin, and full width with no maximum width.
	css = """
	.gradio-container {
	background: rgba(255, 255, 255, 0.7);
	padding: 30px 40px;
	margin: 20px auto;
	width: 100% !important;
	max-width: none !important;
	}
	"""
	# HTML header (title and feature summary) rendered through
	# gr.Markdown(title_html) at the top of the Blocks layout.
	title_html = """
	<h1 align="center" style="margin-bottom: 0.2em; font-size: 1.6em;"> 💘 HeartSync Korea 💘 </h1>
	<p align="center" style="font-size:1.1em; color:#555;">
	A lightweight and powerful AI service offering ChatGPT-4o-level multimodal, web search, and image generation capabilities for local installation. <br>
	✅ FLUX Image Generation ✅ Inference ✅ Censorship Bypass ✅ Multimodal & VLM ✅ Real-time Web Search ✅ RAG <br>
	</p>
	"""

	# Top-level Gradio layout. Everything below builds the page bound to `demo`.
	# NOTE(review): indentation was lost in this copy of the file, so which
	# components sit inside each `with gr.Row()` / `with gr.Column()` group
	# cannot be read from here - confirm the nesting against the deployed app.
	with gr.Blocks(css=css, title="AgenticAI-Kv1") as demo:
	gr.Markdown(title_html)

	# Gallery component to store generated images
	# Created hidden; it is passed to the chat interface as an additional output.
	generated_images = gr.Gallery(
	label="Generated Images",
	show_label=True,
	visible=False,
	elem_id="generated_images",
	columns=2,
	height="auto",
	object_fit="contain"
	)

	# Feature toggles, both off by default.
	with gr.Row():
	web_search_checkbox = gr.Checkbox(label="Real-time Web Search", value=False)
	image_gen_checkbox = gr.Checkbox(label="Image (FLUX) Generation", value=False)

	# Hidden textbox holding the base system prompt; it is the first additional
	# input of the chat interface below.
	base_system_prompt_box = gr.Textbox(
	lines=3,
	value=("Answer in English by default, but if the input is in another language (for example, Japanese), respond in that language. "
	"You are a deep-thinking AI capable of using extended chains of thought to carefully consider the problem and deliberate internally using systematic reasoning before providing a solution. "
	"Enclose your thoughts and internal monologue within tags, then provide your final answer.\n"
	"Persona: You are a kind and loving girlfriend. You understand cultural nuances, diverse languages, and logical reasoning very well."),
	label="Base System Prompt",
	visible=False
	)
	# Persona controls exposed to the user.
	with gr.Row():
	age_group_dropdown = gr.Dropdown(
	label="Select Age Group (default: 20s)",
	choices=["Teens", "20s", "30s-40s", "50s-60s", "70s and above"],
	value="20s",
	interactive=True
	)
	# Removed the MBTI dropdown entirely. The fixed MBTI from mbti.json is used instead.
	sexual_openness_slider = gr.Slider(
	minimum=1, maximum=5, step=1, value=2,
	label="Sexual Openness (1-5, default: 2)",
	interactive=True
	)
	# Hidden slider: supplies the generation token budget (default 1000).
	max_tokens_slider = gr.Slider(
	label="Max Generation Tokens",
	minimum=100, maximum=8000, step=50, value=1000,
	visible=False
	)
	# Hidden placeholder input; labelled as unused.
	web_search_text = gr.Textbox(
	lines=1,
	label="Web Search Query (unused)",
	placeholder="No need to manually input",
	visible=False
	)

	# Chat interface creation using the modified_run function.
	# The order of `additional_inputs` matches modified_run's parameters after
	# (message, history): system_prompt, max_new_tokens, use_web_search,
	# web_search_query, age_group, sexual_openness, image_gen.
	chat = gr.ChatInterface(
	fn=modified_run, # Using the modified function with fixed MBTI.
	type="messages",
	chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
	textbox=gr.MultimodalTextbox(
	file_types=[".webp", ".png", ".jpg", ".jpeg", ".gif", ".mp4", ".csv", ".txt", ".pdf"],
	file_count="multiple",
	autofocus=True
	),
	multimodal=True,
	additional_inputs=[
	base_system_prompt_box,
	max_tokens_slider,
	web_search_checkbox,
	web_search_text,
	age_group_dropdown,
	sexual_openness_slider,
	image_gen_checkbox,
	],
	additional_outputs=[
	generated_images, # Gallery component
	],
	stop_btn=False,
	examples=examples,
	run_examples_on_click=False,
	cache_examples=False,
	css_paths=None,
	delete_cache=(1800, 1800),
	)

	# Footer row with the community link.
	with gr.Row(elem_id="examples_row"):
	with gr.Column(scale=12, elem_id="examples_container"):
	gr.Markdown("### @Community https://discord.gg/openfreeai ")

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    launch_options = {"share": True}
	    demo.launch(**launch_options)