import subprocess
import os
from pathlib import Path
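# Hugging Face Spaces entry point for the JarvisIR demo: installs the CUDA
# toolkit, sets up the JarvisIR package and its restoration tools, then serves
# a Gradio app that streams the agent's analysis and the restored image.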
def install_cuda_toolkit():
    # Download and silently install the CUDA toolkit so CUDA extensions can be
    # compiled at runtime (nvcc is not preinstalled on the Space).
    CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run"
    CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
    subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
    subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
    subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
    # Expose the freshly installed toolkit to this process and its children
    os.environ["CUDA_HOME"] = "/usr/local/cuda"
    os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
    os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
        os.environ["CUDA_HOME"],
        "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
    )
    # Pin the arch list explicitly: with no GPU visible at build time, torch's
    # arch detection returns an empty list and `arch_list[-1] += '+PTX'`
    # raises IndexError: list index out of range.
    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
    # Disable BasicSR's JIT compilation of its CUDA ops at import time
    os.environ["BASICSR_JIT"] = "False"

install_cuda_toolkit()
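# Optional sanity check (a sketch; assumes the silent install above succeeded):
# print(subprocess.run(["nvcc", "--version"], capture_output=True, text=True).stdout)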
BASE_DIR = Path("/home/user/app/JarvisIR")
commands = [
("python -V", BASE_DIR),
("pip install -r requirements.txt", BASE_DIR),
("python setup.py develop --no_cuda_ext", BASE_DIR / "package/agent_tools/Retinexformer"),
("pip install basicsr_ridcp-0.0.0-cp310-cp310-linux_x86_64.whl", BASE_DIR),
("pip install -e .", BASE_DIR / "package")
]
def run_command(cmd, cwd=None):
    try:
        result = subprocess.run(
            cmd,  # Note: no shlex.split() here; the string goes to the shell as-is
            cwd=str(cwd) if cwd else None,
            shell=True,  # shell=True is required to support operators such as &&
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        print(f"[SUCCESS] {cmd}")
        if result.stdout:
            print(result.stdout)
        return True
    except subprocess.CalledProcessError as e:
        print(f"[FAILED] {cmd}")
        print(f"Error: {e.stderr}")
        return False
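# Run the setup commands in order. Failures are reported but do not abort the
# loop, so later steps still get a chance to run.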
for cmd, cwd in commands:
run_command(cmd, cwd)
import sys
sys.path.append("/home/user/app/JarvisIR/package")
sys.path.append("/home/user/app/JarvisIR/package/agent_tools/Retinexformer")
# sys.path.append("/home/user/app/JarvisIR/package/agent_tools/RIDCP")
# Patch an import error in the installed basicsr package: torchvision >= 0.17
# removed torchvision.transforms.functional_tensor, but basicsr still imports
# rgb_to_grayscale from it.
# 1. Define the file path (adjust if site-packages lives elsewhere)
file_path = Path("/usr/local/lib/python3.10/site-packages/basicsr/data/degradations.py")
# 2. Read the file contents
try:
    content = file_path.read_text(encoding='utf-8')
    # 3. Apply the replacement
    new_content = content.replace(
        'from torchvision.transforms.functional_tensor import rgb_to_grayscale',
        'from torchvision.transforms.functional import rgb_to_grayscale'
    )
    # 4. Write back only if something changed
    if content != new_content:
        file_path.write_text(new_content, encoding='utf-8')
        print("Patched basicsr degradations.py")
    else:
        print("No changes needed")
except FileNotFoundError:
    print(f"{file_path} does not exist")
except Exception as e:
    print(f"Error: {str(e)}")
import spaces
import re
import random
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration, TextIteratorStreamer
from threading import Thread
def download_tools_ckpts(target_dir, subdir):
    """Download one subdirectory of the LYL1015/JarvisIR Hub repo into target_dir."""
    from huggingface_hub import snapshot_download
    import shutil
    tmp_dir = "hf_temp_download"
    os.makedirs(tmp_dir, exist_ok=True)
    # Fetch only the files under `subdir` from the model repo
    snapshot_download(
        repo_id="LYL1015/JarvisIR",
        repo_type="model",
        local_dir=tmp_dir,
        allow_patterns=os.path.join(subdir, "**"),
        local_dir_use_symlinks=False,
    )
    src_dir = os.path.join(tmp_dir, subdir)
    shutil.copytree(src_dir, target_dir)
    shutil.rmtree(tmp_dir)
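# Note: shutil.copytree raises FileExistsError if target_dir already exists,
# which is why each download below is guarded by an os.path.exists check.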
target_dir = "JarvisIR/checkpoints/agent_tools"
if not os.path.exists(target_dir):
download_tools_ckpts(target_dir, "agent_tools/checkpoints")
llm_target_dir = "JarvisIR/checkpoints/pretrained_preview"
if not os.path.exists(llm_target_dir):
    download_tools_ckpts(llm_target_dir, "pretrained/preview")
# Model configuration
# Path to the fine-tuned LLaVA model downloaded above
model_id = llm_target_dir
# Available image restoration tasks and their corresponding models
all_tasks = "{denoise: [scunet, restormer], lighten: [retinexformer_fivek, hvicidnet, lightdiff], \
derain: [idt, turbo_rain, s2former], defog: [ridcp, kanet], \
desnow: [turbo_snow, snowmaster], super_resolution: [real_esrgan]}"
# Various prompt templates for querying the LLM about image degradation and restoration tasks
prompts_query2 = [
f"Considering the image's degradation, suggest the required tasks with explanations, and identify suitable tools for each task. Options for tasks and tools include: {all_tasks}.",
f"Given the image's degradation, outline the essential tasks along with justifications, and choose the appropriate tools for each task from the following options: {all_tasks}.",
f"Please specify the tasks required due to the image's degradation, explain the reasons, and select relevant tools for each task from the provided options: {all_tasks}.",
f"Based on the image degradation, determine the necessary tasks and their reasons, along with the appropriate tools for each task. Choose from these options: {all_tasks}.",
f"Identify the tasks required to address the image's degradation, including the reasons for each, and select tools from the options: {all_tasks}.",
f"Considering the degradation observed, list the tasks needed and their justifications, then pick the most suitable tools for each task from these options: {all_tasks}.",
f"Evaluate the image degradation, and based on that, provide the necessary tasks and reasons, along with tools chosen from the options: {all_tasks}.",
f"With respect to the image degradation, outline the tasks needed and explain why, selecting tools from the following list: {all_tasks}.",
f"Given the level of degradation in the image, specify tasks to address it, include reasons, and select tools for each task from: {all_tasks}.",
f"Examine the image's degradation, propose relevant tasks and their explanations, and identify tools from the options provided: {all_tasks}.",
f"Based on observed degradation, detail the tasks required, explain your choices, and select tools from these options: {all_tasks}.",
f"Using the image's degradation as a guide, list the necessary tasks, include explanations, and pick tools from the provided choices: {all_tasks}.",
f"Assess the image degradation, provide the essential tasks and reasons, and select the appropriate tools for each task from the options: {all_tasks}.",
f"According to the image's degradation, determine which tasks are necessary and why, choosing tools for each task from: {all_tasks}.",
f"Observe the degradation in the image, specify the needed tasks with justifications, and select appropriate tools from: {all_tasks}.",
f"Taking the image degradation into account, specify tasks needed, provide reasons, and choose tools from the following: {all_tasks}.",
f"Consider the image's degradation level, outline the tasks necessary, provide reasoning, and select suitable tools from: {all_tasks}.",
f"Evaluate the degradation in the image, identify tasks required, explain your choices, and pick tools from: {all_tasks}.",
f"Analyze the image degradation and suggest tasks with justifications, choosing the best tools from these options: {all_tasks}.",
f"Review the image degradation, and based on it, specify tasks needed, provide reasons, and select tools for each task from: {all_tasks}."
]
# Initialize models
print("Loading LLM model...")
# Initialize the image restoration toolkit
from agent_tools import RestorationToolkit
tool_engine = RestorationToolkit(score_weight=[0,0,0,0,0])
# Load the LLaVA model in half precision to reduce memory usage
model = LlavaForConditionalGeneration.from_pretrained(
model_id,
torch_dtype=torch.float16,
device_map="auto",
low_cpu_mem_usage=True
)
processor = AutoProcessor.from_pretrained(model_id)
print("Loading tool engine...")
def parse_llm_response(response):
"""
Parse the LLM response to extract reason and answer sections
Args:
response (str): The raw response from the LLM
Returns:
tuple: (reason, answer) extracted from the response
"""
    reason_match = re.search(r'<reason>(.*?)</reason>', response, re.DOTALL)
    answer_match = re.search(r'<answer>(.*?)</answer>', response, re.DOTALL)
reason = reason_match.group(1).strip() if reason_match else "No reasoning provided"
answer = answer_match.group(1).strip() if answer_match else "No answer provided"
return reason, answer
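# Illustrative (hypothetical) response in the expected tag format:
#   <reason>The image is dark and noisy, so it needs lightening and denoising.</reason>
#   <answer>[type:lighten]:(model:retinexformer_fivek), [type:denoise]:(model:scunet)</answer>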
def extract_models_from_answer(answer):
"""
Extract model names from the answer string using regex
Args:
answer (str): The answer string containing model recommendations
Returns:
list: List of extracted model names
"""
# Pattern to match [type:xxx]:(model:xxx)
pattern = r'\[type:[^\]]+\]:\(model:([^)]+)\)'
models = re.findall(pattern, answer)
return models
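# Example: extract_models_from_answer("[type:denoise]:(model:scunet), [type:derain]:(model:idt)")
# returns ['scunet', 'idt'].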
def beautify_recommended_actions(answer, models):
"""
Format the LLM's recommendations in a more visually appealing way
Args:
answer (str): The raw answer from LLM
models (list): List of extracted model names
Returns:
str: Beautified display of recommendations
"""
# Task type to emoji mapping for visual enhancement
task_icons = {
'denoise': '🧹',
'lighten': '💡',
'derain': '🌧️',
'defog': '🌫️',
'desnow': '❄️',
'super_resolution': '🔍'
}
# Parse the answer to extract tasks and models
pattern = r'\[type:([^\]]+)\]:\(model:([^)]+)\)'
matches = re.findall(pattern, answer)
if not matches:
return f"**🎯 Recommended Actions:**\n\n{answer}\n\n**Extracted Models:** {', '.join(models) if models else 'None'}"
# Create beautified display
beautified = "**🎯 Recommended Actions:**\n"
beautified += "> "
# Create horizontal flow of actions
action_parts = []
for task_type, model_name in matches:
task_type = task_type.strip()
model_name = model_name.strip()
# Get icon for task type
icon = task_icons.get(task_type, '🔧')
# Format task name (capitalize and replace underscores)
task_display = task_type.title().replace('_', ' ')
# Create action part: icon + task + model
action_part = f"{icon} {task_display}:`{model_name}`"
action_parts.append(action_part)
# Join with arrows to show sequence
beautified += " ➡ ".join(action_parts) + "\n\n"
# Add summary information
beautified += f"**📋 Processing Pipeline:** {len(matches)} steps\n"
beautified += f"**🛠️ Models to use:** {' → '.join(models)}"
return beautified
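# For "[type:denoise]:(model:scunet)" this renders roughly as:
#   **🎯 Recommended Actions:**
#   > 🧹 Denoise:`scunet`
#   **📋 Processing Pipeline:** 1 steps
#   **🛠️ Models to use:** scunet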
def resize_image_to_original(processed_image_path, original_size):
"""
Resize processed image back to original dimensions
Args:
processed_image_path (str): Path to the processed image
original_size (tuple): Original image dimensions (width, height)
Returns:
str: Path to the resized image
"""
if processed_image_path and os.path.exists(processed_image_path):
img = Image.open(processed_image_path)
img_resized = img.resize(original_size, Image.Resampling.LANCZOS)
# Save resized image
output_path = os.path.join('temp_outputs', 'final_result.png')
img_resized.save(output_path)
return output_path
return processed_image_path
def get_llm_response_streaming(image_path):
"""
Get streaming response from LLM for image analysis
Args:
image_path (str): Path to the input image
Returns:
TextIteratorStreamer: A streamer object to yield tokens
"""
# Select random prompt from the templates
    # Select a random prompt from the templates
    instruction = random.choice(prompts_query2)
    # Format the prompt with the <image> placeholder token for multimodal input
    prompt = (f"<|start_header_id|>user<|end_header_id|>\n\n<image>\n{instruction}<|eot_id|>"
              "<|start_header_id|>assistant<|end_header_id|>\n\n")
# Load and process image
raw_image = Image.open(image_path)
inputs = processor(prompt, raw_image, return_tensors='pt').to(0, torch.float16)
# Setup streaming for token-by-token generation
streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
# Generate response in a separate thread to avoid blocking
generation_kwargs = dict(
**inputs,
streamer=streamer,
max_new_tokens=400,
do_sample=False
)
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
return streamer
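# The caller iterates the returned streamer to receive decoded text chunks as
# generate() produces them on the background thread:
#   for chunk in get_llm_response_streaming(path): ...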
def process_image_with_tools(image_path, models, original_size):
"""
Process image using the tool engine and restore to original size
Args:
image_path (str): Path to the input image
models (list): List of models to apply
original_size (tuple): Original image dimensions
Returns:
str: Path to the final processed image
"""
if not models:
return None
# Create output directory
os.makedirs('temp_outputs', exist_ok=True)
# Process the image with selected models
print("---------using tools---------")
print("CUDA runtime version:", torch.version.cuda)
res = tool_engine.process_image(models, image_path, 'temp_outputs')
print("---------resizing back---------")
# Resize back to original dimensions
final_result = resize_image_to_original(res['output_path'], original_size)
return final_result
@spaces.GPU(duration=150)
def process_full_pipeline(image):
"""
Main processing pipeline with streaming UI updates
Args:
image (str): Path to the input image
Yields:
tuple: (chat_history, processed_image) for Gradio UI updates
"""
if image is None:
return [], None
# Get original image size for later restoration
original_img = Image.open(image)
original_size = original_img.size
# Initialize chat history for UI
chat_history = [("Image uploaded for analysis", None)]
# Step 1: Get streaming LLM response
streamer = get_llm_response_streaming(image)
# Stream the response to UI with real-time updates
full_response = ""
in_reason = False
in_answer = False
reason_displayed = False
answer_displayed = False
reasoning_added = False # Track if reasoning entry was added
for new_text in streamer:
full_response += new_text
        # Check whether we're entering the reason section or should start showing content
        if ('<reason>' in full_response and not in_reason and not reason_displayed) or (not reasoning_added and not in_reason and not reason_displayed):
            in_reason = True
            reasoning_added = True
            if '<reason>' in full_response:
                # Extract content after <reason>
                reason_start = full_response.find('<reason>') + len('<reason>')
reason_content = full_response[reason_start:].strip()
else:
# Show all content as reasoning if no tag yet
reason_content = full_response.strip()
# Add reasoning to chat history
chat_history.append((None, f"**🤔 Analysis & Reasoning:**\n\n{reason_content}"))
yield chat_history, None
        # If we're in the reason section, update its content
        elif in_reason and not reason_displayed:
            # Check whether the reason section is complete
            if '</reason>' in full_response:
                # Extract the complete reason content
                reason_start = full_response.find('<reason>') + len('<reason>')
                reason_end = full_response.find('</reason>')
reason_content = full_response[reason_start:reason_end].strip()
# Update chat history with complete reason
chat_history[1] = (None, f"**🤔 Analysis & Reasoning:**\n\n{reason_content}")
reason_displayed = True
in_reason = False
yield chat_history, None
else:
# Continue streaming reason content
                if '<reason>' in full_response:
                    reason_start = full_response.find('<reason>') + len('<reason>')
reason_content = full_response[reason_start:].strip()
else:
reason_content = full_response.strip()
# Update chat history with partial reason
chat_history[1] = (None, f"**🤔 Analysis & Reasoning:**\n\n{reason_content}")
yield chat_history, None
        # Check whether we're entering the answer section
        elif '<answer>' in full_response and not in_answer and not answer_displayed and reason_displayed:
            in_answer = True
            # Extract content after <answer>
            answer_start = full_response.find('<answer>') + len('<answer>')
answer_content = full_response[answer_start:]
# Add partial answer to chat history
models = extract_models_from_answer(answer_content)
beautified = beautify_recommended_actions(answer_content, models)
chat_history.append((None, beautified))
yield chat_history, None
# If we're in answer section, update content
elif in_answer and not answer_displayed:
            # Check whether the answer section is complete
            if '</answer>' in full_response:
                # Extract the complete answer content
                answer_start = full_response.find('<answer>') + len('<answer>')
                answer_end = full_response.find('</answer>')
answer_content = full_response[answer_start:answer_end].strip()
# Parse and process final answer
models = extract_models_from_answer(answer_content)
beautified = beautify_recommended_actions(answer_content, models)
chat_history[2] = (None, beautified)
answer_displayed = True
in_answer = False
yield chat_history, None
# Process image with tools
if models:
chat_history.append((None, "**🔄 Processing image...**"))
yield chat_history, None
processed_image = process_image_with_tools(image, models, original_size)
chat_history[-1] = (None, "**✅ Processing Complete!**")
yield chat_history, processed_image
return
else:
chat_history.append((None, "**❌ No valid models found in the response**"))
yield chat_history, None
return
else:
# Continue streaming answer content
                answer_start = full_response.find('<answer>') + len('<answer>')
answer_content = full_response[answer_start:].strip()
# Update chat history with partial answer
models = extract_models_from_answer(answer_content)
beautified = beautify_recommended_actions(answer_content, models)
chat_history[2] = (None, beautified)
yield chat_history, None
# Fallback if streaming completes without proper tags
if not answer_displayed:
reason, answer = parse_llm_response(full_response)
models = extract_models_from_answer(answer)
chat_history = [
("Image uploaded for analysis", None),
(None, f"**🤔 Analysis & Reasoning:**\n\n{reason}"),
(None, beautify_recommended_actions(answer, models))
]
if models:
chat_history.append((None, "**🔄 Processing image...**"))
yield chat_history, None
processed_image = process_image_with_tools(image, models, original_size)
chat_history[-1] = (None, "**✅ Processing Complete!**")
yield chat_history, processed_image
else:
chat_history.append((None, "**❌ No valid models found in the response**"))
yield chat_history, None
# Create Gradio interface
def create_interface():
"""
Create and configure the Gradio web interface
Returns:
gr.Blocks: Configured Gradio interface
"""
with gr.Blocks(title="JarvisIR: Elevating Autonomous Driving Perception with Intelligent Image Restoration", theme=gr.themes.Soft()) as demo:
        # Header with title and tagline
        gr.Markdown("""
        # JarvisIR: Elevating Autonomous Driving Perception with Intelligent Image Restoration

        Upload an image and let JarvisIR analyze its degradation and recommend the best restoration tools!
        """)
with gr.Row():
with gr.Column(scale=1):
# Input image upload component
input_image = gr.Image(
type="filepath",
label="📸 Upload Your Image",
height=400
)
# Process button
process_btn = gr.Button(
"🚀 Analyze & Process",
variant="primary",
size="lg"
)
with gr.Column(scale=1):
# Chat interface to show analysis
chatbot = gr.Chatbot(
label="💬 AI Analysis Chat",
height=400,
show_label=True,
bubble_full_width=False
)
with gr.Row():
# Output image display
output_image = gr.Image(
label="✨ Processed Result",
height=300
)
# Connect event handler for the process button
process_btn.click(
fn=process_full_pipeline,
inputs=[input_image],
outputs=[chatbot, output_image]
)
# Instructions section
gr.Markdown("### 📝 Instructions:")
gr.Markdown("""
1. **Upload an image** that needs restoration (blurry, dark, noisy, etc.)
2. **Or click on sample images** below to try with pre-loaded examples
3. **Click 'Analyze & Process'** to let JarvisIR analyze the image
4. **View the chat** to see JarvisIR's reasoning and recommendations in real-time
5. **Check the result** - processed image restored to original dimensions
""")
# Add examples section at the bottom
gr.Examples(
examples=[
["example/img4.jpg"],
["example/img10.png"],
["example/img12.png"],
["example/img13.jpg"]
],
inputs=input_image,
label="🖼️ Example Images - Click to Try!"
)
return demo
if __name__ == "__main__":
print("Starting Image Restoration Assistant...")
demo = create_interface()
    # Launch the Gradio app, listening on all network interfaces
demo.launch(
server_name="0.0.0.0",
share=False
)