Spaces:

HBDing
/

DreamRenderer

Sleeping

App Files Files Community

Longxiang-ai committed on May 23

Commit

0274afd

0 Parent(s):

Initial commit: DreamRenderer with Zero GPU support

Browse files

Files changed (6) hide show

.gitignore +49 -0
README.md +57 -0
app.py +565 -0
bbox_component.html +353 -0
dream_renderer.py +312 -0
requirements.txt +10 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,49 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyTorch
+*.pth
+*.pt
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+# OS
+.DS_Store
+Thumbs.db
+# Logs
+*.log
+# Temporary files
+*.tmp
+*.temp
+# Test files (not needed in the production deployment)
+test_*.py
+*_test.py

README.md ADDED Viewed

	@@ -0,0 +1,57 @@

+---
+title: DreamRenderer
+emoji: 🎨
+colorFrom: blue
+colorTo: purple
+sdk: gradio
+sdk_version: 4.44.0
+app_file: app.py
+pinned: false
+license: apache-2.0
+hardware: zero-gpu-medium
+---
+# DreamRenderer: Multi-Instance Attribute Control 🎨
+A powerful Gradio implementation of DreamRenderer for precise multi-instance attribute control in text-to-image generation, powered by **Zero GPU** for fast inference.
+## ✨ Features
+- 🖼️ **Interactive Bounding Box Drawing**: 直观的画布界面，轻松绘制多个区域
+- 🎯 **Multi-Instance Attribute Control**: 为每个区域设置独特的生成内容
+- ⚡ **Zero GPU Acceleration**: 利用Hugging Face的Zero GPU获得极速推理
+- 🚀 **FLUX Model Support**: 支持最新的FLUX扩散模型
+- 🎨 **Real-time Preview**: 实时预览边界框和生成参数
+## 🚀 Quick Start
+1. 在画布上拖拽鼠标绘制边界框
+2. 为每个边界框添加详细的描述
+3. 设置全局提示词和生成参数
+4. 点击生成按钮，享受AI创作的乐趣！
+## 🛠️ Technical Details
+- **Frontend**: Gradio 4.44.0
+- **Backend**: PyTorch + Diffusers
+- **Model**: FLUX-based diffusion model
+- **Acceleration**: Hugging Face Zero GPU
+- **Memory**: Optimized for GPU memory efficiency
+## 💡 Usage Tips
+- 描述越详细，生成效果越好
+- 可以为不同区域设置完全不同的内容
+- 调整推理步数和引导强度来控制生成质量
+- 使用种子功能可以获得可重现的结果
+## 🎯 Perfect for
+- 复杂场景构图
+- 多物体图像生成
+- 精确的空间布局控制
+- 创意设计和艺术创作
+---
+**注意**: 此应用使用Zero GPU资源，首次加载可能需要几秒钟时间进行模型初始化。

app.py ADDED Viewed

	@@ -0,0 +1,565 @@

+import gradio as gr
+import spaces
+import torch
+import numpy as np
+from PIL import Image, ImageDraw
+import json
+import base64
+import io
+from typing import List, Dict, Tuple, Optional
+import warnings
+from dream_renderer import DreamRendererPipeline
+# Suppress all Python warnings for the whole process.
+warnings.filterwarnings("ignore")
+# Module-level state shared by the Gradio callbacks below.
+# `pipeline` stays None until initialize_pipeline() creates it.
+pipeline = None
+# Most recent bounding-box list parsed by update_bbox_data(); passed to the
+# pipeline by generate_image_with_bbox().
+current_bbox_data = []
+@spaces.GPU
+def initialize_pipeline():
+    """初始化DreamRenderer管道"""
+    global pipeline
+    try:
+        if pipeline is None:
+            pipeline = DreamRendererPipeline()
+            # 预加载模型以节省时间
+            success = pipeline.load_model()
+            if success:
+                return "✅ DreamRenderer管道已成功初始化并加载模型！"
+            else:
+                return "⚠️ DreamRenderer管道已初始化，但模型加载失败。将使用演示模式。"
+        else:
+            return "✅ DreamRenderer管道已经初始化完成！"
+    except Exception as e:
+        return f"❌ 初始化失败: {str(e)}"
+def load_bbox_component():
+    """加载边界框绘制组件"""
+    try:
+        with open('bbox_component_fixed.html', 'r', encoding='utf-8') as f:
+            return f.read()
+    except FileNotFoundError:
+        # 如果修复版本不存在，使用原版本
+        with open('bbox_component.html', 'r', encoding='utf-8') as f:
+            return f.read()
+def update_bbox_data(bbox_json: str):
+    """更新边界框数据显示"""
+    global current_bbox_data
+    try:
+        if not bbox_json or bbox_json.strip() == "":
+            current_bbox_data = []
+            return "📦 暂无边界框数据\n\n💡 提示：在画布上拖拽鼠标绘制边界框", ""
+        bbox_data = json.loads(bbox_json)
+        current_bbox_data = bbox_data  # 重要：更新全局变量
+        if not bbox_data:
+            current_bbox_data = []
+            return "📦 暂无边界框数据\n\n💡 提示：在画布上拖拽鼠标绘制边界框", ""
+        info_lines = [
+            f"📦 边界框数据 ({len(bbox_data)} 个)",
+            "=" * 40,
+            ""
+        ]
+        # 生成边界框编辑界面HTML
+        edit_html_lines = [
+            '<div style="max-height: 400px; overflow-y: auto; padding: 10px; border: 1px solid #ddd; border-radius: 8px; background: #f9f9f9;">',
+            '<h4 style="color: #333; margin-top: 0;">🎯 边界框描述编辑</h4>'
+        ]
+        for i, bbox in enumerate(bbox_data, 1):
+            x = bbox.get('x', 0)
+            y = bbox.get('y', 0)
+            width = bbox.get('width', 0)
+            height = bbox.get('height', 0)
+            label = bbox.get('label', f'区域{i}')
+            prompt = bbox.get('prompt', '')  # 获取已有的提示词
+            info_lines.extend([
+                f"🎯 边界框 {i}:",
+                f"   📍 位置: ({x:.3f}, {y:.3f})",
+                f"   📏 大小: {width:.3f} × {height:.3f}",
+                f"   🏷️  标签: {label}",
+                f"   💬 描述: {prompt or '(请在下方输入描述)'}",
+                ""
+            ])
+            # 为每个边界框生成编辑界面
+            color = f"hsl({(i-1) * 60}, 70%, 50%)"
+            edit_html_lines.extend([
+                f'<div style="margin: 15px 0; padding: 15px; border-left: 4px solid {color}; background: white; border-radius: 8px;">',
+                f'  <div style="display: flex; align-items: center; margin-bottom: 10px;">',
+                f'    <div style="width: 20px; height: 20px; background: {color}; border-radius: 4px; margin-right: 10px;"></div>',
+                f'    <strong style="color: #333;">边界框 {i} - {label}</strong>',
+                f'    <span style="margin-left: auto; font-size: 0.9em; color: #666;">({x:.2f}, {y:.2f}) {width:.2f}×{height:.2f}</span>',
+                f'  </div>',
+                f'  <div style="margin-bottom: 8px;">',
+                f'    <label style="display: block; font-weight: bold; color: #555; margin-bottom: 5px;">🏷️ 区域标签:</label>',
+                f'    <input type="text" id="bbox_label_{i-1}" value="{label}" placeholder="为这个区域命名..." ',
+                f'           style="width: 100%; padding: 8px; border: 1px solid #ddd; border-radius: 4px; font-size: 14px;" ',
+                f'           onchange="updateBboxField({i-1}, \'label\', this.value)">',
+                f'  </div>',
+                f'  <div style="margin-bottom: 8px;">',
+                f'    <label style="display: block; font-weight: bold; color: #555; margin-bottom: 5px;">💬 详细描述:</label>',
+                f'    <textarea id="bbox_prompt_{i-1}" placeholder="描述这个区域应该生成什么内容..." ',
+                f'              style="width: 100%; height: 80px; padding: 8px; border: 1px solid #ddd; border-radius: 4px; font-size: 14px; resize: vertical;" ',
+                f'              onchange="updateBboxField({i-1}, \'prompt\', this.value)">{prompt}</textarea>',
+                f'  </div>',
+                f'  <div style="text-align: right;">',
+                f'    <button onclick="deleteBbox({i-1})" style="background: #ff4757; color: white; border: none; padding: 6px 12px; border-radius: 4px; cursor: pointer; font-size: 12px;">',
+                f'      🗑️ 删除此框',
+                f'    </button>',
+                f'  </div>',
+                f'</div>'
+            ])
+        edit_html_lines.extend([
+            '<div style="margin-top: 20px; padding: 15px; background: #e8f5e8; border-radius: 8px;">',
+            '  <div style="display: flex; justify-content: space-between; align-items: center;">',
+            '    <div>',
+            f'      <strong style="color: #2d5a2d;">✅ 共 {len(bbox_data)} 个边界框</strong>',
+            '      <br><small style="color: #5a5a5a;">修改描述后会自动保存</small>',
+            '    </div>',
+            '    <button onclick="clearAllBboxes()" style="background: #ff6b6b; color: white; border: none; padding: 8px 16px; border-radius: 6px; cursor: pointer;">',
+            '      🗑️ 清空所有',
+            '    </button>',
+            '  </div>',
+            '</div>',
+            '</div>'
+        ])
+        info_lines.extend([
+            "💡 使用说明:",
+            "• 在画布上拖拽绘制新的边界框",
+            "• 在右侧为每个框输入具体描述",
+            "• 每个框可以有不同的生成内容",
+            "• 描述越详细，生成效果越好"
+        ])
+        print(f"DEBUG: 边界框数据已更新: {len(current_bbox_data)}个")  # 调试信息
+        return "\n".join(info_lines), "\n".join(edit_html_lines)
+    except json.JSONDecodeError:
+        current_bbox_data = []
+        return f"❌ 边界框数据格式错误\n\n原始数据: {bbox_json[:200]}...", ""
+    except Exception as e:
+        current_bbox_data = []
+        return f"❌ 处理边界框数据时出错: {str(e)}", ""
+@spaces.GPU
+def generate_image_with_bbox(prompt: str, negative_prompt: str,
+                           num_inference_steps: int, guidance_scale: float,
+                           width: int, height: int, seed: int, use_seed: bool):
+    """使用边界框生成图像"""
+    global pipeline, current_bbox_data
+    if pipeline is None:
+        return None, "❌ 请先初始化DreamRenderer管道！"
+    if not prompt.strip():
+        return None, "❌ 请输入提示词！"
+    try:
+        # 设置种子
+        actual_seed = seed if use_seed else None
+        # 生成图像
+        image = pipeline.generate_image(
+            prompt=prompt,
+            bbox_data=current_bbox_data,
+            negative_prompt=negative_prompt,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+            width=width,
+            height=height,
+            seed=actual_seed
+        )
+        info = f"✅ 图像生成成功！\n"
+        info += f"🔸 使用边界框: {len(current_bbox_data)}个\n"
+        info += f"🔸 推理步数: {num_inference_steps}\n"
+        info += f"🔸 引导强度: {guidance_scale}\n"
+        info += f"🔸 图像尺寸: {width}×{height}\n"
+        if actual_seed is not None:
+            info += f"🔸 随机种子: {actual_seed}"
+        return image, info
+    except Exception as e:
+        return None, f"❌ 生成图像时出错: {str(e)}"
+def create_interface():
+    """Build the Gradio Blocks UI for DreamRenderer and wire up its events.
+
+    The layout has a left column (initialisation controls, drawing canvas,
+    box summary) and a right column (per-box editor, prompts, generation
+    parameters, output). Three callbacks are bound: initialize_pipeline,
+    update_bbox_data and generate_image_with_bbox.
+
+    Returns:
+        The constructed ``gr.Blocks`` instance; it is not launched here.
+    """
+    # Custom CSS
+    css = """
+    .main-container {
+        max-width: 1400px;
+        margin: 0 auto;
+    }
+    .bbox-container {
+        border: 2px solid #e1e5e9;
+        border-radius: 12px;
+        padding: 20px;
+        margin: 15px 0;
+        background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
+    }
+    .generate-btn {
+        background: linear-gradient(45deg, #FF6B6B, #4ECDC4);
+        border: none;
+        border-radius: 25px;
+        padding: 15px 35px;
+        color: white;
+        font-weight: bold;
+        font-size: 18px;
+        box-shadow: 0 4px 15px rgba(0,0,0,0.2);
+        transition: all 0.3s ease;
+    }
+    .generate-btn:hover {
+        transform: translateY(-2px);
+        box-shadow: 0 6px 20px rgba(0,0,0,0.3);
+    }
+    .init-btn {
+        background: linear-gradient(45deg, #667eea, #764ba2);
+        border: none;
+        border-radius: 20px;
+        color: white;
+        font-weight: bold;
+        padding: 12px 25px;
+    }
+    .parameter-group {
+        background: #f8f9fa;
+        border-radius: 10px;
+        padding: 15px;
+        margin: 10px 0;
+    }
+    """
+    with gr.Blocks(css=css, title="DreamRenderer - Multi-Instance Control", theme=gr.themes.Soft()) as demo:
+        gr.HTML("""
+        <div style="text-align: center; padding: 20px;">
+            <h1 style="background: linear-gradient(45deg, #FF6B6B, #4ECDC4); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-size: 3em; margin-bottom: 10px;">
+                🎨 DreamRenderer
+            </h1>
+            <h2 style="color: #666; margin-bottom: 20px;">Multi-Instance Attribute Control</h2>
+            <p style="font-size: 1.2em; color: #888; max-width: 800px; margin: 0 auto;">
+                基于ZeroGPU的高质量多实例属性控制文本到图像生成工具
+            </p>
+        </div>
+        """)
+        # Usage instructions (collapsed by default)
+        with gr.Accordion("📖 使用说明", open=False):
+            gr.Markdown("""
+            ### 🚀 快速开始：
+            1. **初始化**: 点击"初始化DreamRenderer"按钮加载模型
+            2. **绘制区域**: 在画布上拖拽鼠标绘制边界框
+            3. **添加描述**: 为每个边界框输入描述文本
+            4. **设置参数**: 调整生成参数（可选）
+            5. **生成图像**: 输入主提示词并点击生成
+            ### ✨ 功能特点：
+            - 🎯 **精确控制**: 通过边界框精确控制每个实例的位置和属性
+            - 🚀 **ZeroGPU加速**: 利用Hugging Face的ZeroGPU实现快速推理
+            - 🎨 **高质量生成**: 基于FLUX模型的高质量图像生成
+            - 🔧 **灵活参数**: 丰富的参数调节选项
+            """)
+        with gr.Row():
+            # Left column: bounding-box drawing and controls
+            with gr.Column(scale=1):
+                # Initialisation section
+                with gr.Group():
+                    gr.Markdown("### 🚀 模型初始化")
+                    init_btn = gr.Button("🚀 初始化DreamRenderer", variant="primary", elem_classes=["init-btn"])
+                    init_status = gr.Textbox(label="初始化状态", interactive=False, lines=2)
+                # Bounding-box drawing area
+                with gr.Group():
+                    gr.Markdown("### 📦 边界框绘制")
+                    gr.HTML("""
+                    <div style="background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%); padding: 10px; border-radius: 8px; margin: 10px 0;">
+                        <p style="margin: 0; color: #1976d2;"><strong>步骤1:</strong> 在画布上拖拽鼠标绘制边界框</p>
+                        <p style="margin: 5px 0 0 0; color: #1976d2;"><strong>步骤2:</strong> 在右侧为每个框输入详细描述</p>
+                    </div>
+                    """)
+                    bbox_component = gr.HTML(load_bbox_component(), elem_classes=["bbox-container"])
+                    # Hidden textbox that receives the bounding-box JSON; the page
+                    # script locates it through elem_id "bbox_data".
+                    bbox_data_input = gr.Textbox(visible=False, elem_id="bbox_data")
+                    bbox_info = gr.Textbox(label="📦 边界框信息", interactive=False, lines=8, placeholder="边界框信息将在这里显示...")
+            # Right column: bounding-box editor and generation parameters
+            with gr.Column(scale=1):
+                # Bounding-box editing area (filled by update_bbox_data)
+                with gr.Group():
+                    gr.Markdown("### ✏️ 边界框描述编辑")
+                    bbox_editor = gr.HTML(
+                        value="<div style='text-align: center; padding: 40px; color: #666;'>绘制边界框后，编辑界面将出现在这里</div>",
+                        elem_id="bbox_editor"
+                    )
+                # Prompt settings
+                with gr.Group():
+                    gr.Markdown("### 📝 提示词设置")
+                    prompt = gr.Textbox(
+                        label="主提示词",
+                        placeholder="描述你想要生成的整体场景...",
+                        lines=3,
+                        value="a beautiful landscape"
+                    )
+                    negative_prompt = gr.Textbox(
+                        label="负向提示词",
+                        placeholder="描述你不想看到的内容...",
+                        lines=2,
+                        value="blurry, low quality, distorted"
+                    )
+                # Generation parameters
+                with gr.Group():
+                    gr.Markdown("### ⚙️ 生成参数")
+                    with gr.Row():
+                        num_steps = gr.Slider(
+                            minimum=1, maximum=100, value=20, step=1,
+                            label="推理步数",
+                            info="更多步数通常能获得更好的质量"
+                        )
+                        guidance_scale = gr.Slider(
+                            minimum=1.0, maximum=30.0, value=7.5, step=0.5,
+                            label="引导强度",
+                            info="控制对提示词的遵循程度"
+                        )
+                    with gr.Row():
+                        width = gr.Slider(
+                            minimum=256, maximum=1024, value=512, step=64,
+                            label="宽度"
+                        )
+                        height = gr.Slider(
+                            minimum=256, maximum=1024, value=512, step=64,
+                            label="高度"
+                        )
+                    with gr.Row():
+                        use_seed = gr.Checkbox(label="使用固定种子", value=False)
+                        seed = gr.Number(label="随机种子", value=42, precision=0)
+                # Generate button
+                generate_btn = gr.Button(
+                    "🎨 生成图像",
+                    variant="primary",
+                    elem_classes=["generate-btn"],
+                    size="lg"
+                )
+                # Result display
+                with gr.Group():
+                    gr.Markdown("### 🖼️ 生成结果")
+                    output_image = gr.Image(label="生成的图像", height=500, show_label=False)
+                    generation_info = gr.Textbox(label="生成信息", interactive=False, lines=6)
+        # Examples and further tips (collapsed by default)
+        with gr.Accordion("🎯 示例和技巧", open=False):
+            gr.Markdown("""
+            ### 📌 提示词示例：
+            - **风景场景**: "a serene mountain landscape with a lake, golden hour lighting"
+            - **城市场景**: "modern city skyline at sunset, futuristic architecture"
+            - **人物场景**: "a group of people in a park, casual clothing, natural lighting"
+            ### 🎨 使用技巧：
+            1. **边界框大小**: 合适的边界框大小有助于更好的控制效果
+            2. **描述精确性**: 为每个区域提供具体而精确的描述
+            3. **参数调节**: 较高的引导强度可以提高对提示词的遵循度
+            4. **种子控制**: 使用固定种子可以获得可重复的结果
+            """)
+        # Event bindings
+        init_btn.click(
+            fn=initialize_pipeline,
+            outputs=init_status,
+            show_progress=True
+        )
+        # Whenever the hidden textbox changes, re-render the summary and editor.
+        bbox_data_input.change(
+            fn=update_bbox_data,
+            inputs=bbox_data_input,
+            outputs=[bbox_info, bbox_editor]
+        )
+        generate_btn.click(
+            fn=generate_image_with_bbox,
+            inputs=[prompt, negative_prompt, num_steps, guidance_scale, width, height, seed, use_seed],
+            outputs=[output_image, generation_info],
+            show_progress=True
+        )
+        # JavaScript run on page load; it passes bounding-box data to the hidden
+        # textbox and defines the window-level functions that the editor HTML's
+        # inline handlers call (updateBboxField, deleteBbox, clearAllBboxes).
+        # NOTE(review): this script swaps the canvas for a clone and keeps its own
+        # `bboxes` list, separate from the script inside bbox_component.html --
+        # confirm which of the two is meant to be active.
+        demo.load(None, None, None, js="""
+        function() {
+            console.log('DreamRenderer界面已加载');
+            // 全局变量
+            let isDrawing = false;
+            let startX, startY, currentRect;
+            let bboxes = [];
+            const canvas = document.getElementById('bboxCanvas');
+            if (!canvas) {
+                console.error('画布元素未找到');
+                return;
+            }
+            const ctx = canvas.getContext('2d');
+            // 清除之前的监听器并添加新的
+            canvas.replaceWith(canvas.cloneNode(true));
+            const newCanvas = document.getElementById('bboxCanvas');
+            const newCtx = newCanvas.getContext('2d');
+            // 重新设置样式
+            newCanvas.style.display = 'block';
+            newCanvas.style.border = '2px solid #4ECDC4';
+            newCanvas.style.backgroundColor = 'white';
+            newCanvas.style.cursor = 'crosshair';
+            newCanvas.style.borderRadius = '8px';
+            // 边界框编辑函数
+            window.updateBboxField = function(index, field, value) {
+                if (index >= 0 && index < bboxes.length) {
+                    bboxes[index][field] = value;
+                    console.log(`更新边界框 ${index} 的 ${field}:`, value);
+                    updateBboxData();
+                }
+            };
+            window.deleteBbox = function(index) {
+                if (index >= 0 && index < bboxes.length) {
+                    bboxes.splice(index, 1);
+                    console.log(`删除边界框 ${index}`);
+                    redrawCanvas();
+                    updateBboxData();
+                }
+            };
+            window.clearAllBboxes = function() {
+                bboxes = [];
+                console.log('清空所有边界框');
+                redrawCanvas();
+                updateBboxData();
+            };
+            // 重绘画布
+            function redrawCanvas() {
+                newCtx.clearRect(0, 0, newCanvas.width, newCanvas.height);
+                bboxes.forEach((bbox, index) => {
+                    newCtx.strokeStyle = `hsl(${index * 60}, 70%, 50%)`;
+                    newCtx.lineWidth = 2;
+                    newCtx.strokeRect(bbox.x, bbox.y, bbox.width, bbox.height);
+                    // 绘制标签
+                    newCtx.fillStyle = `hsl(${index * 60}, 70%, 50%)`;
+                    newCtx.font = '12px Arial';
+                    newCtx.fillText(bbox.label || `区域${index + 1}`, bbox.x + 5, bbox.y - 5);
+                });
+            }
+            // 更新边界框数据
+            function updateBboxData() {
+                const relativeBboxes = bboxes.map(b => ({
+                    x: b.x / newCanvas.width,
+                    y: b.y / newCanvas.height,
+                    width: b.width / newCanvas.width,
+                    height: b.height / newCanvas.height,
+                    label: b.label || '',
+                    prompt: b.prompt || ''
+                }));
+                const dataString = JSON.stringify(relativeBboxes);
+                console.log('📤 更新数据:', relativeBboxes.length, '个边界框');
+                const textarea = document.querySelector('#bbox_data textarea');
+                if (textarea) {
+                    textarea.value = dataString;
+                    textarea.dispatchEvent(new Event('input', { bubbles: true }));
+                }
+            }
+            // 添加绘制事件监听器
+            newCanvas.addEventListener('mousedown', function(e) {
+                isDrawing = true;
+                startX = e.offsetX;
+                startY = e.offsetY;
+                console.log('🎯 开始绘制:', startX, startY);
+            });
+            newCanvas.addEventListener('mousemove', function(e) {
+                if (!isDrawing) return;
+                const currentX = e.offsetX;
+                const currentY = e.offsetY;
+                // 清除画布并重绘所有边界框
+                redrawCanvas();
+                // 绘制当前正在绘制的框
+                newCtx.strokeStyle = '#007bff';
+                newCtx.lineWidth = 2;
+                newCtx.setLineDash([5, 5]);
+                const width = currentX - startX;
+                const height = currentY - startY;
+                newCtx.strokeRect(startX, startY, width, height);
+                newCtx.setLineDash([]);
+            });
+            newCanvas.addEventListener('mouseup', function(e) {
+                if (!isDrawing) return;
+                isDrawing = false;
+                const endX = e.offsetX;
+                const endY = e.offsetY;
+                const width = endX - startX;
+                const height = endY - startY;
+                // 只有当框足够大时才添加
+                if (Math.abs(width) > 10 && Math.abs(height) > 10) {
+                    const bbox = {
+                        x: Math.min(startX, endX),
+                        y: Math.min(startY, endY),
+                        width: Math.abs(width),
+                        height: Math.abs(height),
+                        label: `区域${bboxes.length + 1}`,
+                        prompt: ''
+                    };
+                    bboxes.push(bbox);
+                    console.log('✅ 添加边界框:', bbox);
+                    redrawCanvas();
+                    updateBboxData();
+                }
+                console.log('🎯 绘制结束，当前边界框数量:', bboxes.length);
+            });
+            console.log('🚀 DreamRenderer边界框功能已就绪！');
+        }
+        """)
+    return demo
+if __name__ == "__main__":
+    # 创建并启动应用
+    demo = create_interface()
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=True,
+        show_api=False,
+        favicon_path=None,
+        show_error=True
+    )

bbox_component.html ADDED Viewed

	@@ -0,0 +1,353 @@

+<!DOCTYPE html>
+<html>
+<head>
+    <style>
+        /* Frame around the drawing canvas. */
+        .canvas-container {
+            position: relative;
+            display: inline-block;
+            border: 2px solid #ddd;
+            border-radius: 8px;
+            overflow: hidden;
+            background-color: #f8f9fa;
+        }
+        /* The canvas itself: white surface with a crosshair cursor. */
+        .bbox-canvas {
+            cursor: crosshair;
+            display: block;
+            background-color: white;
+        }
+        /* Scrollable list of boxes and its rows. */
+        .bbox-list {
+            margin-top: 10px;
+            padding: 10px;
+            background-color: #f8f9fa;
+            border-radius: 5px;
+            max-height: 200px;
+            overflow-y: auto;
+        }
+        .bbox-item {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            padding: 5px;
+            margin: 2px 0;
+            background-color: white;
+            border-radius: 3px;
+            border-left: 4px solid #007bff;
+        }
+        /* Text input inside a row. */
+        .bbox-input {
+            width: 150px;
+            padding: 2px 5px;
+            border: 1px solid #ddd;
+            border-radius: 3px;
+        }
+        /* Per-row delete button. */
+        .delete-btn {
+            background-color: #dc3545;
+            color: white;
+            border: none;
+            padding: 2px 8px;
+            border-radius: 3px;
+            cursor: pointer;
+            font-size: 12px;
+        }
+        .delete-btn:hover {
+            background-color: #c82333;
+        }
+        /* "Clear all" button. */
+        .clear-btn {
+            background-color: #6c757d;
+            color: white;
+            border: none;
+            padding: 5px 15px;
+            border-radius: 3px;
+            cursor: pointer;
+            margin-top: 10px;
+        }
+        .clear-btn:hover {
+            background-color: #5a6268;
+        }
+        /* Small colour swatch shown next to each row's input. */
+        .color-indicator {
+            width: 20px;
+            height: 20px;
+            border-radius: 3px;
+            border: 2px solid white;
+            box-shadow: 0 0 3px rgba(0,0,0,0.3);
+        }
+        /* Blue hint banner. */
+        .info-text {
+            margin: 10px 0;
+            padding: 8px;
+            background-color: #e3f2fd;
+            border-radius: 4px;
+            font-size: 14px;
+            color: #1976d2;
+        }
+        /* Yellow monospace panel for debug output. */
+        .debug-info {
+            margin: 10px 0;
+            padding: 8px;
+            background-color: #fff3cd;
+            border-radius: 4px;
+            font-size: 12px;
+            color: #856404;
+            font-family: monospace;
+        }
+    </style>
+</head>
+<body>
+    <div class="info-text">
+        💡 拖拽鼠标在画布上绘制边界框，然后为每个框添加描述
+    </div>
+    <div class="canvas-container">
+        <canvas id="bboxCanvas" class="bbox-canvas" width="512" height="512"></canvas>
+    </div>
+    <script>
+        console.log('边界框组件已加载');
+        // Drawing surface and its 2D context.
+        const canvas = document.getElementById('bboxCanvas');
+        const ctx = canvas.getContext('2d');
+        // Element that log() writes its status text into.
+        // NOTE(review): no element with id "debugInfo" appears in the markup above
+        // this script -- confirm it exists, otherwise this lookup yields null.
+        const debugInfo = document.getElementById('debugInfo');
+        // Drag state: whether a drag is in progress and where it began.
+        let isDrawing = false;
+        let startX, startY;
+        // Finished boxes, in the order they were drawn.
+        let boxes = [];
+        // Not referenced in the visible part of this script.
+        let currentBox = null;
+        // Stroke colours, cycled through by colorIndex as boxes are added.
+        const colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD', '#98D8C8', '#F7DC6F'];
+        let colorIndex = 0;
+        // 添加调试日志函数
+        function log(message) {
+            console.log(message);
+            debugInfo.textContent = `调试: ${message}`;
+        }
+        // 初始化画布
+        function initCanvas() {
+            ctx.fillStyle = '#ffffff';
+            ctx.fillRect(0, 0, canvas.width, canvas.height);
+            ctx.strokeStyle = '#ddd';
+            ctx.lineWidth = 1;
+            ctx.strokeRect(0, 0, canvas.width, canvas.height);
+            log('画布已初始化');
+        }
+        // 事件监听器
+        canvas.addEventListener('mousedown', startDrawing);
+        canvas.addEventListener('mousemove', draw);
+        canvas.addEventListener('mouseup', stopDrawing);
+        canvas.addEventListener('mouseleave', stopDrawing); // 添加鼠标离开事件
+        function startDrawing(e) {
+            isDrawing = true;
+            const rect = canvas.getBoundingClientRect();
+            startX = e.clientX - rect.left;
+            startY = e.clientY - rect.top;
+            log(`开始绘制: (${Math.round(startX)}, ${Math.round(startY)})`);
+        }
+        function draw(e) {
+            if (!isDrawing) return;
+            const rect = canvas.getBoundingClientRect();
+            const currentX = e.clientX - rect.left;
+            const currentY = e.clientY - rect.top;
+            redrawCanvas();
+            // 绘制当前正在绘制的框
+            ctx.strokeStyle = colors[colorIndex % colors.length];
+            ctx.lineWidth = 2;
+            ctx.setLineDash([5, 5]);
+            ctx.strokeRect(startX, startY, currentX - startX, currentY - startY);
+            ctx.setLineDash([]);
+        }
+        function stopDrawing(e) {
+            if (!isDrawing) return;
+            isDrawing = false;
+            const rect = canvas.getBoundingClientRect();
+            const endX = e.clientX - rect.left;
+            const endY = e.clientY - rect.top;
+            const width = Math.abs(endX - startX);
+            const height = Math.abs(endY - startY);
+            if (width > 10 && height > 10) {
+                const box = {
+                    x: Math.min(startX, endX),
+                    y: Math.min(startY, endY),
+                    width: width,
+                    height: height,
+                    color: colors[colorIndex % colors.length],
+                    label: '',
+                    id: Date.now()
+                };
+                boxes.push(box);
+                colorIndex++;
+                addBoxToList(box);
+                redrawCanvas();
+                updateOutput();
+                log(`添加边界框: ${boxes.length}个`);
+            } else {
+                redrawCanvas();
+                log('边界框太小，已忽略');
+            }
+        }
+        function redrawCanvas() {
+            // 清除画布并重新绘制背景
+            ctx.fillStyle = '#ffffff';
+            ctx.fillRect(0, 0, canvas.width, canvas.height);
+            // 绘制所有边界框
+            boxes.forEach((box, index) => {
+                ctx.strokeStyle = box.color;
+                ctx.lineWidth = 2;
+                ctx.setLineDash([]);
+                ctx.strokeRect(box.x, box.y, box.width, box.height);
+                // 绘制标签
+                if (box.label) {
+                    ctx.fillStyle = box.color;
+                    ctx.font = '14px Arial';
+                    ctx.fillText(box.label, box.x, box.y - 5);
+                }
+                // 绘制索引号
+                ctx.fillStyle = box.color;
+                ctx.font = 'bold 12px Arial';
+                ctx.fillText(`${index + 1}`, box.x + 3, box.y + 15);
+            });
+        }
+        function addBoxToList(box) {
+            const bboxItems = document.getElementById('bboxItems');
+            const item = document.createElement('div');
+            item.className = 'bbox-item';
+            item.id = `bbox-item-${box.id}`;
+            item.innerHTML = `
+                <div style="display: flex; align-items: center; gap: 10px;">
+                    <div class="color-indicator" style="background-color: ${box.color}"></div>
+                    <input type="text" class="bbox-input" placeholder="输入描述..."
+                           onchange="updateBoxLabel(${box.id}, this.value)"
+                           oninput="updateBoxLabel(${box.id}, this.value)">
+                    <span style="font-size: 12px; color: #666;">
+                        (${Math.round(box.x)}, ${Math.round(box.y)}, ${Math.round(box.width)}, ${Math.round(box.height)})
+                    </span>
+                </div>
+                <button class="delete-btn" onclick="deleteBox(${box.id})">删除</button>
+            `;
+            bboxItems.appendChild(item);
+        }
+        function updateBoxLabel(boxId, label) {
+            const box = boxes.find(b => b.id === boxId);
+            if (box) {
+                box.label = label;
+                redrawCanvas();
+                updateOutput();
+                log(`更新标签: ${label}`);
+            }
+        }
+        function deleteBox(boxId) {
+            const oldLength = boxes.length;
+            boxes = boxes.filter(b => b.id !== boxId);
+            redrawBboxList();
+            redrawCanvas();
+            updateOutput();
+            log(`删除边界框: ${oldLength} -> ${boxes.length}`);
+        }
+        function clearAllBoxes() {
+            boxes = [];
+            redrawBboxList();
+            redrawCanvas();
+            updateOutput();
+            log('清除所有边界框');
+        }
+        function redrawBboxList() {
+            const bboxItems = document.getElementById('bboxItems');
+            bboxItems.innerHTML = '';
+            boxes.forEach(box => addBoxToList(box));
+        }
+        function updateOutput() {
+            try {
+                // 将边界框数据传递给Gradio
+                const boxData = boxes.map(box => ({
+                    x: box.x / canvas.width,  // 归一化坐标
+                    y: box.y / canvas.height,
+                    width: box.width / canvas.width,
+                    height: box.height / canvas.height,
+                    label: box.label || ''
+                }));
+                const dataString = JSON.stringify(boxData);
+                log(`发送数据: ${boxData.length}个边界框`);
+                // 直接查找Gradio输入框（因为组件直接嵌入在页面中）
+                const bboxInput = document.querySelector('#bbox_data textarea');
+                if (bboxInput) {
+                    bboxInput.value = dataString;
+                    // 触发多种事件确保Gradio能检测到变化
+                    bboxInput.dispatchEvent(new Event('input', { bubbles: true }));
+                    bboxInput.dispatchEvent(new Event('change', { bubbles: true }));
+                    bboxInput.dispatchEvent(new Event('blur', { bubbles: true }));
+                    log('直接更新Gradio输入框成功');
+                } else {
+                    // 如果直接查找失败，尝试延迟查找
+                    setTimeout(() => {
+                        const delayedBboxInput = document.querySelector('#bbox_data textarea') ||
+                                                document.querySelector('[data-testid="textbox"] textarea');
+                        if (delayedBboxInput) {
+                            delayedBboxInput.value = dataString;
+                            delayedBboxInput.dispatchEvent(new Event('input', { bubbles: true }));
+                            delayedBboxInput.dispatchEvent(new Event('change', { bubbles: true }));
+                            log('延迟更新Gradio输入框成功');
+                        } else {
+                            log('未找到Gradio输入框');
+                        }
+                    }, 500);
+                }
+                // 同时触发自定义事件作为备用
+                document.dispatchEvent(new CustomEvent('bbox_data_update', {
+                    detail: { data: boxData, dataString: dataString }
+                }));
+            } catch (error) {
+                log(`更新输出时出错: ${error.message}`);
+                console.error('updateOutput error:', error);
+            }
+        }
+        // Receive image updates posted to this window (intended to come from
+        // Gradio). A message of the form { type: 'update_image', imageUrl }
+        // loads the image, resizes the canvas to the image's dimensions,
+        // draws it, and then calls redrawCanvas().
+        // NOTE(review): event.origin is not checked, so any window able to
+        // postMessage here can supply imageUrl — confirm this is acceptable.
+        window.addEventListener('message', function(event) {
+            if (event.data && event.data.type === 'update_image') {
+                const img = new Image();
+                // Drawing happens only once the image has finished loading
+                img.onload = function() {
+                    canvas.width = img.width;
+                    canvas.height = img.height;
+                    ctx.drawImage(img, 0, 0);
+                    redrawCanvas();
+                    log('图片已更新');
+                };
+                img.src = event.data.imageUrl;
+            }
+        });
+        // Initialize once the page has finished loading
+        window.addEventListener('load', function() {
+            initCanvas();
+            log('组件已就绪');
+        });
+        // Initialize immediately as well.
+        // NOTE(review): together with the load handler above this calls
+        // initCanvas() twice — confirm initCanvas is safe to run repeatedly
+        // (e.g. that it does not register duplicate event listeners).
+        initCanvas();
+    </script>
+</body>
+</html>

dream_renderer.py ADDED Viewed

	@@ -0,0 +1,312 @@

+"""
+DreamRenderer实现模块
+"""
+import torch
+import torch.nn.functional as F
+from diffusers import FluxPipeline
+from PIL import Image, ImageDraw
+import numpy as np
+from typing import List, Dict, Optional, Tuple
+import spaces
+class DreamRendererPipeline:
+    """
+    DreamRenderer管道实现
+    """
+    def __init__(self, model_id: str = "black-forest-labs/FLUX.1-dev"):
+        """
+        初始化DreamRenderer管道
+        Args:
+            model_id: 使用的模型ID
+        """
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model_id = model_id
+        self.pipe = None
+        self.loaded = False
+    def load_model(self):
+        """加载FLUX模型"""
+        try:
+            print(f"正在加载模型: {self.model_id}")
+            self.pipe = FluxPipeline.from_pretrained(
+                self.model_id,
+                torch_dtype=torch.bfloat16 if self.device == "cuda" else torch.float32,
+                use_safetensors=True
+            )
+            self.pipe = self.pipe.to(self.device)
+            # 启用内存高效的注意力机制
+            if hasattr(self.pipe, 'enable_xformers_memory_efficient_attention'):
+                self.pipe.enable_xformers_memory_efficient_attention()
+            self.loaded = True
+            print("模型加载完成！")
+            return True
+        except Exception as e:
+            print(f"模型加载失败: {str(e)}")
+            self.loaded = False
+            return False
+    def create_layout_mask(self, bbox_data: List[Dict], width: int, height: int) -> torch.Tensor:
+        """
+        根据边界框数据创建布局掩码
+        Args:
+            bbox_data: 边界框数据列表
+            width: 图像宽度
+            height: 图像高度
+        Returns:
+            布局掩码张量
+        """
+        mask = torch.zeros((height, width), dtype=torch.float32)
+        for i, bbox in enumerate(bbox_data):
+            x = int(bbox['x'] * width)
+            y = int(bbox['y'] * height)
+            w = int(bbox['width'] * width)
+            h = int(bbox['height'] * height)
+            # 在掩码中标记区域
+            mask[y:y+h, x:x+w] = i + 1
+        return mask
+    def create_attention_mask(self, bbox_data: List[Dict], width: int, height: int) -> List[torch.Tensor]:
+        """
+        为每个实例创建注意力掩码
+        Args:
+            bbox_data: 边界框数据列表
+            width: 图像宽度
+            height: 图像高度
+        Returns:
+            注意力掩码列表
+        """
+        masks = []
+        for bbox in bbox_data:
+            mask = torch.zeros((height, width), dtype=torch.float32)
+            x = int(bbox['x'] * width)
+            y = int(bbox['y'] * height)
+            w = int(bbox['width'] * width)
+            h = int(bbox['height'] * height)
+            # 创建软边界的掩码
+            mask[y:y+h, x:x+w] = 1.0
+            # 应用高斯模糊以创建软边界
+            if torch.cuda.is_available():
+                mask = mask.unsqueeze(0).unsqueeze(0).cuda()
+                mask = F.avg_pool2d(mask, kernel_size=3, stride=1, padding=1)
+                mask = mask.squeeze().cpu()
+            masks.append(mask)
+        return masks
+    def modify_attention_weights(self, attention_weights: torch.Tensor,
+                               attention_masks: List[torch.Tensor],
+                               current_token_idx: int) -> torch.Tensor:
+        """
+        修改注意力权重以实现区域控制
+        Args:
+            attention_weights: 原始注意力权重
+            attention_masks: 注意力掩码列表
+            current_token_idx: 当前token索引
+        Returns:
+            修改后的注意力权重
+        """
+        # 这里实现DreamRenderer的核心注意力修改逻辑
+        # 根据当前token和对应的区域掩码调整注意力权重
+        if current_token_idx < len(attention_masks):
+            mask = attention_masks[current_token_idx]
+            # 将掩码应用到注意力权重
+            if mask.device != attention_weights.device:
+                mask = mask.to(attention_weights.device)
+            # 增强对应区域的注意力
+            attention_weights = attention_weights * (1 + mask * 0.5)
+        return attention_weights
+    @spaces.GPU
+    def generate_image(self,
+                      prompt: str,
+                      bbox_data: List[Dict],
+                      negative_prompt: str = "",
+                      num_inference_steps: int = 20,
+                      guidance_scale: float = 7.5,
+                      width: int = 512,
+                      height: int = 512,
+                      seed: Optional[int] = None) -> Image.Image:
+        """
+        生成图像的主要函数
+        Args:
+            prompt: 主提示词
+            bbox_data: 边界框数据
+            negative_prompt: 负向提示词
+            num_inference_steps: 推理步数
+            guidance_scale: 引导强度
+            width: 图像宽度
+            height: 图像高度
+            seed: 随机种子
+        Returns:
+            生成的图像
+        """
+        if not self.loaded:
+            if not self.load_model():
+                # 如果模型加载失败，返回一个演示图像
+                return self._create_demo_image(prompt, bbox_data, width, height)
+        # 设置随机种子
+        if seed is not None:
+            generator = torch.Generator(device=self.device).manual_seed(seed)
+        else:
+            generator = None
+        try:
+            # 构建完整的提示词
+            full_prompt = self._build_full_prompt(prompt, bbox_data)
+            # 如果没有边界框数据，直接使用标准生成
+            if not bbox_data:
+                image = self.pipe(
+                    prompt=full_prompt,
+                    negative_prompt=negative_prompt,
+                    num_inference_steps=num_inference_steps,
+                    guidance_scale=guidance_scale,
+                    width=width,
+                    height=height,
+                    generator=generator
+                ).images[0]
+            else:
+                # 使用DreamRenderer的区域控制逻辑
+                image = self._generate_with_bbox_control(
+                    full_prompt, bbox_data, negative_prompt,
+                    num_inference_steps, guidance_scale,
+                    width, height, generator
+                )
+            return image
+        except Exception as e:
+            print(f"生成图像时出错: {str(e)}")
+            # 返回演示图像
+            return self._create_demo_image(prompt, bbox_data, width, height)
+    def _build_full_prompt(self, main_prompt: str, bbox_data: List[Dict]) -> str:
+        """构建包含区域描述的完整提示词"""
+        full_prompt = main_prompt
+        if bbox_data:
+            region_descriptions = []
+            for i, bbox in enumerate(bbox_data):
+                if bbox['label']:
+                    region_descriptions.append(f"{bbox['label']}")
+            if region_descriptions:
+                full_prompt += ", " + ", ".join(region_descriptions)
+        return full_prompt
+    def _generate_with_bbox_control(self, prompt: str, bbox_data: List[Dict],
+                                   negative_prompt: str, num_inference_steps: int,
+                                   guidance_scale: float, width: int, height: int,
+                                   generator: Optional[torch.Generator]) -> Image.Image:
+        """使用边界框控制生成图像"""
+        # 创建注意力掩码
+        attention_masks = self.create_attention_mask(bbox_data, width, height)
+        # 这里应该实现DreamRenderer的核心算法
+        # 包括注意力修改、交叉注意力控制等
+        # 现在先用标准方法生成，后续可以替换为实际的DreamRenderer实现
+        image = self.pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+            width=width,
+            height=height,
+            generator=generator
+        ).images[0]
+        # 在生成的图像上绘制边界框作为演示
+        image = self._add_bbox_overlay(image, bbox_data)
+        return image
+    def _add_bbox_overlay(self, image: Image.Image, bbox_data: List[Dict]) -> Image.Image:
+        """在图像上添加边界框覆盖层（用于演示）"""
+        if not bbox_data:
+            return image
+        draw = ImageDraw.Draw(image)
+        colors = ['red', 'blue', 'green', 'yellow', 'purple', 'orange', 'pink', 'cyan']
+        for i, bbox in enumerate(bbox_data):
+            color = colors[i % len(colors)]
+            x = int(bbox['x'] * image.width)
+            y = int(bbox['y'] * image.height)
+            w = int(bbox['width'] * image.width)
+            h = int(bbox['height'] * image.height)
+            # 绘制边界框
+            draw.rectangle([x, y, x+w, y+h], outline=color, width=2)
+            # 绘制标签
+            if bbox['label']:
+                draw.text((x, y-15), bbox['label'], fill=color)
+        return image
+    def _create_demo_image(self, prompt: str, bbox_data: List[Dict],
+                          width: int, height: int) -> Image.Image:
+        """创建演示图像（当模型加载失败时使用）"""
+        # 创建一个渐变背景
+        image = Image.new('RGB', (width, height))
+        draw = ImageDraw.Draw(image)
+        # 绘制渐变背景
+        for y in range(height):
+            color_value = int(255 * (y / height))
+            color = (100 + color_value//3, 150 + color_value//4, 200 + color_value//5)
+            draw.line([(0, y), (width, y)], fill=color)
+        # 添加提示词文本
+        draw.text((10, 10), f"Prompt: {prompt}", fill='white')
+        draw.text((10, 30), "DreamRenderer Demo", fill='white')
+        # 绘制边界框
+        colors = ['red', 'blue', 'green', 'yellow', 'purple', 'orange']
+        for i, bbox in enumerate(bbox_data):
+            color = colors[i % len(colors)]
+            x = int(bbox['x'] * width)
+            y = int(bbox['y'] * height)
+            w = int(bbox['width'] * width)
+            h = int(bbox['height'] * height)
+            # 绘制边界框
+            draw.rectangle([x, y, x+w, y+h], outline=color, width=3)
+            # 绘制标签
+            if bbox['label']:
+                draw.text((x, y-20), bbox['label'], fill=color)
+        return image

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+gradio>=4.40.0
+spaces
+torch>=2.0.0
+torchvision
+diffusers>=0.30.0
+transformers>=4.30.0
+accelerate
+pillow
+numpy
+opencv-python-headless