methunraj committed on
Commit
cfeb3a6
·
1 Parent(s): e09cfd6

feat: initialize project structure with core components

Browse files

build: add Dockerfile and CI/CD configuration
docs: update README with installation and usage instructions
style: format code and add consistent file structure

.claude/settings.local.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(mkdir:*)",
5
+ "Bash(python test:*)",
6
+ "Bash(/usr/local/bin/python3:*)",
7
+ "Bash(ls:*)",
8
+ "Bash(rm:*)",
9
+ "Bash(python:*)",
10
+ "Bash(find:*)",
11
+ "mcp__zen__analyze",
12
+ "Bash(pkill:*)",
13
+ "Bash(touch:*)",
14
+ "Bash(docker build:*)",
15
+ "Bash(/dev/null)"
16
+ ],
17
+ "deny": []
18
+ }
19
+ }
.dockerignore ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Environment
24
+ .env
25
+ .venv
26
+ env/
27
+ venv/
28
+ ENV/
29
+ env.bak/
30
+ venv.bak/
31
+
32
+ # IDE
33
+ .vscode/
34
+ .idea/
35
+ *.swp
36
+ *.swo
37
+ *~
38
+
39
+ # OS
40
+ .DS_Store
41
+ .DS_Store?
42
+ ._*
43
+ .Spotlight-V100
44
+ .Trashes
45
+ ehthumbs.db
46
+ Thumbs.db
47
+
48
+ # Logs
49
+ *.log
50
+ logs/
51
+ app.log
52
+
53
+ # Cache
54
+ .ruff_cache/
55
+ .pytest_cache/
56
+ .coverage
57
+
58
+ # Temporary files
59
+ temp/
60
+ tmp/
61
+
62
+ # Git
63
+ .git/
64
+ .gitignore
65
+
66
+ # Documentation
67
+ README.md
68
+ *.md
69
+
70
+ # Claude
71
+ .claude/
.gitignore ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ pip-wheel-metadata/
20
+ share/python-wheels/
21
+ *.egg-info/
22
+ .installed.cfg
23
+ *.egg
24
+ MANIFEST
25
+
26
+ # PyInstaller
27
+ *.manifest
28
+ *.spec
29
+
30
+ # Installer logs
31
+ pip-log.txt
32
+ pip-delete-this-directory.txt
33
+
34
+ # Unit test / coverage reports
35
+ htmlcov/
36
+ .tox/
37
+ .nox/
38
+ .coverage
39
+ .coverage.*
40
+ .cache
41
+ nosetests.xml
42
+ coverage.xml
43
+ *.cover
44
+ *.py,cover
45
+ .hypothesis/
46
+ .pytest_cache/
47
+
48
+ # Translations
49
+ *.mo
50
+ *.pot
51
+
52
+ # Django stuff:
53
+ *.log
54
+ local_settings.py
55
+ db.sqlite3
56
+ db.sqlite3-journal
57
+
58
+ # Flask stuff:
59
+ instance/
60
+ .webassets-cache
61
+
62
+ # Scrapy stuff:
63
+ .scrapy
64
+
65
+ # Sphinx documentation
66
+ docs/_build/
67
+
68
+ # PyBuilder
69
+ target/
70
+
71
+ # Jupyter Notebook
72
+ .ipynb_checkpoints
73
+
74
+ # IPython
75
+ profile_default/
76
+ ipython_config.py
77
+
78
+ # pyenv
79
+ .python-version
80
+
81
+ # pipenv
82
+ Pipfile.lock
83
+
84
+ # PEP 582
85
+ __pypackages__/
86
+
87
+ # Celery stuff
88
+ celerybeat-schedule
89
+ celerybeat.pid
90
+
91
+ # SageMath parsed files
92
+ *.sage.py
93
+
94
+ # Environments
95
+ .env
96
+ .venv
97
+ env/
98
+ venv/
99
+ ENV/
100
+ env.bak/
101
+ venv.bak/
102
+ .env.production
103
+
104
+ # Spyder project settings
105
+ .spyderproject
106
+ .spyproject
107
+
108
+ # Rope project settings
109
+ .ropeproject
110
+
111
+ # mkdocs documentation
112
+ /site
113
+
114
+ # mypy
115
+ .mypy_cache/
116
+ .dmypy.json
117
+ dmypy.json
118
+
119
+ # Pyre type checker
120
+ .pyre/
121
+
122
+ # macOS
123
+ .DS_Store
124
+ .AppleDouble
125
+ .LSOverride
126
+
127
+ # Thumbnails
128
+ ._*
129
+
130
+ # Files that might appear in the root of a volume
131
+ .DocumentRevisions-V100
132
+ .fseventsd
133
+ .Spotlight-V100
134
+ .TemporaryItems
135
+ .Trashes
136
+ .VolumeIcon.icns
137
+ .com.apple.timemachine.donotpresent
138
+
139
+ # Directories potentially created on remote AFP share
140
+ .AppleDB
141
+ .AppleDesktop
142
+ Network Trash Folder
143
+ Temporary Items
144
+ .apdisk
145
+
146
+ # Application specific
147
+ logs/
148
+ temp/
149
+ *.log
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
.vercel/project.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"projectName":"trae_tl66rxeh"}
Dockerfile ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use the official Python 3.11 slim image for better compatibility
2
+ FROM python:3.11-slim
3
+
4
+ # Set environment variables for optimal Python and Gradio behavior
5
+ ENV PYTHONUNBUFFERED=1
6
+ ENV PYTHONDONTWRITEBYTECODE=1
7
+ ENV PIP_NO_CACHE_DIR=1
8
+ ENV PIP_DISABLE_PIP_VERSION_CHECK=1
9
+ ENV DEBIAN_FRONTEND=noninteractive
10
+
11
+ # Create app user for security (but run as root for HF Spaces compatibility)
12
+ RUN useradd --create-home --shell /bin/bash app
13
+
14
+ # Set the working directory inside the container
15
+ WORKDIR /app
16
+
17
+ # Install system dependencies required for multi-user AI application
18
+ RUN apt-get update && apt-get install -y --no-install-recommends \
19
+ # Build tools
20
+ build-essential \
21
+ gcc \
22
+ g++ \
23
+ make \
24
+ cmake \
25
+ pkg-config \
26
+ # Network and download tools
27
+ curl \
28
+ wget \
29
+ git \
30
+ # Development libraries
31
+ libffi-dev \
32
+ libssl-dev \
33
+ # Image processing libraries
34
+ libjpeg-dev \
35
+ libpng-dev \
36
+ libfreetype6-dev \
37
+ libtiff5-dev \
38
+ libopenjp2-7-dev \
39
+ # Document processing libraries
40
+ libxml2-dev \
41
+ libxslt1-dev \
42
+ zlib1g-dev \
43
+ # OCR and PDF processing
44
+ tesseract-ocr \
45
+ tesseract-ocr-eng \
46
+ poppler-utils \
47
+ # SQLite for session storage
48
+ sqlite3 \
49
+ libsqlite3-dev \
50
+ # Cleanup
51
+ && apt-get clean \
52
+ && rm -rf /var/lib/apt/lists/* \
53
+ && rm -rf /var/cache/apt/*
54
+
55
+ # Upgrade pip to latest version
56
+ RUN python -m pip install --upgrade pip setuptools wheel
57
+
58
+ # Set pip configuration for better performance and reliability
59
+ RUN pip config set global.trusted-host "pypi.org files.pythonhosted.org pypi.python.org" \
60
+ && pip config set global.no-cache-dir true \
61
+ && pip config set global.disable-pip-version-check true
62
+
63
+ # Copy the requirements file first to leverage Docker's build cache
64
+ COPY requirements.txt .
65
+
66
+ # Install Python dependencies with optimizations for concurrent usage
67
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt \
68
+ && pip install --no-cache-dir \
69
+ # Additional packages for multi-user support
70
+ gunicorn \
71
+ uvloop \
72
+ # Performance monitoring
73
+ psutil \
74
+ && pip list --outdated
75
+
76
+ # Copy the rest of the application code
77
+ COPY . .
78
+
79
+ # Create comprehensive directory structure for multi-user application
80
+ RUN mkdir -p \
81
+ # Core application directories
82
+ temp logs uploads downloads cache \
83
+ # Multi-user session directories (will be created dynamically)
84
+ /tmp/data_extractor_temp \
85
+ # WebSocket and terminal directories
86
+ static \
87
+ # Database directory for session storage
88
+ data \
89
+ && chmod -R 755 /app
90
+
91
+ # Set optimized permissions for multi-user concurrent access
92
+ RUN chmod -R 777 temp logs uploads downloads cache /tmp \
93
+ && chmod -R 755 static \
94
+ && chmod 755 app.py \
95
+ && chmod -R 755 config utils workflow models
96
+
97
+ # Create non-root user but keep root permissions for HF Spaces
98
+ RUN chown -R app:app /app \
99
+ && chown -R app:app /tmp/data_extractor_temp
100
+
101
+ # Set comprehensive environment variables for multi-user application
102
+ ENV PYTHONPATH=/app
103
+ ENV GRADIO_SERVER_NAME=0.0.0.0
104
+ ENV GRADIO_SERVER_PORT=7860
105
+ ENV GRADIO_SHARE=False
106
+ ENV GRADIO_DEBUG=False
107
+
108
+ # Matplotlib configuration for headless operation
109
+ ENV MPLBACKEND=Agg
110
+ ENV MPLCONFIGDIR=/tmp/mpl_cache
111
+
112
+ # Optimize for multi-user concurrent access
113
+ ENV GRADIO_QUEUE_DEFAULT_CONCURRENCY=10
114
+ ENV GRADIO_MAX_THREADS=20
115
+
116
+ # WebSocket and networking configuration
117
+ ENV WEBSOCKET_HOST=0.0.0.0
118
+ ENV WEBSOCKET_PORT=8765
119
+
120
+ # Session and temporary file configuration
121
+ ENV TEMP_DIR=/tmp/data_extractor_temp
122
+ ENV SESSION_TIMEOUT=1800
123
+ ENV MAX_FILE_SIZE_MB=50
124
+
125
+ # AI model configuration (will be overridden by user env vars)
126
+ ENV COORDINATOR_MODEL=gemini-2.5-pro
127
+ ENV DATA_EXTRACTOR_MODEL=gemini-2.5-pro
128
+ ENV DATA_ARRANGER_MODEL=gemini-2.5-pro
129
+ ENV CODE_GENERATOR_MODEL=gemini-2.5-pro
130
+
131
+ # Security and performance settings
132
+ ENV PYTHONSAFEPATH=1
133
+ ENV PYTHONHASHSEED=random
134
+
135
+ # Expose the port that the Gradio application will run on
136
+ EXPOSE 7860
137
+ EXPOSE 8765
138
+
139
+ # Health check for container monitoring
140
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
141
+ CMD curl -f http://localhost:7860/ || exit 1
142
+
143
+ # Run as root for Hugging Face Spaces compatibility
144
+ USER root
145
+
146
+ # Create startup script for better error handling and logging
147
+ RUN echo '#!/bin/bash\n\
148
+ set -e\n\
149
+ echo "🚀 Starting Data Extractor Multi-User Application..."\n\
150
+ echo "📊 Python version: $(python --version)"\n\
151
+ echo "🌐 Server: 0.0.0.0:7860"\n\
152
+ echo "👥 Multi-user concurrency: Enabled"\n\
153
+ echo "🔒 Session isolation: Active"\n\
154
+ echo "💾 Temp directory: $TEMP_DIR"\n\
155
+ \n\
156
+ # Create runtime directories\n\
157
+ mkdir -p "$TEMP_DIR"\n\
158
+ mkdir -p /tmp/mpl_cache\n\
159
+ chmod 777 "$TEMP_DIR" /tmp/mpl_cache\n\
160
+ \n\
161
+ # Start the application\n\
162
+ exec python app.py\n\
163
+ ' > /app/start.sh && chmod +x /app/start.sh
164
+
165
+ # The command to run when the container starts
166
+ CMD ["/app/start.sh"]
README.md CHANGED
@@ -1,11 +1,162 @@
1
  ---
2
- title: Data Extractor Using Gemini
3
- emoji: 🐢
4
- colorFrom: yellow
5
- colorTo: blue
6
  sdk: docker
7
  pinned: false
8
- short_description: Document processing application built with Agno v1.7.4 featu
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Agno Document Analysis
3
+ emoji: 📄
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
+ license: mit
9
  ---
10
 
11
+ # Agno Document Analysis Workflow
12
+
13
+ A sophisticated document processing application built with Agno v1.7.4 featuring a multi-agent workflow for intelligent document analysis and data extraction.
14
+
15
+ ## Features
16
+
17
+ - **5-Agent Workflow**: Coordinator, Prompt Engineer, Data Extractor, Data Arranger, Code Generator
18
+ - **Multi-format Support**: PDF, TXT, PNG, JPG, JPEG, DOCX, XLSX, CSV, MD, JSON, XML, HTML, PY, JS, TS, DOC, XLS, PPT, PPTX
19
+ - **Real-time Processing**: Streaming interface with live updates
20
+ - **Sandboxed Execution**: Safe code execution environment
21
+ - **Beautiful UI**: Modern Gradio interface with custom animations
22
+
23
+ ## Quick Start
24
+
25
+ ### Automated Installation
26
+
27
+ ```bash
28
+ # Clone the repository
29
+ git clone <repository-url>
30
+ cd Data_Extractor
31
+
32
+ # Quick installation (recommended)
33
+ ./install.sh
34
+
35
+ # Or use Python setup script
36
+ python setup.py
37
+ ```
38
+
39
+ ### Manual Installation
40
+
41
+ ```bash
42
+ # Create virtual environment
43
+ python -m venv data_extractor_env
44
+ source data_extractor_env/bin/activate # On Windows: data_extractor_env\Scripts\activate
45
+
46
+ # Install dependencies
47
+ pip install -r requirements.txt
48
+
49
+ # Create environment file
50
+ cp .env.example .env # Update with your API keys
51
+
52
+ # Run the application
53
+ python app.py
54
+ ```
55
+
56
+ ## Installation Options
57
+
58
+ ### Requirements Files
59
+
60
+ - **`requirements-minimal.txt`**: Essential dependencies only (~50 packages)
61
+ ```bash
62
+ pip install -r requirements-minimal.txt
63
+ ```
64
+
65
+ - **`requirements.txt`**: Complete feature set (~200+ packages)
66
+ ```bash
67
+ pip install -r requirements.txt
68
+ ```
69
+
70
+ - **`requirements-dev.txt`**: Development dependencies with testing tools
71
+ ```bash
72
+ pip install -r requirements-dev.txt
73
+ ```
74
+
75
+ ### System Dependencies
76
+
77
+ Some features require system-level dependencies:
78
+
79
+ **macOS:**
80
+ ```bash
81
+ brew install tesseract imagemagick poppler
82
+ ```
83
+
84
+ **Ubuntu/Debian:**
85
+ ```bash
86
+ sudo apt-get install tesseract-ocr libmagickwand-dev poppler-utils
87
+ ```
88
+
89
+ **Windows:**
90
+ ```bash
91
+ choco install tesseract imagemagick poppler
92
+ ```
93
+
94
+ ## Usage
95
+
96
+ 1. **Setup Environment**: Follow installation instructions above
97
+ 2. **Configure API Keys**: Update `.env` file with your API keys
98
+ 3. **Upload Document**: Support for 20+ file formats
99
+ 4. **Select Analysis**: Choose from predefined types or custom prompts
100
+ 5. **Process**: Watch the multi-agent workflow in real-time
101
+ 6. **Download Results**: Get structured data and generated Excel reports
102
+
103
+ ## Environment Variables
104
+
105
+ Create a `.env` file with the following variables:
106
+
107
+ ```bash
108
+ # Required API Keys
109
+ GOOGLE_API_KEY=your_google_api_key_here
110
+ OPENAI_API_KEY=your_openai_api_key_here # Optional
111
+
112
+ # Application Settings
113
+ DEBUG=False
114
+ LOG_LEVEL=INFO
115
+ SESSION_TIMEOUT=3600
116
+
117
+ # File Processing
118
+ MAX_FILE_SIZE=50MB
119
+ SUPPORTED_FORMATS=pdf,docx,xlsx,txt
120
+
121
+ # Database (Optional)
122
+ DATABASE_URL=sqlite:///data_extractor.db
123
+ ```
124
+
125
+ ## Advanced Features
126
+
127
+ ### Financial Document Processing
128
+ - Comprehensive financial data extraction
129
+ - 13-category data organization
130
+ - Excel report generation with charts
131
+ - XBRL and SEC filing support
132
+
133
+ ### OCR and Image Processing
134
+ - EasyOCR and PaddleOCR integration
135
+ - Tesseract OCR support
136
+ - Advanced image preprocessing
137
+
138
+ ### Machine Learning Integration
139
+ - TensorFlow and PyTorch support
140
+ - Scikit-learn for data analysis
141
+ - XGBoost and LightGBM for predictions
142
+
143
+ ## Troubleshooting
144
+
145
+ For detailed troubleshooting and installation issues, see:
146
+ - [`INSTALLATION.md`](INSTALLATION.md) - Comprehensive installation guide
147
+ - [`FIXES_SUMMARY.md`](FIXES_SUMMARY.md) - Known issues and solutions
148
+
149
+ ### Common Issues
150
+
151
+ 1. **Import Errors**: Try minimal installation first
152
+ 2. **OCR Issues**: Install system dependencies
153
+ 3. **Memory Issues**: Use smaller batch sizes
154
+ 4. **API Errors**: Verify API keys in `.env` file
155
+
156
+ ## Docker Support
157
+
158
+ ```dockerfile
159
+ # Build and run with Docker
160
+ docker build -t data-extractor .
161
+ docker run -p 7860:7860 --env-file .env data-extractor
162
+ ```
TERMINAL_README.md ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 Manus AI-Style Terminal Integration
2
+
3
+ This document explains the real-time terminal streaming functionality added to the Data Extractor application.
4
+
5
+ ## 📋 Overview
6
+
7
+ The terminal integration provides a **Manus AI-style terminal interface** with real-time command execution and streaming output, seamlessly integrated into the existing Gradio application.
8
+
9
+ ## 🏗️ Architecture
10
+
11
+ ### Components
12
+
13
+ 1. **WebSocket Server** (`terminal_stream.py`)
14
+ - Handles real-time communication between frontend and backend
15
+ - Manages command execution with streaming output
16
+ - Supports multiple concurrent connections
17
+ - Runs on port 8765
18
+
19
+ 2. **Frontend Terminal** (`static/terminal.html`)
20
+ - Beautiful Manus AI-inspired terminal interface
21
+ - Real-time output streaming via WebSocket
22
+ - Command history navigation
23
+ - Keyboard shortcuts and controls
24
+
25
+ 3. **Gradio Integration** (Modified `app.py`)
26
+ - Added terminal tab to existing interface
27
+ - Embedded terminal as iframe component
28
+ - Auto-starts WebSocket server on application launch
29
+
30
+ ## 🎨 Features
31
+
32
+ ### Terminal Interface
33
+ - **Real-time Streaming**: Live command output as it happens
34
+ - **Command History**: Navigate with ↑/↓ arrow keys
35
+ - **Interrupt Support**: Ctrl+C to stop running commands
36
+ - **Auto-reconnect**: Automatically reconnects on connection loss
37
+ - **Status Indicators**: Visual connection and execution status
38
+ - **Responsive Design**: Works on desktop and mobile
39
+
40
+ ### Security
41
+ - **Command Sanitization**: Uses `shlex.split()` for safe command parsing
42
+ - **Process Isolation**: Commands run in separate processes
43
+ - **Error Handling**: Robust error handling and logging
44
+
45
+ ## 🚀 Usage
46
+
47
+ ### Starting the Application
48
+ ```bash
49
+ python app.py
50
+ ```
51
+
52
+ The terminal WebSocket server automatically starts on port 8765.
53
+
54
+ ### Accessing the Terminal
55
+ 1. Open the Gradio interface (usually http://localhost:7860)
56
+ 2. Click on the "💻 Terminal" tab
57
+ 3. Start typing commands in the terminal interface
58
+
59
+ ### Keyboard Shortcuts
60
+ - **Enter**: Execute command
61
+ - **↑/↓**: Navigate command history
62
+ - **Ctrl+C**: Interrupt running command
63
+ - **Ctrl+L**: Clear terminal screen
64
+ - **Tab**: Command completion (planned feature)
65
+
66
+ ## 🔧 Configuration
67
+
68
+ ### WebSocket Server Settings
69
+ ```python
70
+ # In terminal_stream.py
71
+ WEBSOCKET_HOST = 'localhost'
72
+ WEBSOCKET_PORT = 8765
73
+ ```
74
+
75
+ ### Terminal Appearance
76
+ Customize the terminal appearance by modifying the CSS in `static/terminal.html`:
77
+
78
+ ```css
79
+ /* Main terminal colors */
80
+ .terminal-container {
81
+ background: linear-gradient(135deg, #0d1117 0%, #161b22 100%);
82
+ }
83
+
84
+ /* Command prompt */
85
+ .prompt {
86
+ color: #58a6ff;
87
+ }
88
+ ```
89
+
90
+ ## 📡 WebSocket API
91
+
92
+ ### Client → Server Messages
93
+
94
+ #### Execute Command
95
+ ```json
96
+ {
97
+ "type": "command",
98
+ "command": "ls -la"
99
+ }
100
+ ```
101
+
102
+ #### Interrupt Command
103
+ ```json
104
+ {
105
+ "type": "interrupt"
106
+ }
107
+ ```
108
+
109
+ ### Server → Client Messages
110
+
111
+ #### Command Output
112
+ ```json
113
+ {
114
+ "type": "output",
115
+ "data": "file1.txt\nfile2.txt",
116
+ "stream": "stdout",
117
+ "timestamp": "2024-01-01T12:00:00.000Z"
118
+ }
119
+ ```
120
+
121
+ #### Command Completion
122
+ ```json
123
+ {
124
+ "type": "command_complete",
125
+ "exit_code": 0,
126
+ "message": "Process exited with code 0",
127
+ "timestamp": "2024-01-01T12:00:00.000Z"
128
+ }
129
+ ```
130
+
131
+ ## 🛠️ Development
132
+
133
+ ### Adding New Features
134
+
135
+ 1. **Server-side**: Modify `terminal_stream.py`
136
+ 2. **Client-side**: Update `static/terminal.html`
137
+ 3. **Integration**: Adjust `app.py` if needed
138
+
139
+ ### Testing
140
+
141
+ ```bash
142
+ # Test WebSocket server independently
143
+ python -c "from terminal_stream import run_websocket_server; run_websocket_server()"
144
+
145
+ # Test terminal interface
146
+ # Open static/terminal.html in browser
147
+ ```
148
+
149
+ ## 🔍 Troubleshooting
150
+
151
+ ### Common Issues
152
+
153
+ 1. **WebSocket Connection Failed**
154
+ - Check if port 8765 is available
155
+ - Verify firewall settings
156
+ - Check server logs for errors
157
+
158
+ 2. **Commands Not Executing**
159
+ - Verify WebSocket connection status
160
+ - Check terminal logs for errors
161
+ - Ensure proper command syntax
162
+
163
+ 3. **Terminal Not Loading**
164
+ - Check if `static/terminal.html` exists
165
+ - Verify Gradio file serving configuration
166
+ - Check browser console for errors
167
+
168
+ ### Debug Mode
169
+
170
+ Enable debug logging:
171
+ ```python
172
+ import logging
173
+ logging.getLogger('terminal_stream').setLevel(logging.DEBUG)
174
+ ```
175
+
176
+ ## 🚀 Advanced Usage
177
+
178
+ ### Custom Commands
179
+
180
+ Add custom command handlers in `terminal_stream.py`:
181
+
182
+ ```python
183
+ async def handle_custom_command(self, command):
184
+ if command.startswith('custom:'):
185
+ # Handle custom command
186
+ await self.broadcast({
187
+ 'type': 'output',
188
+ 'data': 'Custom command executed',
189
+ 'stream': 'stdout'
190
+ })
191
+ return True
192
+ return False
193
+ ```
194
+
195
+ ### Integration with Workflow
196
+
197
+ Stream workflow logs to terminal:
198
+
199
+ ```python
200
+ # In workflow code
201
+ from terminal_stream import terminal_manager
202
+
203
+ async def log_to_terminal(message):
204
+ await terminal_manager.broadcast({
205
+ 'type': 'output',
206
+ 'data': message,
207
+ 'stream': 'workflow'
208
+ })
209
+ ```
210
+
211
+ ## 📚 Dependencies
212
+
213
+ - `websockets`: WebSocket server implementation
214
+ - `asyncio`: Async programming support
215
+ - `subprocess`: Command execution
216
+ - `shlex`: Safe command parsing
217
+
218
+ ## 🎯 Future Enhancements
219
+
220
+ - [ ] Command auto-completion
221
+ - [ ] File upload/download via terminal
222
+ - [ ] Terminal themes and customization
223
+ - [ ] Multi-session support
224
+ - [ ] Terminal recording and playback
225
+ - [ ] Integration with workflow logging
226
+ - [ ] SSH/remote terminal support
227
+
228
+ ## 📄 License
229
+
230
+ This terminal implementation is part of the Data Extractor project and follows the same license terms.
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,2367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import asyncio
3
+ import json
4
+ import time
5
+ import os
6
+ # Silence Matplotlib cache warnings on read-only filesystems
7
+ os.environ.setdefault("MPLCONFIGDIR", "/tmp/mpl_cache")
8
+ import logging
9
+ from pathlib import Path
10
+ import uuid
11
+ from workflow.financial_workflow import FinancialDocumentWorkflow
12
+ from agno.storage.sqlite import SqliteStorage
13
+ from utils.file_handler import FileHandler
14
+ from config.settings import settings
15
+ import threading
16
+ from queue import Queue
17
+ import signal
18
+ import sys
19
+ import atexit
20
+ from datetime import datetime, timedelta
21
+ from terminal_stream import terminal_manager, run_websocket_server
22
+ from collections import deque
23
+
24
+ # Configure logging - Only INFO level and above, no httpcore/debug details
25
+ # Use /tmp for file logging on Hugging Face Spaces or disable file logging if not writable
26
+ import tempfile
27
+ import os
28
+
29
+ try:
30
+ # Try to create log file in /tmp directory (works on Hugging Face Spaces)
31
+ log_dir = "/tmp"
32
+ log_file = os.path.join(log_dir, "app.log")
33
+ logging.basicConfig(
34
+ level=logging.INFO,
35
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
36
+ handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
37
+ )
38
+ except (PermissionError, OSError):
39
+ # Fallback to console-only logging if file logging fails
40
+ logging.basicConfig(
41
+ level=logging.INFO,
42
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
43
+ handlers=[logging.StreamHandler()],
44
+ )
45
+
46
+ # Disable httpcore and other verbose loggers
47
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
48
+ logging.getLogger("urllib3").setLevel(logging.WARNING)
49
+ logging.getLogger("requests").setLevel(logging.WARNING)
50
+ logging.getLogger("google").setLevel(logging.WARNING)
51
+ logging.getLogger("google.auth").setLevel(logging.WARNING)
52
+ logging.getLogger("google.api_core").setLevel(logging.WARNING)
53
+
54
+ logger = logging.getLogger(__name__)
55
+
56
+ # Auto-shutdown configuration
57
+ INACTIVITY_TIMEOUT_MINUTES = 30 # Shutdown after 30 minutes of inactivity
58
+ CHECK_INTERVAL_SECONDS = 60 # Check every minute
59
+
60
class AutoShutdownManager:
    """Manages automatic shutdown of the Gradio application.

    A repeating ``threading.Timer`` wakes every ``CHECK_INTERVAL_SECONDS`` and
    compares the current time against ``last_activity``; once the configured
    timeout is exceeded the process is terminated.  SIGINT/SIGTERM and normal
    interpreter exit are also hooked so the timer is cancelled first.
    """

    def __init__(self, timeout_minutes=INACTIVITY_TIMEOUT_MINUTES):
        self.timeout_minutes = timeout_minutes
        self.last_activity = datetime.now()
        self.shutdown_timer = None
        self.app_instance = None
        self.is_shutting_down = False

        # Setup signal handlers for graceful shutdown.
        # NOTE(review): signal.signal() only works from the main thread, so
        # this class must be instantiated there.
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

        # Register cleanup function so a normal exit also cancels the timer.
        atexit.register(self._cleanup)

        logger.info(f"AutoShutdownManager initialized with {timeout_minutes} minute timeout")

    def _signal_handler(self, signum, frame):
        """Handle shutdown signals gracefully."""
        logger.info(f"Received signal {signum}, initiating graceful shutdown...")
        self._shutdown_server()
        sys.exit(0)

    def _cleanup(self):
        """Cleanup function called on exit (registered via atexit)."""
        if not self.is_shutting_down:
            logger.info("Application cleanup initiated")
            self._shutdown_server()

    def update_activity(self):
        """Update the last activity timestamp (call on every user request)."""
        self.last_activity = datetime.now()
        logger.debug(f"Activity updated: {self.last_activity}")

    def start_monitoring(self, app_instance):
        """Start monitoring *app_instance* for inactivity."""
        self.app_instance = app_instance
        self._start_inactivity_timer()
        logger.info("Inactivity monitoring started")

    def _start_inactivity_timer(self):
        """Start or restart the inactivity timer."""
        if self.shutdown_timer:
            self.shutdown_timer.cancel()

        def check_inactivity():
            if self.is_shutting_down:
                return

            time_since_activity = datetime.now() - self.last_activity
            if time_since_activity > timedelta(minutes=self.timeout_minutes):
                logger.info(f"No activity for {self.timeout_minutes} minutes, shutting down...")
                self._shutdown_server()
            else:
                # Not idle long enough yet: schedule the next check.
                self._start_inactivity_timer()

        self.shutdown_timer = threading.Timer(CHECK_INTERVAL_SECONDS, check_inactivity)
        # Fix: a non-daemon Timer thread keeps the interpreter alive after the
        # main thread exits; mark it daemon so it can never block shutdown.
        self.shutdown_timer.daemon = True
        self.shutdown_timer.start()

    def _shutdown_server(self):
        """Shutdown the Gradio server gracefully."""
        if self.is_shutting_down:
            return

        self.is_shutting_down = True
        logger.info("Initiating server shutdown...")

        try:
            if self.shutdown_timer:
                self.shutdown_timer.cancel()

            if self.app_instance:
                # Gradio doesn't have a direct shutdown method, so we'll use os._exit
                # (os is imported at module level; the local re-import was redundant).
                logger.info("Shutting down Gradio application")
                os._exit(0)
        except Exception as e:
            logger.error(f"Error during shutdown: {e}")
            os._exit(1)
143
+
144
# Global shutdown manager instance.
# Side effects at import time: installs SIGINT/SIGTERM handlers and an
# atexit hook (see AutoShutdownManager.__init__), so this module must be
# imported from the main thread.
shutdown_manager = AutoShutdownManager()
146
+
147
+ # Terminal Log Handler
148
class TerminalLogHandler(logging.Handler):
    """Custom logging handler that captures logs for terminal display.

    Keeps a bounded global buffer plus bounded per-session buffers, and can
    render the captured entries as HTML for the in-app terminal widget.
    """

    def __init__(self):
        super().__init__()
        self.logs = deque(maxlen=1000)  # Keep last 1000 log entries
        self.session_logs = {}  # Per-session logs: session_id -> deque(maxlen=500)

    def emit(self, record):
        """Capture a log record into the in-memory buffers."""
        try:
            # Skip noisy third-party DEBUG logs but keep important ones.
            if record.levelname == 'DEBUG' and record.name in ('httpcore', 'urllib3', 'requests'):
                return

            # Format the log message
            message = record.getMessage()

            # Skip empty or very short messages
            if not message or len(message.strip()) < 3:
                return

            log_entry = {
                'timestamp': datetime.fromtimestamp(record.created).strftime('%H:%M:%S'),
                'level': record.levelname,
                'message': message,
                'logger': record.name,
                'module': getattr(record, 'module', ''),
                'funcName': getattr(record, 'funcName', ''),
            }

            # Add to global logs
            self.logs.append(log_entry)

            # Mirror into the per-session buffer when the record is tagged
            # with a session_id (via LoggerAdapter / extra=...).
            session_id = getattr(record, 'session_id', None)
            if session_id:
                self.session_logs.setdefault(session_id, deque(maxlen=500)).append(log_entry)

        except Exception as e:
            # Prevent logging errors from breaking the application
            print(f"TerminalLogHandler error: {e}")

    def get_logs(self, session_id=None, limit=50):
        """Get recent logs, optionally filtered by session."""
        if session_id and session_id in self.session_logs:
            return list(self.session_logs[session_id])[-limit:]
        return list(self.logs)[-limit:]

    def get_logs_as_html(self, session_id=None, limit=50):
        """Get logs formatted as HTML for terminal display."""
        import html as _html  # local import: escape user-controlled log text

        logs = self.get_logs(session_id, limit)
        html_lines = []

        for log in logs:
            level_class = {
                'DEBUG': 'system-line',
                'INFO': 'output-line',
                'WARNING': 'system-line',
                'ERROR': 'error-line',
                'CRITICAL': 'error-line'
            }.get(log['level'], 'output-line')

            # Fix: escape message and logger name so raw '<', '>' or '&' in
            # log text cannot break the markup or inject HTML into the view.
            safe_logger = _html.escape(str(log['logger']))
            safe_message = _html.escape(str(log['message']))
            html_lines.append(f'''
            <div class="terminal-line {level_class}">
                <span class="timestamp">{log['timestamp']}</span>
                <span>[{log['level']}] {safe_logger}: {safe_message}</span>
            </div>
            ''')

        return ''.join(html_lines)
224
+
225
# Global terminal log handler, shared by every logger configured below.
terminal_log_handler = TerminalLogHandler()

# The handler itself accepts everything; each logger sets its own threshold.
terminal_log_handler.setLevel(logging.DEBUG)


def _wire_terminal_logger(name, level):
    """Attach the shared terminal handler to a logger and set its level."""
    target = logging.getLogger(name) if name else logging.getLogger()
    target.addHandler(terminal_log_handler)
    target.setLevel(level)
    return target


# Root logger first so all log records reach the terminal by default.
root_logger = _wire_terminal_logger(None, logging.DEBUG)

# Workflow-related loggers at DEBUG for maximum detail in the terminal.
workflow_logger = _wire_terminal_logger('workflow', logging.DEBUG)
agno_logger = _wire_terminal_logger('agno', logging.DEBUG)
utils_logger = _wire_terminal_logger('utils', logging.DEBUG)

# Keep httpx and google at INFO level to avoid spam.
httpx_logger = _wire_terminal_logger('httpx', logging.INFO)
google_logger = _wire_terminal_logger('google', logging.INFO)
257
+
258
+ # Prompt Gallery Loader
259
class PromptGallery:
    """Manages loading and accessing prompt gallery from JSON configuration."""

    def __init__(self):
        self.prompts = {}
        self.load_prompts()

    def load_prompts(self):
        """Load prompts from JSON configuration file."""
        try:
            prompt_file = Path(settings.TEMP_DIR).parent / "config" / "prompt_gallery.json"
            if not prompt_file.exists():
                logger.warning(f"Prompt gallery file not found: {prompt_file}")
                self.prompts = {"categories": {}}
                return
            with open(prompt_file, 'r', encoding='utf-8') as f:
                self.prompts = json.load(f)
            logger.info(f"Loaded prompt gallery with {len(self.prompts.get('categories', {}))} categories")
        except Exception as e:
            logger.error(f"Error loading prompt gallery: {e}")
            self.prompts = {"categories": {}}

    def get_categories(self):
        """Get all available prompt categories."""
        return self.prompts.get('categories', {})

    def get_prompts_for_category(self, category_id):
        """Get all prompts for a specific category."""
        return self.get_categories().get(category_id, {}).get('prompts', [])

    def get_prompt_by_id(self, category_id, prompt_id):
        """Get a specific prompt by category and prompt ID (None if absent)."""
        candidates = self.get_prompts_for_category(category_id)
        return next((p for p in candidates if p.get('id') == prompt_id), None)
296
+
297
# Global prompt gallery instance.
# Side effect at import time: reads config/prompt_gallery.json from disk
# (falls back to an empty gallery if missing or unreadable).
prompt_gallery = PromptGallery()
299
+
300
+ # Custom CSS for beautiful multi-agent streaming interface
301
+ custom_css = """
302
+ /* Main container styling */
303
+ .main-container {
304
+ max-width: 1400px;
305
+ margin: 0 auto;
306
+ }
307
+
308
+ /* Dynamic Single-Panel Workflow Layout */
309
+ .workflow-progress-nav {
310
+ display: flex;
311
+ justify-content: space-between;
312
+ align-items: center;
313
+ background: var(--background-fill-secondary);
314
+ border: 1px solid var(--border-color-primary);
315
+ border-radius: 12px;
316
+ padding: 16px;
317
+ margin: 16px 0;
318
+ gap: 8px;
319
+ }
320
+
321
+ .progress-nav-item {
322
+ display: flex;
323
+ flex-direction: column;
324
+ align-items: center;
325
+ padding: 12px 16px;
326
+ border-radius: 8px;
327
+ cursor: pointer;
328
+ transition: all 0.3s ease;
329
+ flex: 1;
330
+ text-align: center;
331
+ position: relative;
332
+ }
333
+
334
+ .progress-nav-item.pending {
335
+ background: rgba(107, 114, 128, 0.1);
336
+ color: var(--body-text-color-subdued);
337
+ }
338
+
339
+ .progress-nav-item.active {
340
+ background: rgba(59, 130, 246, 0.1);
341
+ color: #3b82f6;
342
+ border: 2px solid #3b82f6;
343
+ }
344
+
345
+ .progress-nav-item.current {
346
+ background: rgba(102, 126, 234, 0.2);
347
+ color: #667eea;
348
+ border: 2px solid #667eea;
349
+ transform: scale(1.05);
350
+ }
351
+
352
+ .progress-nav-item.completed {
353
+ background: rgba(16, 185, 129, 0.1);
354
+ color: #10b981;
355
+ border: 2px solid #10b981;
356
+ }
357
+
358
+ .progress-nav-item.clickable:hover {
359
+ transform: translateY(-2px);
360
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
361
+ }
362
+
363
+ .nav-icon {
364
+ font-size: 24px;
365
+ margin-bottom: 8px;
366
+ }
367
+
368
+ .nav-label {
369
+ font-size: 12px;
370
+ font-weight: 600;
371
+ margin-bottom: 4px;
372
+ }
373
+
374
+ .nav-status {
375
+ font-size: 10px;
376
+ opacity: 0.7;
377
+ }
378
+
379
+ .active-agent-panel {
380
+ background: var(--background-fill-secondary);
381
+ border: 2px solid var(--border-color-primary);
382
+ border-radius: 16px;
383
+ margin: 16px 0;
384
+ overflow: hidden;
385
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1);
386
+ transition: all 0.3s ease;
387
+ }
388
+
389
+ .agent-panel-header {
390
+ display: flex;
391
+ justify-content: space-between;
392
+ align-items: center;
393
+ padding: 20px 24px;
394
+ background: linear-gradient(135deg, var(--background-fill-primary) 0%, var(--background-fill-secondary) 100%);
395
+ border-bottom: 1px solid var(--border-color-primary);
396
+ }
397
+
398
+ .agent-info {
399
+ display: flex;
400
+ align-items: center;
401
+ gap: 16px;
402
+ }
403
+
404
+ .agent-icon-large {
405
+ font-size: 32px;
406
+ padding: 12px;
407
+ background: var(--background-fill-primary);
408
+ border-radius: 12px;
409
+ border: 2px solid var(--border-color-accent);
410
+ }
411
+
412
+ .agent-details h3.agent-title {
413
+ margin: 0 0 4px 0;
414
+ font-size: 20px;
415
+ font-weight: 700;
416
+ color: var(--body-text-color);
417
+ }
418
+
419
+ .agent-details p.agent-description {
420
+ margin: 0;
421
+ font-size: 14px;
422
+ color: var(--body-text-color-subdued);
423
+ }
424
+
425
+ .agent-status-badge {
426
+ padding: 8px 16px;
427
+ border-radius: 20px;
428
+ color: white;
429
+ font-weight: 600;
430
+ font-size: 12px;
431
+ text-transform: uppercase;
432
+ letter-spacing: 0.5px;
433
+ }
434
+
435
+ .agent-content-area {
436
+ padding: 24px;
437
+ min-height: 200px;
438
+ max-height: 400px;
439
+ overflow-y: auto;
440
+ }
441
+
442
+ .agent-content {
443
+ font-family: var(--font-mono);
444
+ font-size: 14px;
445
+ line-height: 1.6;
446
+ color: var(--body-text-color);
447
+ white-space: pre-wrap;
448
+ word-wrap: break-word;
449
+ }
450
+
451
+ .agent-content.streaming {
452
+ border-left: 3px solid #3b82f6;
453
+ padding-left: 12px;
454
+ background: rgba(59, 130, 246, 0.02);
455
+ }
456
+
457
+ .agent-waiting,
458
+ .agent-starting,
459
+ .agent-empty {
460
+ display: flex;
461
+ align-items: center;
462
+ justify-content: center;
463
+ height: 120px;
464
+ color: var(--body-text-color-subdued);
465
+ font-style: italic;
466
+ font-size: 16px;
467
+ }
468
+
469
+ .typing-cursor {
470
+ animation: blink 1s infinite;
471
+ color: #3b82f6;
472
+ font-weight: bold;
473
+ }
474
+
475
+ /* Legacy Multi-Agent Workflow Layout (kept for compatibility) */
476
+ .workflow-container {
477
+ display: grid;
478
+ grid-template-columns: 1fr;
479
+ gap: 12px;
480
+ margin: 16px 0;
481
+ }
482
+
483
+ .agent-panel {
484
+ background: var(--background-fill-secondary);
485
+ border: 2px solid var(--border-color-primary);
486
+ border-radius: 12px;
487
+ padding: 16px;
488
+ margin: 8px 0;
489
+ transition: all 0.3s ease;
490
+ position: relative;
491
+ overflow: hidden;
492
+ }
493
+
494
+ .agent-panel.active {
495
+ border-color: var(--color-accent);
496
+ box-shadow: 0 4px 20px rgba(102, 126, 234, 0.2);
497
+ transform: translateY(-2px);
498
+ }
499
+
500
+ .agent-panel.completed {
501
+ border-color: var(--color-success);
502
+ background: rgba(17, 153, 142, 0.05);
503
+ }
504
+
505
+ .agent-panel.streaming {
506
+ border-color: var(--color-accent);
507
+ background: rgba(102, 126, 234, 0.05);
508
+ }
509
+
510
+ .agent-header {
511
+ display: flex;
512
+ align-items: center;
513
+ justify-content: space-between;
514
+ margin-bottom: 12px;
515
+ padding-bottom: 8px;
516
+ border-bottom: 1px solid var(--border-color-primary);
517
+ }
518
+
519
+ .agent-info {
520
+ display: flex;
521
+ align-items: center;
522
+ gap: 12px;
523
+ }
524
+
525
+ .agent-icon {
526
+ font-size: 24px;
527
+ animation: pulse 2s infinite;
528
+ }
529
+
530
+ .agent-icon.active {
531
+ animation: bounce 1s infinite;
532
+ }
533
+
534
+ .agent-name {
535
+ font-size: 18px;
536
+ font-weight: 600;
537
+ color: var(--body-text-color);
538
+ }
539
+
540
+ .agent-description {
541
+ font-size: 14px;
542
+ color: var(--body-text-color-subdued);
543
+ margin-top: 4px;
544
+ }
545
+
546
+ .agent-status {
547
+ display: flex;
548
+ align-items: center;
549
+ gap: 8px;
550
+ font-size: 14px;
551
+ font-weight: 500;
552
+ }
553
+
554
+ .status-indicator {
555
+ width: 12px;
556
+ height: 12px;
557
+ border-radius: 50%;
558
+ animation: pulse 2s infinite;
559
+ }
560
+
561
+ .status-indicator.pending {
562
+ background: var(--color-neutral);
563
+ }
564
+
565
+ .status-indicator.starting {
566
+ background: var(--color-warning);
567
+ animation: flash 1s infinite;
568
+ }
569
+
570
+ .status-indicator.streaming {
571
+ background: var(--color-accent);
572
+ animation: pulse 1s infinite;
573
+ }
574
+
575
+ .status-indicator.completed {
576
+ background: var(--color-success);
577
+ animation: none;
578
+ }
579
+
580
+ .agent-thinking {
581
+ background: var(--background-fill-primary);
582
+ border: 1px solid var(--border-color-primary);
583
+ border-radius: 8px;
584
+ padding: 12px;
585
+ min-height: 120px;
586
+ max-height: 300px;
587
+ overflow-y: auto;
588
+ font-family: var(--font-mono);
589
+ font-size: 13px;
590
+ line-height: 1.5;
591
+ color: var(--body-text-color);
592
+ white-space: pre-wrap;
593
+ word-wrap: break-word;
594
+ }
595
+
596
+ .agent-thinking.streaming {
597
+ border-color: var(--color-accent);
598
+ background: rgba(102, 126, 234, 0.02);
599
+ }
600
+
601
+ .agent-thinking.empty {
602
+ display: flex;
603
+ align-items: center;
604
+ justify-content: center;
605
+ color: var(--body-text-color-subdued);
606
+ font-style: italic;
607
+ }
608
+
609
+ .thinking-cursor {
610
+ display: inline-block;
611
+ width: 2px;
612
+ height: 16px;
613
+ background: var(--color-accent);
614
+ margin-left: 2px;
615
+ animation: blink 1s infinite;
616
+ }
617
+
618
+ /* Workflow Progress Overview */
619
+ .workflow-progress {
620
+ display: flex;
621
+ justify-content: space-between;
622
+ align-items: center;
623
+ background: var(--background-fill-secondary);
624
+ border: 1px solid var(--border-color-primary);
625
+ border-radius: 8px;
626
+ padding: 16px;
627
+ margin: 16px 0;
628
+ }
629
+
630
+ .progress-step-mini {
631
+ display: flex;
632
+ flex-direction: column;
633
+ align-items: center;
634
+ gap: 8px;
635
+ flex: 1;
636
+ position: relative;
637
+ }
638
+
639
+ .progress-step-mini::after {
640
+ content: '';
641
+ position: absolute;
642
+ top: 12px;
643
+ right: -50%;
644
+ width: 100%;
645
+ height: 2px;
646
+ background: var(--border-color-primary);
647
+ z-index: 1;
648
+ }
649
+
650
+ .progress-step-mini:last-child::after {
651
+ display: none;
652
+ }
653
+
654
+ .mini-icon {
655
+ font-size: 20px;
656
+ padding: 8px;
657
+ border-radius: 50%;
658
+ background: var(--background-fill-primary);
659
+ border: 2px solid var(--border-color-primary);
660
+ z-index: 2;
661
+ position: relative;
662
+ }
663
+
664
+ .mini-icon.active {
665
+ border-color: var(--color-accent);
666
+ background: var(--color-accent);
667
+ color: white;
668
+ animation: pulse 1s infinite;
669
+ }
670
+
671
+ .mini-icon.completed {
672
+ border-color: var(--color-success);
673
+ background: var(--color-success);
674
+ color: white;
675
+ }
676
+
677
+ .mini-label {
678
+ font-size: 12px;
679
+ font-weight: 500;
680
+ color: var(--body-text-color);
681
+ text-align: center;
682
+ }
683
+
684
+ /* Animations */
685
+ @keyframes bounce {
686
+ 0%, 20%, 50%, 80%, 100% { transform: translateY(0); }
687
+ 40% { transform: translateY(-10px); }
688
+ 60% { transform: translateY(-5px); }
689
+ }
690
+
691
+ @keyframes flash {
692
+ 0%, 50%, 100% { opacity: 1; }
693
+ 25%, 75% { opacity: 0.5; }
694
+ }
695
+
696
+ @keyframes blink {
697
+ 0%, 50% { opacity: 1; }
698
+ 51%, 100% { opacity: 0; }
699
+ }
700
+
701
+ @keyframes typewriter {
702
+ from { width: 0; }
703
+ to { width: 100%; }
704
+ }
705
+
706
+ /* Single step container styling */
707
+ .single-step-container {
708
+ background: var(--background-fill-secondary);
709
+ border: 1px solid var(--border-color-primary);
710
+ border-radius: 8px;
711
+ padding: 16px;
712
+ margin: 8px 0;
713
+ font-family: var(--font-mono);
714
+ }
715
+
716
+ .steps-overview {
717
+ display: flex;
718
+ flex-wrap: wrap;
719
+ gap: 8px;
720
+ margin-bottom: 16px;
721
+ padding-bottom: 12px;
722
+ border-bottom: 1px solid var(--border-color-primary);
723
+ }
724
+
725
+ .step-overview-item {
726
+ padding: 4px 8px;
727
+ border-radius: 4px;
728
+ font-size: 12px;
729
+ font-weight: 500;
730
+ background: var(--background-fill-primary);
731
+ border: 1px solid var(--border-color-primary);
732
+ }
733
+
734
+ .step-overview-item.current-step {
735
+ background: var(--color-accent);
736
+ color: white;
737
+ border-color: var(--color-accent);
738
+ }
739
+
740
+ .step-overview-item.completed-step {
741
+ background: var(--color-success);
742
+ color: white;
743
+ border-color: var(--color-success);
744
+ cursor: pointer;
745
+ transition: all 0.2s ease;
746
+ }
747
+
748
+ .step-overview-item.completed-step:hover {
749
+ transform: translateY(-1px);
750
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
751
+ }
752
+
753
+ .step-overview-item.clickable {
754
+ cursor: pointer;
755
+ user-select: none;
756
+ }
757
+
758
+ .step-overview-item.other-step {
759
+ opacity: 0.7;
760
+ }
761
+
762
+ /* Content formatting styles */
763
+ .code-content, .json-content, .text-content {
764
+ background: var(--background-fill-primary);
765
+ border: 1px solid var(--border-color-primary);
766
+ border-radius: 4px;
767
+ margin: 8px 0;
768
+ }
769
+
770
+ .code-header, .content-header {
771
+ display: flex;
772
+ justify-content: space-between;
773
+ align-items: center;
774
+ background: var(--background-fill-secondary);
775
+ padding: 8px 12px;
776
+ border-bottom: 1px solid var(--border-color-primary);
777
+ font-size: 12px;
778
+ font-weight: 600;
779
+ }
780
+
781
+ .code-label, .content-label {
782
+ color: var(--body-text-color);
783
+ }
784
+
785
+ .code-language, .content-type {
786
+ background: var(--color-accent);
787
+ color: white;
788
+ padding: 2px 6px;
789
+ border-radius: 3px;
790
+ font-size: 10px;
791
+ }
792
+
793
+ .code-block, .json-block, .text-block {
794
+ margin: 0;
795
+ padding: 12px;
796
+ font-family: var(--font-mono);
797
+ font-size: 12px;
798
+ line-height: 1.4;
799
+ overflow-x: auto;
800
+ background: var(--background-fill-primary);
801
+ color: var(--body-text-color);
802
+ }
803
+
804
+ .empty-content {
805
+ padding: 20px;
806
+ text-align: center;
807
+ color: var(--body-text-color-subdued);
808
+ font-style: italic;
809
+ }
810
+
811
+ /* New step content wrapper styles */
812
+ .step-content-wrapper {
813
+ background: var(--background-fill-primary);
814
+ border: 1px solid var(--border-color-primary);
815
+ border-radius: 8px;
816
+ margin: 12px 0;
817
+ overflow: hidden;
818
+ }
819
+
820
+ .step-content-header {
821
+ background: var(--background-fill-secondary);
822
+ padding: 12px 16px;
823
+ border-bottom: 1px solid var(--border-color-primary);
824
+ display: flex;
825
+ align-items: center;
826
+ gap: 8px;
827
+ font-weight: 600;
828
+ font-size: 14px;
829
+ }
830
+
831
+ .step-icon {
832
+ font-size: 18px;
833
+ }
834
+
835
+ .step-label {
836
+ color: var(--body-text-color);
837
+ }
838
+
839
+ .step-content-body {
840
+ padding: 16px;
841
+ line-height: 1.6;
842
+ }
843
+
844
+ .markdown-content {
845
+ font-family: var(--font-sans);
846
+ color: var(--body-text-color);
847
+ }
848
+
849
+ .markdown-content h1, .markdown-content h2, .markdown-content h3,
850
+ .markdown-content h4, .markdown-content h5, .markdown-content h6 {
851
+ margin: 16px 0 8px 0;
852
+ font-weight: 600;
853
+ color: var(--body-text-color);
854
+ }
855
+
856
+ .markdown-content h1 { font-size: 24px; }
857
+ .markdown-content h2 { font-size: 20px; }
858
+ .markdown-content h3 { font-size: 18px; }
859
+ .markdown-content h4 { font-size: 16px; }
860
+ .markdown-content h5 { font-size: 14px; }
861
+ .markdown-content h6 { font-size: 12px; }
862
+
863
+ .markdown-content p {
864
+ margin: 8px 0;
865
+ color: var(--body-text-color);
866
+ }
867
+
868
+ .markdown-content li {
869
+ margin: 4px 0;
870
+ padding-left: 8px;
871
+ list-style-type: disc;
872
+ color: var(--body-text-color);
873
+ }
874
+
875
+ .markdown-content ul {
876
+ margin: 8px 0;
877
+ padding-left: 20px;
878
+ }
879
+
880
+ .markdown-content ol {
881
+ margin: 8px 0;
882
+ padding-left: 20px;
883
+ }
884
+
885
+ .markdown-content strong {
886
+ font-weight: 600;
887
+ color: var(--body-text-color);
888
+ }
889
+
890
+ .markdown-content em {
891
+ font-style: italic;
892
+ color: var(--body-text-color-subdued);
893
+ }
894
+
895
+ .markdown-content code {
896
+ background: var(--background-fill-secondary);
897
+ padding: 2px 4px;
898
+ border-radius: 3px;
899
+ font-family: var(--font-mono);
900
+ font-size: 13px;
901
+ color: var(--body-text-color);
902
+ }
903
+
904
+ .formatted-content {
905
+ font-family: var(--font-sans);
906
+ line-height: 1.6;
907
+ color: var(--body-text-color);
908
+ }
909
+
910
+ .error-content {
911
+ background: #fee;
912
+ border: 1px solid #fcc;
913
+ border-radius: 4px;
914
+ padding: 12px;
915
+ color: #c33;
916
+ font-family: var(--font-mono);
917
+ font-size: 12px;
918
+ }
919
+
920
+ /* Step type specific styling */
921
+ .code-step .step-content-header {
922
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
923
+ color: white;
924
+ }
925
+
926
+ .data-step .step-content-header {
927
+ background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
928
+ color: white;
929
+ }
930
+
931
+ .prompts-step .step-content-header {
932
+ background: linear-gradient(135deg, #ff6b6b 0%, #feca57 100%);
933
+ color: white;
934
+ }
935
+
936
+ .default-step .step-content-header {
937
+ background: linear-gradient(135deg, #74b9ff 0%, #0984e3 100%);
938
+ color: white;
939
+ }
940
+
941
+ .current-step-details {
942
+ background: var(--background-fill-primary);
943
+ border: 1px solid var(--border-color-primary);
944
+ border-radius: 4px;
945
+ padding: 12px;
946
+ }
947
+
948
+ .step-header {
949
+ display: flex;
950
+ justify-content: space-between;
951
+ align-items: center;
952
+ margin-bottom: 8px;
953
+ padding-bottom: 8px;
954
+ border-bottom: 1px solid var(--border-color-primary);
955
+ }
956
+
957
+ .step-title {
958
+ font-weight: 600;
959
+ font-size: 14px;
960
+ color: var(--body-text-color);
961
+ }
962
+
963
+ .step-progress {
964
+ font-size: 12px;
965
+ font-weight: 500;
966
+ color: var(--body-text-color-subdued);
967
+ }
968
+
969
+ .step-description {
970
+ font-size: 12px;
971
+ color: var(--body-text-color-subdued);
972
+ margin-bottom: 8px;
973
+ font-style: italic;
974
+ }
975
+
976
+ .step-content {
977
+ background: var(--background-fill-secondary);
978
+ border: 1px solid var(--border-color-primary);
979
+ border-radius: 4px;
980
+ padding: 12px;
981
+ margin-top: 8px;
982
+ max-height: 200px;
983
+ overflow-y: auto;
984
+ }
985
+
986
+ .step-content pre {
987
+ margin: 0;
988
+ font-family: var(--font-mono);
989
+ font-size: 12px;
990
+ line-height: 1.4;
991
+ color: var(--body-text-color);
992
+ white-space: pre-wrap;
993
+ word-wrap: break-word;
994
+ }
995
+
996
+ /* Progress bar styling */
997
+ .progress-container {
998
+ margin: 20px 0;
999
+ }
1000
+
1001
+ .progress-step {
1002
+ display: flex;
1003
+ align-items: center;
1004
+ margin: 10px 0;
1005
+ padding: 10px;
1006
+ border-radius: 10px;
1007
+ background: rgba(255, 255, 255, 0.05);
1008
+ transition: all 0.3s ease;
1009
+ }
1010
+
1011
+ .progress-step.active {
1012
+ background: rgba(102, 126, 234, 0.2);
1013
+ transform: scale(1.02);
1014
+ }
1015
+
1016
+ .progress-step.completed {
1017
+ background: rgba(17, 153, 142, 0.2);
1018
+ }
1019
+
1020
+ .step-icon {
1021
+ font-size: 24px;
1022
+ margin-right: 15px;
1023
+ animation: pulse 2s infinite;
1024
+ }
1025
+
1026
+ @keyframes pulse {
1027
+ 0% { transform: scale(1); }
1028
+ 50% { transform: scale(1.1); }
1029
+ 100% { transform: scale(1); }
1030
+ }
1031
+
1032
+ /* Fade in animation */
1033
+ .fade-in {
1034
+ animation: fadeIn 0.5s ease-in;
1035
+ }
1036
+
1037
+ @keyframes fadeIn {
1038
+ from { opacity: 0; transform: translateY(20px); }
1039
+ to { opacity: 1; transform: translateY(0); }
1040
+ }
1041
+
1042
+ /* Typing indicator */
1043
+ .typing-indicator {
1044
+ display: inline-block;
1045
+ width: 20px;
1046
+ height: 10px;
1047
+ }
1048
+
1049
+ .typing-indicator span {
1050
+ display: inline-block;
1051
+ width: 8px;
1052
+ height: 8px;
1053
+ border-radius: 50%;
1054
+ background: #667eea;
1055
+ margin: 0 2px;
1056
+ animation: typing 1.4s infinite ease-in-out;
1057
+ }
1058
+
1059
+ .typing-indicator span:nth-child(1) { animation-delay: -0.32s; }
1060
+ .typing-indicator span:nth-child(2) { animation-delay: -0.16s; }
1061
+
1062
+ @keyframes typing {
1063
+ 0%, 80%, 100% { transform: scale(0.8); opacity: 0.5; }
1064
+ 40% { transform: scale(1); opacity: 1; }
1065
+ }
1066
+
1067
+ /* Header styling */
1068
+ .header-title {
1069
+ font-size: 1.2rem;
1070
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
1071
+ -webkit-background-clip: text;
1072
+ -webkit-text-fill-color: transparent;
1073
+ background-clip: text;
1074
+ margin: 0;
1075
+ text-align: left;
1076
+ padding: 0.5rem 0;
1077
+ }
1078
+
1079
+ /* Status indicators */
1080
+ .status-success {
1081
+ color: #38ef7d;
1082
+ font-weight: bold;
1083
+ }
1084
+
1085
+ .status-error {
1086
+ color: #ff6b6b;
1087
+ font-weight: bold;
1088
+ }
1089
+
1090
+ .status-processing {
1091
+ color: #667eea;
1092
+ font-weight: bold;
1093
+ }
1094
+
1095
+ /* Download button styling */
1096
+ .download-section {
1097
+ text-align: center;
1098
+ margin: 20px 0;
1099
+ }
1100
+
1101
+ .download-btn {
1102
+ background: linear-gradient(135deg, #38ef7d, #11998e);
1103
+ color: white;
1104
+ border: none;
1105
+ padding: 12px 24px;
1106
+ border-radius: 8px;
1107
+ font-size: 16px;
1108
+ font-weight: 600;
1109
+ cursor: pointer;
1110
+ transition: all 0.3s ease;
1111
+ box-shadow: 0 4px 12px rgba(56, 239, 125, 0.3);
1112
+ }
1113
+
1114
+ .download-btn:hover {
1115
+ transform: translateY(-2px);
1116
+ box-shadow: 0 6px 16px rgba(56, 239, 125, 0.4);
1117
+ }
1118
+
1119
+ .download-btn:active {
1120
+ transform: translateY(2px);
1121
+ box-shadow: 0 2px 6px rgba(56, 239, 125, 0.2);
1122
+ }
1123
+
1124
+ /* Terminal Component Styling */
1125
+ .terminal-container {
1126
+ display: flex;
1127
+ flex-direction: column;
1128
+ height: 750px;
1129
+ background: linear-gradient(135deg, #0d1117 0%, #161b22 100%);
1130
+ border: 1px solid #30363d;
1131
+ border-radius: 8px;
1132
+ font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
1133
+ overflow: hidden;
1134
+ margin: 0;
1135
+ }
1136
+
1137
+ .terminal-header {
1138
+ display: flex;
1139
+ align-items: center;
1140
+ justify-content: space-between;
1141
+ padding: 12px 16px;
1142
+ background: #161b22;
1143
+ border-bottom: 1px solid #30363d;
1144
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3);
1145
+ }
1146
+
1147
+ .terminal-title {
1148
+ display: flex;
1149
+ align-items: center;
1150
+ gap: 8px;
1151
+ font-size: 14px;
1152
+ font-weight: 600;
1153
+ color: #f0f6fc;
1154
+ }
1155
+
1156
+ .terminal-icon {
1157
+ width: 16px;
1158
+ height: 16px;
1159
+ background: #238636;
1160
+ border-radius: 50%;
1161
+ position: relative;
1162
+ }
1163
+
1164
+ .terminal-icon::after {
1165
+ content: '>';
1166
+ position: absolute;
1167
+ top: 50%;
1168
+ left: 50%;
1169
+ transform: translate(-50%, -50%);
1170
+ font-size: 10px;
1171
+ color: white;
1172
+ font-weight: bold;
1173
+ }
1174
+
1175
+ .terminal-controls {
1176
+ display: flex;
1177
+ gap: 8px;
1178
+ }
1179
+
1180
+ .control-btn {
1181
+ width: 12px;
1182
+ height: 12px;
1183
+ border-radius: 50%;
1184
+ border: none;
1185
+ cursor: pointer;
1186
+ transition: opacity 0.2s;
1187
+ }
1188
+
1189
+ .control-btn:hover {
1190
+ opacity: 0.8;
1191
+ }
1192
+
1193
+ .close { background: #ff5f56; }
1194
+ .minimize { background: #ffbd2e; }
1195
+ .maximize { background: #27ca3f; }
1196
+
1197
+ .terminal-body {
1198
+ flex: 1;
1199
+ display: flex;
1200
+ flex-direction: column;
1201
+ overflow: hidden;
1202
+ }
1203
+
1204
+ .terminal-output {
1205
+ flex: 1;
1206
+ padding: 8px;
1207
+ overflow-y: auto;
1208
+ font-size: 10px;
1209
+ line-height: 1.2;
1210
+ background: #0d1117;
1211
+ color: #c9d1d9;
1212
+ scrollbar-width: thin;
1213
+ scrollbar-color: #30363d #0d1117;
1214
+ height: 100%;
1215
+ word-wrap: break-word;
1216
+ white-space: pre-wrap;
1217
+ }
1218
+
1219
+ .terminal-output::-webkit-scrollbar {
1220
+ width: 8px;
1221
+ }
1222
+
1223
+ .terminal-output::-webkit-scrollbar-track {
1224
+ background: #0d1117;
1225
+ }
1226
+
1227
+ .terminal-output::-webkit-scrollbar-thumb {
1228
+ background: #30363d;
1229
+ border-radius: 4px;
1230
+ }
1231
+
1232
+ .terminal-output::-webkit-scrollbar-thumb:hover {
1233
+ background: #484f58;
1234
+ }
1235
+
1236
+ .terminal-line {
1237
+ margin-bottom: 1px;
1238
+ white-space: pre-wrap;
1239
+ word-wrap: break-word;
1240
+ display: block;
1241
+ width: 100%;
1242
+ }
1243
+
1244
+ .command-line {
1245
+ color: #58a6ff;
1246
+ font-weight: 600;
1247
+ }
1248
+
1249
+ .output-line {
1250
+ color: #c9d1d9;
1251
+ }
1252
+
1253
+ .error-line {
1254
+ color: #f85149;
1255
+ }
1256
+
1257
+ .success-line {
1258
+ color: #56d364;
1259
+ }
1260
+
1261
+ .system-line {
1262
+ color: #ffa657;
1263
+ font-style: italic;
1264
+ }
1265
+
1266
+ .timestamp {
1267
+ color: #7d8590;
1268
+ font-size: 8px;
1269
+ margin-right: 4px;
1270
+ display: inline-block;
1271
+ min-width: 60px;
1272
+ }
1273
+
1274
+ .terminal-input {
1275
+ display: flex;
1276
+ align-items: center;
1277
+ padding: 12px 16px;
1278
+ background: #161b22;
1279
+ border-top: 1px solid #30363d;
1280
+ }
1281
+
1282
+ .prompt {
1283
+ color: #58a6ff;
1284
+ margin-right: 8px;
1285
+ font-weight: 600;
1286
+ }
1287
+
1288
+ .input-field {
1289
+ flex: 1;
1290
+ background: transparent;
1291
+ border: none;
1292
+ color: #c9d1d9;
1293
+ font-family: inherit;
1294
+ font-size: 11px;
1295
+ outline: none;
1296
+ }
1297
+
1298
+ .input-field::placeholder {
1299
+ color: #7d8590;
1300
+ }
1301
+
1302
+ .status-indicator {
1303
+ display: flex;
1304
+ align-items: center;
1305
+ gap: 8px;
1306
+ margin-left: 12px;
1307
+ }
1308
+
1309
+ .status-dot {
1310
+ width: 8px;
1311
+ height: 8px;
1312
+ border-radius: 50%;
1313
+ background: #7d8590;
1314
+ transition: background-color 0.3s;
1315
+ }
1316
+
1317
+ .status-dot.connected {
1318
+ background: #56d364;
1319
+ box-shadow: 0 0 8px rgba(86, 211, 100, 0.5);
1320
+ }
1321
+
1322
+ .status-dot.running {
1323
+ background: #ffa657;
1324
+ animation: pulse 1.5s infinite;
1325
+ }
1326
+
1327
+ .status-dot.error {
1328
+ background: #f85149;
1329
+ }
1330
+
1331
+ @keyframes terminal-pulse {
1332
+ 0%, 100% { opacity: 1; }
1333
+ 50% { opacity: 0.5; }
1334
+ }
1335
+
1336
+ /* Prompt Gallery Styling */
1337
+ .prompt-gallery {
1338
+ background: var(--background-fill-secondary);
1339
+ border: 1px solid var(--border-color-primary);
1340
+ border-radius: 8px;
1341
+ padding: 16px;
1342
+ margin: 8px 0;
1343
+ }
1344
+
1345
+ .prompt-card {
1346
+ background: var(--background-fill-primary);
1347
+ border: 1px solid var(--border-color-accent);
1348
+ border-radius: 6px;
1349
+ padding: 12px;
1350
+ margin: 8px 0;
1351
+ cursor: pointer;
1352
+ transition: all 0.3s ease;
1353
+ }
1354
+
1355
+ .prompt-card:hover {
1356
+ background: var(--background-fill-secondary);
1357
+ border-color: var(--color-accent);
1358
+ transform: translateY(-2px);
1359
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
1360
+ }
1361
+
1362
+ .prompt-card-header {
1363
+ display: flex;
1364
+ align-items: center;
1365
+ gap: 8px;
1366
+ margin-bottom: 8px;
1367
+ }
1368
+
1369
+ .prompt-card-title {
1370
+ font-weight: 600;
1371
+ color: var(--body-text-color);
1372
+ margin: 0;
1373
+ }
1374
+
1375
+ .prompt-card-description {
1376
+ color: var(--body-text-color-subdued);
1377
+ font-size: 0.9em;
1378
+ margin: 0;
1379
+ }
1380
+
1381
+ .prompt-preview {
1382
+ background: var(--background-fill-secondary);
1383
+ border: 1px solid var(--border-color-primary);
1384
+ border-radius: 4px;
1385
+ padding: 8px;
1386
+ margin-top: 8px;
1387
+ font-size: 0.85em;
1388
+ color: var(--body-text-color-subdued);
1389
+ max-height: 100px;
1390
+ overflow-y: auto;
1391
+ }
1392
+
1393
+ .gallery-category {
1394
+ margin-bottom: 16px;
1395
+ }
1396
+
1397
+ .category-header {
1398
+ display: flex;
1399
+ align-items: center;
1400
+ gap: 8px;
1401
+ margin-bottom: 12px;
1402
+ padding-bottom: 8px;
1403
+ border-bottom: 2px solid var(--border-color-accent);
1404
+ }
1405
+
1406
+ .category-title {
1407
+ font-size: 1.1em;
1408
+ font-weight: 600;
1409
+ color: var(--body-text-color);
1410
+ margin: 0;
1411
+ }
1412
+
1413
+ .use-prompt-btn {
1414
+ background: linear-gradient(135deg, #667eea, #764ba2);
1415
+ color: white;
1416
+ border: none;
1417
+ padding: 6px 12px;
1418
+ border-radius: 4px;
1419
+ font-size: 0.85em;
1420
+ cursor: pointer;
1421
+ transition: all 0.3s ease;
1422
+ margin-top: 8px;
1423
+ }
1424
+
1425
+ .use-prompt-btn:hover {
1426
+ background: linear-gradient(135deg, #764ba2, #667eea);
1427
+ transform: translateY(-1px);
1428
+ box-shadow: 0 2px 8px rgba(102, 126, 234, 0.3);
1429
+ }
1430
+ """
1431
+
1432
+
1433
class WorkflowUI:
    """Per-browser-session UI state.

    Bundles a FileHandler, a short random session id, and a
    FinancialDocumentWorkflow whose intermediate results are cached in a
    SQLite-backed store so repeated runs in the same session can skip
    already-completed steps.
    """

    def __init__(self):
        self.file_handler = FileHandler()
        self.session_id = str(uuid.uuid4())[:8]  # Generate our own session ID

        # Create workflow with database storage for caching.
        # NOTE(review): SqliteStorage opens/creates workflows.db under
        # settings.TEMP_DIR as a side effect of construction.
        self.workflow = FinancialDocumentWorkflow(
            session_id=self.session_id,
            storage=SqliteStorage(
                table_name="financial_workflows",
                db_file=str(Path(settings.TEMP_DIR) / "workflows.db")
            )
        )

        self.processing_started = False  # flipped once a processing run begins
        self.selected_prompt = None      # prompt chosen from the gallery, if any

        # Simple step configuration for UI display (keys mirror the four
        # workflow phases executed by process_file).
        self.steps_config = {
            "extraction": {
                "name": "Financial Data Extraction",
                "description": "Extracting financial data points from document",
                "icon": "🔍"
            },
            "arrangement": {
                "name": "Data Analysis & Organization",
                "description": "Organizing and analyzing extracted financial data",
                "icon": "📊"
            },
            "code_generation": {
                "name": "Excel Code Generation",
                "description": "Generating Python code for Excel reports",
                "icon": "💻"
            },
            "execution": {
                "name": "Excel Report Creation",
                "description": "Executing code to create Excel workbook",
                "icon": "📊"
            }
        }
1472
+ }
1473
+
1474
+ def validate_file(self, file_path):
1475
+ """Validate uploaded file."""
1476
+ logger.info(f"Validating file: {file_path}")
1477
+
1478
+ if not file_path:
1479
+ logger.warning("No file uploaded")
1480
+ return {"valid": False, "error": "No file uploaded"}
1481
+
1482
+ path = Path(file_path)
1483
+ if not path.exists():
1484
+ logger.error(f"File does not exist: {file_path}")
1485
+ return {"valid": False, "error": "File does not exist"}
1486
+
1487
+ file_extension = path.suffix.lower().lstrip(".")
1488
+
1489
+ if file_extension not in settings.SUPPORTED_FILE_TYPES:
1490
+ logger.error(f"Unsupported file type: {file_extension}")
1491
+ return {
1492
+ "valid": False,
1493
+ "error": f"Unsupported file type. Supported: {', '.join(settings.SUPPORTED_FILE_TYPES)}",
1494
+ }
1495
+
1496
+ file_size_mb = path.stat().st_size / (1024 * 1024)
1497
+ if file_size_mb > 50: # 50MB limit
1498
+ logger.error(f"File too large: {file_size_mb}MB")
1499
+ return {"valid": False, "error": "File too large (max 50MB)"}
1500
+
1501
+ logger.info(
1502
+ f"File validation successful: {path.name} ({file_extension}, {file_size_mb}MB)"
1503
+ )
1504
+ return {
1505
+ "valid": True,
1506
+ "file_info": {
1507
+ "name": path.name,
1508
+ "type": file_extension,
1509
+ "size_mb": round(file_size_mb, 2),
1510
+ },
1511
+ }
1512
+
1513
+ file_size_mb = path.stat().st_size / (1024 * 1024)
1514
+ if file_size_mb > 50: # 50MB limit
1515
+ return {"valid": False, "error": "File too large (max 50MB)"}
1516
+
1517
+ return {
1518
+ "valid": True,
1519
+ "file_info": {
1520
+ "name": path.name,
1521
+ "type": file_extension,
1522
+ "size_mb": round(file_size_mb, 2),
1523
+ },
1524
+ }
1525
+
1526
+ def get_file_preview(self, file_path):
1527
+ """Get file preview."""
1528
+ try:
1529
+ path = Path(file_path)
1530
+ if path.suffix.lower() in [".txt", ".md", ".py", ".json"]:
1531
+ with open(path, "r", encoding="utf-8") as f:
1532
+ content = f.read()
1533
+ return content[:1000] + "..." if len(content) > 1000 else content
1534
+ else:
1535
+ return f"Binary file: {path.name} ({path.suffix})"
1536
+ except Exception as e:
1537
+ return f"Error reading file: {str(e)}"
1538
+
1539
+ def get_prompt_text(self, category_id, prompt_id):
1540
+ """Get the full text of a specific prompt."""
1541
+ prompt = prompt_gallery.get_prompt_by_id(category_id, prompt_id)
1542
+ return prompt.get('prompt', '') if prompt else ''
1543
+
1544
    def download_processed_files(self):
        """Create a zip file of all processed files and return for download.

        Zips everything under the workflow's session output directory into
        the system temp directory and returns the absolute path for Gradio
        to serve. Returns None when there is nothing to zip or on any error
        (the error is logged, never raised to the UI).
        """
        # Update activity for auto-shutdown monitoring
        shutdown_manager.update_activity()

        try:
            import zipfile
            import tempfile
            import os
            import shutil  # NOTE(review): imported but unused in this method
            from datetime import datetime

            # Get session output directory - now using workflow's output directory
            session_output_dir = self.workflow.session_output_dir

            if not session_output_dir.exists():
                logger.warning(f"Output directory does not exist: {session_output_dir}")
                return None

            # Create a properly named zip file in a temporary location that Gradio can access
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            zip_filename = f"processed_files_{self.session_id}_{timestamp}.zip"

            # Use Python's tempfile to create a file in the system temp directory
            # This ensures Gradio can access it properly
            temp_dir = tempfile.gettempdir()
            zip_path = Path(temp_dir) / zip_filename

            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                # Add all files from output directory (recursively)
                file_count = 0
                for file_path in session_output_dir.rglob('*'):
                    if file_path.is_file():
                        # Calculate relative path for zip so the archive
                        # mirrors the output directory layout
                        arcname = file_path.relative_to(session_output_dir)
                        zipf.write(file_path, arcname)
                        file_count += 1
                        logger.debug(f"Added to zip: {arcname}")

            if file_count == 0:
                logger.warning("No files found to download")
                # Debug: List all files in session directory to help diagnose
                # why the output directory was empty
                session_dir = Path(settings.TEMP_DIR) / self.session_id
                if session_dir.exists():
                    logger.info(f"Session directory exists: {session_dir}")
                    for subdir in ['input', 'output', 'temp']:
                        subdir_path = session_dir / subdir
                        if subdir_path.exists():
                            files = list(subdir_path.glob('*'))
                            logger.info(f"{subdir} directory has {len(files)} files: {[f.name for f in files]}")
                        else:
                            logger.info(f"{subdir} directory does not exist")
                else:
                    logger.warning(f"Session directory does not exist: {session_dir}")
                # Clean up empty zip file (archive is already closed here)
                if zip_path.exists():
                    zip_path.unlink()
                return None

            logger.info(f"Created zip file with {file_count} files: {zip_path}")

            # Ensure the file exists and has content
            if zip_path.exists() and zip_path.stat().st_size > 0:
                # For Gradio file downloads, we need to return the file path in a specific way
                abs_path = str(zip_path.absolute())
                logger.info(f"Returning zip file path for download: {abs_path}")
                logger.info(f"File size: {zip_path.stat().st_size} bytes")

                # Try to make the file accessible by setting proper permissions
                os.chmod(abs_path, 0o644)

                # Return the file path for Gradio to handle
                # Make sure to return the path in a way Gradio can process
                return abs_path
            else:
                logger.error("Zip file was created but is empty or doesn't exist")
                return None

        except Exception as e:
            logger.error(f"Error creating download: {str(e)}")
            import traceback
            logger.error(f"Traceback: {traceback.format_exc()}")
            return None
1627
+
1628
+
1629
+ def create_gradio_app():
1630
+ """Create the main Gradio application."""
1631
+
1632
+ # Start WebSocket server for terminal streaming
1633
+ try:
1634
+ run_websocket_server()
1635
+ logger.info("Terminal WebSocket server started on port 8765")
1636
+ except Exception as e:
1637
+ logger.error(f"Failed to start terminal WebSocket server: {e}")
1638
+
1639
    def initialize_session():
        """Initialize a new session with fresh WorkflowUI instance.

        Used as the factory for Gradio's per-browser-session state.
        """
        return WorkflowUI()
1642
+
1643
+ def process_file(file, verbose_print, session_state, progress=gr.Progress()):
1644
+ """Process uploaded file with step-by-step execution and progress updates."""
1645
+ # Get or create session-specific UI instance
1646
+ if session_state is None:
1647
+ session_state = WorkflowUI()
1648
+
1649
+ ui = session_state
1650
+ logger.info(f"🚀 PROCESSING STARTED - File: {file.name if file else 'None'}, Verbose: {verbose_print}")
1651
+ logger.info(f"📋 Session ID: {ui.session_id}")
1652
+
1653
+ # Update activity for auto-shutdown monitoring
1654
+ shutdown_manager.update_activity()
1655
+
1656
+ if not file:
1657
+ logger.warning("Missing file")
1658
+ return "", "", "", None, session_state
1659
+
1660
+ # Validate file (file.name contains Gradio's temp path)
1661
+ logger.info(f"🔍 VALIDATING FILE: {file.name}")
1662
+ validation = ui.validate_file(file.name)
1663
+ logger.info(f"✅ File validation result: {validation}")
1664
+
1665
+ if not validation["valid"]:
1666
+ logger.error(f"❌ FILE VALIDATION FAILED: {validation['error']}")
1667
+ return "", "", "", None, session_state
1668
+
1669
+ # Save file to our session directory
1670
+ logger.info("💾 Saving uploaded file to session directory...")
1671
+ temp_path = ui.file_handler.save_uploaded_file(file, ui.session_id)
1672
+ logger.info(f"✅ File saved to: {temp_path}")
1673
+ logger.info(f"📊 File size: {validation.get('file_info', {}).get('size_mb', 'Unknown')} MB")
1674
+
1675
+ def create_step_html(current_step):
1676
+ """Create HTML for step progress display"""
1677
+ steps = [
1678
+ {"key": "extraction", "name": "Data Extraction", "icon": "🔍"},
1679
+ {"key": "arrangement", "name": "Organization", "icon": "📊"},
1680
+ {"key": "code_generation", "name": "Code Generation", "icon": "💻"},
1681
+ {"key": "execution", "name": "Excel Creation", "icon": "����"}
1682
+ ]
1683
+
1684
+ step_html = '<div style="display: flex; gap: 10px; margin-top: 15px;">'
1685
+
1686
+ for step in steps:
1687
+ if step["key"] == current_step:
1688
+ # Current step - blue with animation
1689
+ step_html += f'''
1690
+ <div style="padding: 10px; border-radius: 6px; background: rgba(59, 130, 246, 0.2); border: 2px solid #3b82f6; position: relative; overflow: hidden;">
1691
+ <div style="position: absolute; top: 0; left: -100%; width: 100%; height: 100%; background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent); animation: shimmer 2s infinite;"></div>
1692
+ {step["icon"]} {step["name"]} ⚡
1693
+ </div>
1694
+ '''
1695
+ elif any(s["key"] == step["key"] and steps.index(s) < steps.index(next(s for s in steps if s["key"] == current_step)) for s in steps):
1696
+ # Completed step - green
1697
+ step_html += f'''
1698
+ <div style="padding: 10px; border-radius: 6px; background: rgba(16, 185, 129, 0.1); border: 1px solid #10b981;">
1699
+ ✅ {step["name"]}
1700
+ </div>
1701
+ '''
1702
+ else:
1703
+ # Pending step - gray
1704
+ step_html += f'''
1705
+ <div style="padding: 10px; border-radius: 6px; background: rgba(107, 114, 128, 0.1); border: 1px solid #6b7280;">
1706
+ {step["icon"]} {step["name"]}
1707
+ </div>
1708
+ '''
1709
+
1710
+ step_html += '</div>'
1711
+
1712
+ return f'''
1713
+ <div style="padding: 20px; background: var(--background-fill-secondary); border-radius: 8px;">
1714
+ <h3>📊 Financial Document Analysis Workflow</h3>
1715
+ {step_html}
1716
+ <p style="margin-top: 15px; color: var(--body-text-color-subdued);">
1717
+ Current step: <strong>{next(s["name"] for s in steps if s["key"] == current_step)}</strong>
1718
+ </p>
1719
+ <style>
1720
+ @keyframes shimmer {{
1721
+ 0% {{ transform: translateX(-100%); }}
1722
+ 100% {{ transform: translateX(200%); }}
1723
+ }}
1724
+ </style>
1725
+ </div>
1726
+ '''
1727
+
1728
+ try:
1729
+ import time
1730
+ from pathlib import Path
1731
+ from agno.media import File
1732
+
1733
+ # Step 0: Initialize
1734
+ progress_html = "🚀 <strong>Initializing financial document processing...</strong>"
1735
+ logger.info(f"🎯 WORKFLOW INITIALIZATION - Session: {ui.session_id}")
1736
+ logger.info(f"📝 Document: {temp_path}")
1737
+ logger.info("⚡ Starting multi-step financial analysis workflow...")
1738
+ yield (progress_html, create_step_html("extraction"), "", gr.Column(visible=False))
1739
+
1740
+ time.sleep(1) # Brief pause for UI update
1741
+
1742
+ # Step 1: Data Extraction
1743
+ logger.info("=" * 60)
1744
+ logger.info("🔍 STEP 1/4: DATA EXTRACTION PHASE")
1745
+ logger.info("=" * 60)
1746
+ logger.info("📋 Initializing financial data extraction agent...")
1747
+ progress_html = "🔍 <strong>Step 1/4: Extracting financial data from document...</strong>"
1748
+ yield (progress_html, create_step_html("extraction"), "", gr.Column(visible=False))
1749
+
1750
+ # Check for cached extraction
1751
+ if "extracted_data" in ui.workflow.session_state:
1752
+ logger.info("💾 Using cached extraction data from previous run")
1753
+ logger.info("⏩ Skipping extraction step - data already available")
1754
+ time.sleep(0.5) # Brief pause to show step
1755
+ else:
1756
+ logger.info(f"🔄 Starting fresh data extraction from document: {temp_path}")
1757
+ logger.info("📄 Creating document object for analysis...")
1758
+ # Perform data extraction
1759
+ document = File(filepath=temp_path)
1760
+ logger.info("✅ Document object created successfully")
1761
+
1762
+ extraction_prompt = f"""
1763
+ Analyze this financial document and extract all relevant financial data points.
1764
+
1765
+ Focus on:
1766
+ - Company identification and reporting period
1767
+ - Revenue, expenses, profits, and losses
1768
+ - Assets, liabilities, and equity
1769
+ - Cash flows and financial ratios
1770
+ - Any other key financial metrics
1771
+
1772
+ Document path: {temp_path}
1773
+ """
1774
+
1775
+ logger.info("🤖 Calling data extractor agent with financial analysis prompt")
1776
+ logger.info("⏳ This may take 30-60 seconds depending on document complexity...")
1777
+
1778
+ extraction_response = ui.workflow.data_extractor.run(
1779
+ extraction_prompt,
1780
+ files=[document]
1781
+ )
1782
+ extracted_data = extraction_response.content
1783
+
1784
+ logger.info("🎉 Data extraction agent completed successfully!")
1785
+ logger.info(f"📊 Extracted {len(extracted_data.data_points)} financial data points")
1786
+
1787
+ # Cache the result
1788
+ ui.workflow.session_state["extracted_data"] = extracted_data.model_dump()
1789
+ logger.info(f"💾 Cached extraction results for session {ui.session_id}")
1790
+ logger.info("✅ Step 1 COMPLETED - Data extraction successful")
1791
+
1792
+ # Step 2: Data Arrangement
1793
+ logger.info("=" * 60)
1794
+ logger.info("📊 STEP 2/4: DATA ORGANIZATION PHASE")
1795
+ logger.info("=" * 60)
1796
+ progress_html = "📊 <strong>Step 2/4: Organizing and analyzing financial data...</strong>"
1797
+ yield (progress_html, create_step_html("arrangement"), "", gr.Column(visible=False))
1798
+
1799
+ if "arrangement_response" in ui.workflow.session_state:
1800
+ logger.info("💾 Using cached data arrangement from previous run")
1801
+ logger.info("⏩ Skipping organization step - data already structured")
1802
+ time.sleep(0.5) # Brief pause to show step
1803
+ else:
1804
+ logger.info("🔄 Starting fresh data organization and analysis")
1805
+ # Get extracted data for arrangement
1806
+ extracted_data_dict = ui.workflow.session_state["extracted_data"]
1807
+ logger.info(f"📋 Retrieved {len(extracted_data_dict.get('data_points', []))} data points for organization")
1808
+ logger.info("🏗️ Preparing to organize data into 12 financial categories...")
1809
+
1810
+ arrangement_prompt = f"""
1811
+ You are given raw, extracted financial data. Your task is to reorganize it and prepare it for Excel-based reporting.
1812
+
1813
+ ========== WHAT TO DELIVER ==========
1814
+ • A single JSON object saved as arranged_financial_data.json
1815
+ • Fields required: categories, key_metrics, insights, summary
1816
+
1817
+ ========== HOW TO ORGANIZE ==========
1818
+ Create 12 distinct, Excel-ready categories (one worksheet each):
1819
+ 1. Executive Summary & Key Metrics
1820
+ 2. Income Statement / P&L
1821
+ 3. Balance Sheet – Assets
1822
+ 4. Balance Sheet – Liabilities & Equity
1823
+ 5. Cash-Flow Statement
1824
+ 6. Financial Ratios & Analysis
1825
+ 7. Revenue Analysis
1826
+ 8. Expense Analysis
1827
+ 9. Profitability Analysis
1828
+ 10. Liquidity & Solvency
1829
+ 11. Operational Metrics
1830
+ 12. Risk Assessment & Notes
1831
+
1832
+ ========== STEP-BY-STEP ==========
1833
+ 1. Map every data point into the most appropriate category above.
1834
+ 2. Calculate or aggregate key financial metrics where possible.
1835
+ 3. Add concise insights for trends, anomalies, or red flags.
1836
+ 4. Write an executive summary that highlights the most important findings.
1837
+ 5. Assemble everything into the JSON schema described under "WHAT TO DELIVER."
1838
+ 6. Save the JSON as arranged_financial_data.json via save_file.
1839
+ 7. Use list_files to confirm the file exists, then read_file to validate its content.
1840
+ 8. If the file is missing or malformed, fix the issue and repeat steps 6 – 7.
1841
+ 9. Only report success after the file passes both existence and content checks.
1842
+ 10. Conclude with a short, plain-language summary of what was organized.
1843
+
1844
+ Extracted Data: {json.dumps(extracted_data_dict, indent=2)}
1845
+ """
1846
+
1847
+ logger.info("Calling data arranger to organize financial data into 12 categories")
1848
+ arrangement_response = ui.workflow.data_arranger.run(arrangement_prompt)
1849
+ arrangement_content = arrangement_response.content
1850
+
1851
+ # Cache the result
1852
+ ui.workflow.session_state["arrangement_response"] = arrangement_content
1853
+ logger.info("Data organization completed successfully - financial data categorized")
1854
+ logger.info(f"Cached arrangement results for session {ui.session_id}")
1855
+
1856
+ # Step 3: Code Generation
1857
+ logger.info("Step 3: Starting code generation...")
1858
+ progress_html = "💻 <strong>Step 3/4: Generating Python code for Excel reports...</strong>"
1859
+ yield (progress_html, create_step_html("code_generation"), "", gr.Column(visible=False))
1860
+
1861
+ if "code_generation_response" in ui.workflow.session_state:
1862
+ logger.info("Using cached code generation results from previous run")
1863
+ code_generation_content = ui.workflow.session_state["code_generation_response"]
1864
+ execution_success = ui.workflow.session_state.get("execution_success", False)
1865
+ logger.info(f"Previous execution status: {'Success' if execution_success else 'Failed'}")
1866
+ time.sleep(0.5) # Brief pause to show step
1867
+ else:
1868
+ logger.info("Starting fresh Python code generation for Excel report creation")
1869
+ code_prompt = f"""
1870
+ Your objective: Turn the organized JSON data into a polished, multi-sheet Excel report—and prove that it works.
1871
+
1872
+ ========== INPUT ==========
1873
+ File: arranged_financial_data.json
1874
+ Tool to read it: read_file
1875
+
1876
+ ========== WHAT THE PYTHON SCRIPT MUST DO ==========
1877
+ 1. Load arranged_financial_data.json and parse its contents.
1878
+ 2. For each category in the JSON, create a dedicated worksheet using openpyxl.
1879
+ 3. Apply professional touches:
1880
+ • Bold, centered headers
1881
+ • Appropriate number formats
1882
+ • Column-width auto-sizing
1883
+ • Borders, cell styles, and freeze panes
1884
+ 4. Insert charts (bar, line, or pie) wherever the data lends itself to visualisation.
1885
+ 5. Embed key metrics and summary notes prominently in the Executive Summary sheet.
1886
+ 6. Name the workbook: Financial_Report_<YYYYMMDD_HHMMSS>.xlsx.
1887
+ 7. Wrap every file and workbook operation in robust try/except blocks.
1888
+ 8. Log all major steps and any exceptions for easy debugging.
1889
+ 9. Save the script via save_to_file_and_run and execute it immediately.
1890
+ 10. After execution, use list_files to ensure the Excel file was created.
1891
+ 11. Optionally inspect the file (e.g., size or first bytes via read_file) to confirm it is not empty.
1892
+ 12. If the workbook is missing or corrupted, refine the code, re-save, and re-run until success.
1893
+
1894
+ ========== OUTPUT ==========
1895
+ • A fully formatted Excel workbook in the working directory.
1896
+ • A concise summary of what ran, any issues encountered, and confirmation that the file exists and opens without error.
1897
+ """
1898
+
1899
+ logger.info("Calling code generator to create Python Excel generation script")
1900
+ code_response = ui.workflow.code_generator.run(code_prompt)
1901
+ code_generation_content = code_response.content
1902
+
1903
+ # Simple check for execution success based on response content
1904
+ execution_success = (
1905
+ "error" not in code_generation_content.lower() or
1906
+ "success" in code_generation_content.lower() or
1907
+ "completed" in code_generation_content.lower()
1908
+ )
1909
+
1910
+ # Cache the results
1911
+ ui.workflow.session_state["code_generation_response"] = code_generation_content
1912
+ ui.workflow.session_state["execution_success"] = execution_success
1913
+
1914
+ logger.info(f"Code generation and execution completed: {'✅ Success' if execution_success else '❌ Failed'}")
1915
+ logger.info(f"Cached code generation results for session {ui.session_id}")
1916
+
1917
+ # Step 4: Final Results
1918
+ logger.info("Step 4: Preparing final results...")
1919
+ progress_html = "📊 <strong>Step 4/4: Creating final Excel report...</strong>"
1920
+ yield (progress_html, create_step_html("execution"), "", gr.Column(visible=False))
1921
+
1922
+ time.sleep(1) # Brief pause to show step
1923
+
1924
+ # Prepare final results
1925
+ logger.info("Scanning output directory for generated files")
1926
+ output_files = []
1927
+ if ui.workflow.session_output_dir.exists():
1928
+ output_files = [f.name for f in ui.workflow.session_output_dir.iterdir() if f.is_file()]
1929
+ logger.info(f"Found {len(output_files)} generated files: {', '.join(output_files)}")
1930
+ else:
1931
+ logger.warning(f"Output directory does not exist: {ui.workflow.session_output_dir}")
1932
+
1933
+ # Get cached data
1934
+ extracted_data_dict = ui.workflow.session_state["extracted_data"]
1935
+ arrangement_content = ui.workflow.session_state["arrangement_response"]
1936
+ code_generation_content = ui.workflow.session_state["code_generation_response"]
1937
+ execution_success = ui.workflow.session_state.get("execution_success", False)
1938
+
1939
+ results_summary = f"""
1940
+ # Financial Document Analysis Complete
1941
+
1942
+ ## Document Information
1943
+ - **Company**: {extracted_data_dict.get('company_name', 'Not specified') if extracted_data_dict else 'Not specified'}
1944
+ - **Document Type**: {extracted_data_dict.get('document_type', 'Unknown') if extracted_data_dict else 'Unknown'}
1945
+ - **Reporting Period**: {extracted_data_dict.get('reporting_period', 'Not specified') if extracted_data_dict else 'Not specified'}
1946
+
1947
+ ## Processing Summary
1948
+ - **Data Points Extracted**: {len(extracted_data_dict.get('data_points', [])) if extracted_data_dict else 0}
1949
+ - **Data Organization**: {'✅ Completed' if arrangement_content else '❌ Failed'}
1950
+ - **Excel Creation**: {'✅ Success' if execution_success else '❌ Failed'}
1951
+
1952
+ ## Data Organization Results
1953
+ {arrangement_content[:500] + '...' if arrangement_content and len(arrangement_content) > 500 else arrangement_content or 'No arrangement data available'}
1954
+
1955
+ ## Tool Execution Summary
1956
+ **Data Arranger**: Used FileTools to save organized data to JSON
1957
+ **Code Generator**: Used PythonTools and FileTools for Excel generation
1958
+
1959
+ ## Code Generation Results
1960
+ {code_generation_content[:500] + '...' if code_generation_content and len(code_generation_content) > 500 else code_generation_content or 'No code generation results available'}
1961
+
1962
+ ## Generated Files ({len(output_files)} files)
1963
+ {chr(10).join(f"- **{file}**" for file in output_files) if output_files else "- No files generated"}
1964
+
1965
+ ## Output Directory
1966
+ 📁 `{ui.workflow.session_output_dir}`
1967
+
1968
+ ---
1969
+ *Generated using Agno Workflows with step-by-step execution*
1970
+ *Note: Each step was executed individually with progress updates*
1971
+ """
1972
+
1973
+ # Cache final results
1974
+ ui.workflow.session_state["final_results"] = results_summary
1975
+ logger.info("Final results compiled and cached successfully")
1976
+ logger.info(f"Processing workflow completed for session {ui.session_id}")
1977
+
1978
+ # Create completion HTML
1979
+ final_progress_html = "✅ <strong>All steps completed successfully!</strong>"
1980
+ final_steps_html = '''
1981
+ <div style="padding: 20px; background: var(--background-fill-secondary); border-radius: 8px;">
1982
+ <h3>✅ Workflow Completed Successfully</h3>
1983
+ <div style="display: flex; gap: 10px; margin-top: 15px;">
1984
+ <div style="padding: 10px; border-radius: 6px; background: rgba(16, 185, 129, 0.1); border: 1px solid #10b981;">
1985
+ ✅ Data Extraction
1986
+ </div>
1987
+ <div style="padding: 10px; border-radius: 6px; background: rgba(16, 185, 129, 0.1); border: 1px solid #10b981;">
1988
+ ✅ Organization
1989
+ </div>
1990
+ <div style="padding: 10px; border-radius: 6px; background: rgba(16, 185, 129, 0.1); border: 1px solid #10b981;">
1991
+ ✅ Code Generation
1992
+ </div>
1993
+ <div style="padding: 10px; border-radius: 6px; background: rgba(16, 185, 129, 0.1); border: 1px solid #10b981;">
1994
+ ✅ Excel Creation
1995
+ </div>
1996
+ </div>
1997
+ <div style="margin-top: 15px; padding: 10px; background: rgba(16, 185, 129, 0.05); border-radius: 4px;">
1998
+ <strong>All steps executed successfully!</strong>
1999
+ <ul style="margin: 5px 0;">
2000
+ <li><strong>Data Extraction:</strong> Completed</li>
2001
+ <li><strong>Organization:</strong> Completed</li>
2002
+ <li><strong>Code Generation:</strong> Completed</li>
2003
+ <li><strong>Excel Creation:</strong> ''' + ('Completed' if execution_success else 'Partial') + '''</li>
2004
+ </ul>
2005
+ </div>
2006
+ </div>
2007
+ '''
2008
+
2009
+ logger.info("Financial document processing completed successfully")
2010
+ if verbose_print:
2011
+ logger.info("Final workflow response:\n" + results_summary)
2012
+
2013
+ # Return final results with updated session state
2014
+ yield (final_progress_html, final_steps_html, results_summary, gr.Column(visible=True), session_state)
2015
+
2016
+ except Exception as e:
2017
+ logger.error(f"Processing failed: {str(e)}", exc_info=True)
2018
+ error_progress = f"❌ <strong>Processing failed: {str(e)}</strong>"
2019
+ error_steps = f"""
2020
+ <div style="padding: 20px; background: rgba(239, 68, 68, 0.1); border: 1px solid #ef4444; border-radius: 8px;">
2021
+ <h3>❌ Processing Failed</h3>
2022
+ <p><strong>Error:</strong> {str(e)}</p>
2023
+ <p>Please check the file and try again. If the problem persists, check the logs for more details.</p>
2024
+ </div>
2025
+ """
2026
+ error_markdown = f"# ❌ Processing Error\n\n**Error:** {str(e)}\n\nPlease try again or check the logs for more details."
2027
+ yield (error_progress, error_steps, error_markdown, gr.Column(visible=True), session_state)
2028
+
2029
+
2030
def get_terminal_with_logs(session_state):
    """Render the terminal panel HTML populated with real backend logs.

    Args:
        session_state: Per-user session object exposing ``session_id``,
            or None before a session has been initialized.

    Returns:
        str: Self-contained HTML (terminal chrome plus inline JS that
        manages auto-scroll) showing up to 25 recent log entries. Falls
        back to the global log stream when the session has none, and to
        placeholder lines when no logs exist at all. On any failure, a
        minimal error panel is returned instead of raising.
    """
    try:
        # Prefer session-scoped logs; fall back to the shared stream.
        session_id = session_state.session_id if session_state else None
        logs = terminal_log_handler.get_logs(session_id=session_id, limit=25)
        if not logs:
            logs = terminal_log_handler.get_logs(session_id=None, limit=25)

        # Constant lookup table mapping log levels to CSS classes,
        # hoisted out of the per-entry loop.
        level_classes = {
            'DEBUG': 'system-line',
            'INFO': 'output-line',
            'WARNING': 'system-line',
            'ERROR': 'error-line',
            'CRITICAL': 'error-line',
        }

        def _escape(text):
            # Escape '&' FIRST so the '<'/'>' replacements are not
            # double-escaped. (Previously '&' was left raw, so log text
            # containing entities such as '&lt;' rendered incorrectly.)
            return (
                text.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;')
            )

        log_lines = []
        if not logs:
            # No logs yet: show friendly initialization placeholders.
            now = datetime.now().strftime("%H:%M:%S")
            log_lines = [
                f'<div class="terminal-line system-line"><span class="timestamp">{now}</span><span>🎯 Terminal initialized - Monitoring backend logs</span></div>',
                f'<div class="terminal-line system-line"><span class="timestamp">{now}</span><span>💡 Backend processing logs will appear here in real-time</span></div>',
                f'<div class="terminal-line system-line"><span class="timestamp">{now}</span><span>📚 Session ID: {session_id or "Not initialized"}</span></div>'
            ]
        else:
            for log in logs:
                level_class = level_classes.get(log['level'], 'output-line')
                # Escape HTML and preserve formatting
                message = _escape(log['message'])
                logger_name = _escape(log['logger'])
                log_lines.append(
                    f'<div class="terminal-line {level_class}"><span class="timestamp">{log["timestamp"]}</span><span>[{log["level"]}] {logger_name}: {message}</span></div>'
                )

        # Create the complete terminal HTML. Literal JS braces are doubled
        # ({{ }}) because this is an f-string.
        terminal_html = f"""
        <div class="terminal-container">
            <div class="terminal-header">
                <div class="terminal-title">
                    <div class="terminal-icon"></div>
                    <span>Terminal</span>
                </div>
                <div class="terminal-controls">
                    <button class="control-btn close" onclick="clearTerminal()"></button>
                    <button class="control-btn minimize" onclick="minimizeTerminal()"></button>
                    <button class="control-btn maximize" onclick="maximizeTerminal()"></button>
                </div>
            </div>

            <div class="terminal-body">
                <div class="terminal-output" id="terminalOutput">
                    {''.join(log_lines)}
                </div>
            </div>
        </div>

        <script>
        // Simple read-only terminal for backend log display
        class LogTerminal {{
            constructor() {{
                this.output = document.getElementById('terminalOutput');
                this.autoScroll = true;
                this.userScrolled = false;

                this.init();
            }}

            init() {{
                // Add scroll event listener to detect manual scrolling
                if (this.output) {{
                    this.output.addEventListener('scroll', (e) => this.handleScroll(e));
                }}

                this.scrollToBottom();
            }}

            handleScroll(e) {{
                const element = e.target;
                const isScrolledToBottom = element.scrollHeight - element.clientHeight <= element.scrollTop + 1;

                // If user scrolled away from bottom, disable auto-scroll
                if (!isScrolledToBottom && this.autoScroll) {{
                    this.userScrolled = true;
                    this.autoScroll = false;
                }} else if (isScrolledToBottom && !this.autoScroll) {{
                    // If user scrolled back to bottom, re-enable auto-scroll
                    this.userScrolled = false;
                    this.autoScroll = true;
                }}
            }}

            scrollToBottom() {{
                if (this.output && this.autoScroll) {{
                    this.output.scrollTop = this.output.scrollHeight;
                }}
            }}

            clear() {{
                if (this.output) {{
                    this.output.innerHTML = '';
                    this.autoScroll = true;
                    this.userScrolled = false;
                }}
            }}
        }}

        // Initialize terminal with auto-scroll preservation
        function initTerminal() {{
            if (window.logTerminal) {{
                // Preserve scroll state if terminal exists
                window.logTerminal.init();
            }} else {{
                window.logTerminal = new LogTerminal();
            }}

            // Enable auto-scroll for new content
            if (window.logTerminal && window.logTerminal.autoScroll) {{
                setTimeout(() => {{
                    window.logTerminal.scrollToBottom();
                }}, 100);
            }}
        }}

        // Initialize immediately and on DOM changes
        initTerminal();

        // Reinitialize when terminal content updates
        setTimeout(initTerminal, 200);

        // Terminal control functions
        function clearTerminal() {{
            if (window.logTerminal) {{
                window.logTerminal.clear();
            }}
        }}

        function minimizeTerminal() {{
            console.log('Minimize terminal');
        }}

        function maximizeTerminal() {{
            console.log('Maximize terminal');
        }}
        </script>
        """

        return terminal_html

    except Exception as e:
        logger.error(f"Error creating terminal with logs: {e}")
        return f"""
        <div class="terminal-container">
            <div class="terminal-line error-line">
                <span class="timestamp">{datetime.now().strftime('%H:%M:%S')}</span>
                <span>Error loading terminal: {str(e)}</span>
            </div>
        </div>
        """
2190
+
2191
def reset_session(session_state):
    """Discard the current session and start a fresh one.

    Returns a tuple that clears the progress, steps, and results
    displays and the download output, followed by the new session
    object and its ID for the session display.
    """
    fresh = WorkflowUI()
    logger.info(f"Session reset - New session ID: {fresh.session_id}")
    cleared_displays = ("", "", "")
    return cleared_displays + (None, fresh, fresh.session_id)
2197
+
2198
def update_session_display(session_state):
    """Return the current session ID, creating a session when missing."""
    active = session_state if session_state is not None else WorkflowUI()
    return active.session_id, active
2203
+
2204
+ # Create Gradio interface
2205
+ with gr.Blocks(css=custom_css, title="📊 Data Extractor Using Gemini") as app:
2206
+ # Session state to maintain per-user data
2207
+ session_state = gr.State()
2208
+
2209
+ # Header
2210
+ gr.HTML("""
2211
+ <div class="header-title">
2212
+ 📊 Data Extractor Using Gemini
2213
+ </div>
2214
+ """)
2215
+
2216
+ # Main interface with integrated terminal (Manus AI style)
2217
+ with gr.Row():
2218
+ # Left side - Main processing interface
2219
+ with gr.Column(scale=2):
2220
+ # Configuration Panel
2221
+ gr.Markdown("## ⚙️ Configuration")
2222
+
2223
+ # Session info - will be updated when session initializes
2224
+ session_info = gr.Textbox(
2225
+ label="Session ID", value="Initializing...", interactive=False
2226
+ )
2227
+
2228
+ # File upload
2229
+ gr.Markdown("### 📄 Upload Document")
2230
+ file_input = gr.File(
2231
+ label="Choose a file",
2232
+ file_types=[f".{ext}" for ext in settings.SUPPORTED_FILE_TYPES],
2233
+ )
2234
+
2235
+
2236
+ # Info about automated processing
2237
+ gr.Markdown("### 🎯 Automated Financial Data Extraction")
2238
+ gr.Markdown("This application automatically extracts financial data points from uploaded documents and generates comprehensive analysis reports. No additional input required!")
2239
+
2240
+ # Control buttons
2241
+ with gr.Row():
2242
+ process_btn = gr.Button(
2243
+ "🚀 Start Processing", variant="primary", scale=2
2244
+ )
2245
+ reset_btn = gr.Button("🔄 Reset Session", scale=1)
2246
+
2247
+ # Processing Panel
2248
+ gr.Markdown("## ⚡ Processing Status")
2249
+
2250
+ # Progress bar
2251
+ progress_display = gr.HTML(label="Progress")
2252
+
2253
+ # Steps display
2254
+ steps_display = gr.HTML(label="Processing Steps")
2255
+
2256
+ # Results - Hidden initially, shown when processing completes
2257
+ verbose_checkbox = gr.Checkbox(label="Print model response", value=False)
2258
+
2259
+ # Results section
2260
+ results_section = gr.Column(visible=False)
2261
+ with results_section:
2262
+ gr.Markdown("### 📊 Results")
2263
+ results_display = gr.Code(
2264
+ label="Final Results", language="markdown", lines=10
2265
+ )
2266
+
2267
+ # Download section
2268
+ gr.Markdown("### ⬇️ Download Processed Files")
2269
+ download_btn = gr.Button("📥 Download All Files", variant="primary")
2270
+ download_output = gr.File(
2271
+ label="Download Files",
2272
+ file_count="single",
2273
+ file_types=[".zip"],
2274
+ interactive=False,
2275
+ visible=True
2276
+ )
2277
+
2278
+ # Right side - Integrated Terminal Panel
2279
+ with gr.Column(scale=3):
2280
+ gr.Markdown("## 💻 Terminal")
2281
+
2282
+ # Terminal component with real backend logs
2283
+ terminal_html = gr.HTML()
2284
+
2285
+
2286
+ # Event handlers
2287
+ process_btn.click(
2288
+ fn=process_file,
2289
+ inputs=[file_input, verbose_checkbox, session_state],
2290
+ outputs=[progress_display, steps_display, results_display, results_section, session_state],
2291
+ )
2292
+
2293
def session_download(session_state):
    """Produce the downloadable archive for the given session, if any."""
    if session_state is None:
        return None
    archive = session_state.download_processed_files()
    return archive
2298
+
2299
+ download_btn.click(
2300
+ fn=session_download,
2301
+ inputs=[session_state],
2302
+ outputs=[download_output],
2303
+ show_progress=True
2304
+ )
2305
+
2306
+ reset_btn.click(
2307
+ fn=reset_session,
2308
+ inputs=[session_state],
2309
+ outputs=[progress_display, steps_display, results_display, download_output, session_state, session_info],
2310
+ )
2311
+
2312
+
2313
+ # Initialize session and terminal on load
2314
def initialize_app():
    """Create a fresh session and the initial terminal panel on app load."""
    session = WorkflowUI()
    return session, session.session_id, get_terminal_with_logs(session)
2319
+
2320
+ app.load(
2321
+ fn=initialize_app,
2322
+ outputs=[session_state, session_info, terminal_html],
2323
+ )
2324
+
2325
+ # Auto-refresh timer component (hidden)
2326
+ refresh_timer = gr.Timer(value=3.0, active=True) # Refresh every 3 seconds
2327
+
2328
+ # Timer event to auto-refresh terminal with session awareness
2329
+ refresh_timer.tick(
2330
+ fn=get_terminal_with_logs,
2331
+ inputs=[session_state],
2332
+ outputs=[terminal_html],
2333
+ )
2334
+
2335
+ return app
2336
+
2337
+
2338
def main():
    """Main application entry point: build the UI, arm auto-shutdown, serve."""
    gradio_app = create_gradio_app()

    # Begin inactivity monitoring before the server starts serving.
    shutdown_manager.start_monitoring(gradio_app)

    for note in (
        "Starting Gradio application with auto-shutdown enabled",
        f"Auto-shutdown timeout: {INACTIVITY_TIMEOUT_MINUTES} minutes",
        "Press Ctrl+C to stop the server manually",
    ):
        logger.info(note)

    try:
        # Launch the app (blocks until the server stops).
        gradio_app.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=True,
            debug=False,
            show_error=True,
        )
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt, shutting down...")
        shutdown_manager._shutdown_server()
    except Exception as e:
        logger.error(f"Error during app launch: {e}")
        shutdown_manager._shutdown_server()
2364
+
2365
+
2366
+ if __name__ == "__main__":
2367
+ main()
config/__init__.py ADDED
File without changes
config/prompt_gallery.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "categories": {
3
+ "financial": {
4
+ "name": "Financial Content Extraction (Simple Structure)",
5
+ "icon": "📊",
6
+ "description": "Extract all tables and sectioned data from annual reports, placing each type in separate Excel sheets, without calculations.",
7
+ "prompts": [
8
+ {
9
+ "id": "extract_all_tables_simple",
10
+ "title": "Extract All Tables & Sections (No Charts, No Calculations)",
11
+ "icon": "📄",
12
+ "description": "Extract every table and structured data section from the annual report PDF and organize into clearly named Excel sheets. No calculations or charts—just pure content.",
13
+ "prompt": "For the provided annual report, extract EVERY table and structured content section found (including financial statements, notes, schedules, management discussion tables, segmental/line/regional breakdowns, etc.) and output into an Excel (.xlsx) file. Each sheet should be named after the report section or table heading, matching the document (examples: 'Income Statement', 'Balance Sheet', 'Segment Information', 'Risk Table', 'Notes to FS - Table 4', etc). Maintain all original row/column structure and include all source footnotes, captions, and section headers in the appropriate positions for context. \n\nHeader Row Formatting: Bold, fill light gray (RGB 230,230,230), font size 11. Freeze top row in every sheet. Wrap text in all columns if content overflows. Maintain all cell alignments as close to original as possible. \n\nInsert a cover sheet named 'Extracted Sections Index' that lists every sheet name, the original page number/range, and a short description ('Income Statement – p. 23 – Consolidated company-wide income', etc). Do not perform or add any numerical calculations or analytics. The focus is pure, lossless data extraction and organization."
14
+ },
15
+ {
16
+ "id": "extract_all_tables_with_charts",
17
+ "title": "Extract All Tables & Sections (Add Simple Charts)",
18
+ "icon": "📊",
19
+ "description": "Extract all tables and structured content, with optional basic Excel charts for major financial statements, but no derived calculations.",
20
+ "prompt": "Extract every table and section of structured data from the annual report into a multi-sheet Excel (.xlsx) file. Sheet names should match those of the tables' original titles in the report (e.g., 'Cash Flow Statement', 'Product Sales', 'Management Table 2'). For the three core statements ('Income Statement', 'Balance Sheet', 'Cash Flow Statement'), create a second sheet with the same name plus ' Chart' (e.g. 'Income Statement Chart'), placing a default bar or line chart visualizing the table's top-level rows by year (with no extra calculations or commentary—just raw data charted as-is). \n\nAll other sheet formatting rules: Header row bold, pale blue fill (RGB 217,228,240), font 11. Freeze top row. Wrap text in all columns. Add a first sheet called 'Sections Directory' with a table listing all subsequent sheet names, their corresponding report page(s), and a short summary for user navigation. No calculated fields or analytics—output is strictly direct report extraction with optional reference charts only for core statements."
21
+ }
22
+ ]
23
+ }
24
+ },
25
+ "metadata": {
26
+ "version": "1.0-simple",
27
+ "last_updated": "2025-07-18",
28
+ "description": "Intuitive and simple financial document extraction prompts: choose lossless structure-only or add basic charts—no calculations."
29
+ }
30
+ }
config/settings.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from dotenv import load_dotenv
4
+
5
+ load_dotenv()
6
+
7
+
8
class Settings:
    """Central application configuration sourced from environment variables.

    Values are read once at import time; call ``validate_config()`` before
    use to ensure required settings are present and the temp dir exists.
    """

    # Required at runtime; validated in validate_config().
    GOOGLE_AI_API_KEY = os.getenv("GOOGLE_AI_API_KEY")
    # Maximum accepted upload size, in megabytes.
    MAX_FILE_SIZE_MB = 50
    # File extensions (without the leading dot) accepted by the uploader.
    SUPPORTED_FILE_TYPES = [
        "pdf",
        "txt",
        "png",
        "jpg",
        "jpeg",
        "docx",
        "xlsx",
        "csv",
        "md",
        "json",
        "xml",
        "html",
        "py",
        "js",
        "ts",
        "doc",
        "xls",
        "ppt",
        "pptx",
    ]
    # Use /tmp for temporary files on Hugging Face Spaces (or override with TEMP_DIR env var)
    TEMP_DIR = Path(os.getenv("TEMP_DIR", "/tmp/data_extractor_temp"))
    DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "python:3.12-slim")
    # Per-agent model names; each can be overridden independently via env.
    COORDINATOR_MODEL = os.getenv("COORDINATOR_MODEL", "gemini-2.5-pro")
    PROMPT_ENGINEER_MODEL = os.getenv("PROMPT_ENGINEER_MODEL", "gemini-2.5-pro")
    DATA_EXTRACTOR_MODEL = os.getenv("DATA_EXTRACTOR_MODEL", "gemini-2.5-pro")
    DATA_ARRANGER_MODEL = os.getenv("DATA_ARRANGER_MODEL", "gemini-2.5-pro")
    CODE_GENERATOR_MODEL = os.getenv("CODE_GENERATOR_MODEL", "gemini-2.5-pro")

    # Per-agent thinking-budget token limits.
    # NOTE(review): -1 presumably means "unlimited / provider default" --
    # confirm against the model client's API.
    COORDINATOR_MODEL_THINKING_BUDGET = 2048
    PROMPT_ENGINEER_MODEL_THINKING_BUDGET = 2048
    DATA_EXTRACTOR_MODEL_THINKING_BUDGET = -1
    DATA_ARRANGER_MODEL_THINKING_BUDGET = 3072
    CODE_GENERATOR_MODEL_THINKING_BUDGET = 3072

    @classmethod
    def validate_config(cls):
        """Fail fast on missing required config and create the temp dir.

        Raises:
            ValueError: if GOOGLE_AI_API_KEY is not set.
        """
        if not cls.GOOGLE_AI_API_KEY:
            raise ValueError("GOOGLE_AI_API_KEY required")
        # parents=True: a TEMP_DIR override nested under a missing parent
        # previously raised FileNotFoundError from mkdir(exist_ok=True).
        cls.TEMP_DIR.mkdir(parents=True, exist_ok=True)
52
+
53
+
54
+ settings = Settings()
models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Data models for structured agent communication
models/data_models.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Dict, Any, Optional
3
+ from datetime import datetime
4
+
5
+
6
class FileInfo(BaseModel):
    """Information about the file being processed."""
    name: str = Field(description="File name")
    # NOTE(review): "type" shadows the builtin name inside the class body;
    # kept as-is for schema compatibility with existing callers.
    type: str = Field(description="File type/extension")
    size_mb: float = Field(description="File size in MB")
    path: str = Field(description="Full file path")
12
+
13
+
14
class SimplifiedAgentConfig(BaseModel):
    """Simplified configuration for agent creation without complex nesting."""
    instructions: str = Field(description="Single string instructions")
    requirement_type: str = Field(default="standard", description="Type of requirements")
    # default_factory avoids a shared mutable default across instances.
    custom_notes: List[str] = Field(default_factory=list, description="Simple notes")
19
+
20
+
21
class ProcessingPlan(BaseModel):
    """Simplified processing plan for document analysis.

    Produced by the planning stage; flat strings/lists are used instead of
    nested models so the plan stays easy for LLM agents to emit.
    """
    # Basic plan information
    document_type: str = Field(description="Document type (financial, legal, technical, etc.)")
    analysis_objective: str = Field(description="Primary analysis objective")
    complexity: str = Field(default="moderate", description="Complexity level")
    processing_strategy: str = Field(description="Overall processing strategy")

    # Essential configurations (simplified)
    agent_configs: Dict[str, str] = Field(
        default_factory=dict,
        description="Simple agent configuration summaries"
    )

    # Simple schema suggestions using basic types
    data_fields: List[str] = Field(description="List of suggested data fields to extract")
    validation_rules: List[str] = Field(default_factory=list, description="Validation rules")
    output_formats: List[str] = Field(default_factory=list, description="Required output formats")

    # Simple notes and requirements
    requirements: List[str] = Field(default_factory=list, description="Processing requirements")
    notes: str = Field(default="", description="Additional notes")
43
+
44
+
45
class AgentConfiguration(BaseModel):
    """Configuration for a dynamically created agent."""
    instructions: List[str] = Field(description="Specific instructions for this agent")
    # Optional[...] with a "" default: callers may pass None or omit entirely.
    custom_prompt_template: Optional[str] = Field(default="", description="Custom prompt template for this agent")
    special_requirements: List[str] = Field(default_factory=list, description="Special requirements or constraints")
50
+
51
+
52
class DataPoint(BaseModel):
    """Individual data point extracted from document."""
    field_name: str = Field(description="Name of the data field")
    # All values are stored as strings; data_type records the logical type.
    value: str = Field(description="Value of the field")
    data_type: Optional[str] = Field(default="", description="Type of data (text, number, date, etc.)")
    category: Optional[str] = Field(default="", description="Category or section this data belongs to")
    unit: Optional[str] = Field(default="", description="Unit of measurement if applicable")
    period: Optional[str] = Field(default="", description="Time period if applicable")
    # NOTE(review): the documented 0-1 range is not enforced by a validator.
    confidence_score: float = Field(description="Confidence score for the extraction (0-1)")
    source_location: Optional[str] = Field(default="", description="Location in document where data was found")
62
+
63
+
64
class ExtractedData(BaseModel):
    """Structured data extracted from the document."""
    data_points: List[DataPoint] = Field(description="List of extracted data points")
    extraction_notes: str = Field(default="", description="Notes about the extraction process")
    confidence_score: float = Field(description="Overall confidence score for the extraction")
    # NOTE(review): datetime.now() is timezone-naive local time; consider
    # an aware UTC timestamp if records are compared across machines.
    extraction_timestamp: datetime = Field(default_factory=datetime.now, description="When extraction was performed")
    document_summary: Optional[str] = Field(default="", description="Brief summary of the document content")
71
+
72
+
73
class DataInsight(BaseModel):
    """Individual insight from data analysis."""
    insight_type: str = Field(description="Type of insight (trend, comparison, etc.)")
    description: str = Field(description="Description of the insight")
    supporting_data: List[str] = Field(description="Data points that support this insight")
    # Free-form string; expected values are "high", "medium", or "low".
    importance_level: str = Field(description="Importance level (high, medium, low)")
79
+
80
+
81
class DataCategory(BaseModel):
    """A category of organized data (name plus its key/value pairs)."""
    category_name: str = Field(description="Name of the data category")
    data_points: Dict[str, str] = Field(description="Key-value pairs of data in this category")
85
+
86
class ArrangedData(BaseModel):
    """Organized and analyzed data produced by the arrangement stage."""
    organized_categories: List[DataCategory] = Field(
        description="Data organized into logical categories"
    )
    insights: List[DataInsight] = Field(description="Insights generated from the data")
    summary: str = Field(description="Summary of the arranged data")
    arrangement_notes: str = Field(description="Notes about the arrangement process")
94
+
95
+
96
class CodeGenerationResult(BaseModel):
    """Result of code generation and execution."""
    generated_code: str = Field(description="The generated Python code")
    execution_result: str = Field(description="Result of code execution")
    output_files: List[str] = Field(description="List of output files created")
    # Check this flag before trusting execution_result / output_files.
    execution_success: bool = Field(description="Whether code execution was successful")
    error_messages: List[str] = Field(default_factory=list, description="Any error messages encountered")
103
+
104
+
105
class DocumentAnalysisResult(BaseModel):
    """Complete result of document analysis team workflow.

    Aggregates per-stage summaries (as plain strings) plus the final
    findings, output files, and overall success flag.
    """
    document_type: str = Field(description="Type of document analyzed")
    analysis_objective: str = Field(description="Original analysis objective")
    processing_summary: str = Field(description="Summary of the entire processing workflow")

    # Results from each stage
    planning_notes: str = Field(description="Notes from the planning stage")
    prompts_created: str = Field(description="Summary of prompts and schemas created")
    data_extracted: str = Field(description="Summary of data extraction results")
    data_arranged: str = Field(description="Summary of data arrangement and insights")
    code_generated: str = Field(description="Summary of code generation and execution")

    # Final outputs
    key_findings: List[str] = Field(description="Key findings from the analysis")
    output_files_created: List[str] = Field(description="List of output files created")
    success: bool = Field(description="Whether the analysis completed successfully")
    recommendations: List[str] = Field(default_factory=list, description="Recommendations based on analysis")
123
+
124
+
125
class ExtractionField(BaseModel):
    """Individual field specification for data extraction."""
    field_name: str = Field(description="Name of the field to extract")
    field_type: str = Field(description="Type of data (text, number, date, etc.)")
    description: str = Field(description="Description of what this field represents")
    # Fields default to required; extractors may skip optional ones.
    required: bool = Field(default=True, description="Whether this field is required")
131
+
132
class AgentPrompt(BaseModel):
    """Prompt configuration for a specific agent."""
    agent_name: str = Field(description="Name of the agent")
    specialized_instructions: List[str] = Field(description="Specialized instructions for this agent")
    input_requirements: List[str] = Field(description="What input this agent needs")
    output_requirements: List[str] = Field(description="What output this agent should produce")
    success_criteria: List[str] = Field(description="Criteria for successful completion")
139
+
140
class PromptsAndSchemas(BaseModel):
    """Prompts and schemas for all agents in the workflow."""
    # Data extraction specific
    extraction_prompt: str = Field(description="Optimized prompt for data extraction")
    extraction_fields: List[ExtractionField] = Field(
        description="List of fields to extract from the document"
    )
    arrangement_rules: List[str] = Field(description="Rules for organizing extracted data")
    validation_criteria: List[str] = Field(description="Criteria for validating extracted data")

    # All agent prompts
    agent_prompts: List[AgentPrompt] = Field(description="Specialized prompts for each agent")
    workflow_coordination: List[str] = Field(description="Instructions for coordinating between agents")
    quality_assurance: List[str] = Field(description="Quality assurance guidelines for all agents")
prompt_gallery.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "categories": {
3
+ "financial": {
4
+ "name": "Financial Content Extraction (Simple Structure)",
5
+ "icon": "📊",
6
+ "description": "Extract all tables and sectioned data from annual reports, placing each type in separate Excel sheets, without calculations.",
7
+ "prompts": [
8
+ {
9
+ "id": "extract_all_tables_simple",
10
+ "title": "Extract All Tables & Sections (No Charts, No Calculations)",
11
+ "icon": "📄",
12
+ "description": "Extract every table and structured data section from the annual report PDF and organize into clearly named Excel sheets. No calculations or charts—just pure content.",
13
+ "prompt": "For the provided annual report, extract EVERY table and structured content section found (including financial statements, notes, schedules, management discussion tables, segmental/line/regional breakdowns, etc.) and output into an Excel (.xlsx) file. Each sheet should be named after the report section or table heading, matching the document (examples: 'Income Statement', 'Balance Sheet', 'Segment Information', 'Risk Table', 'Notes to FS - Table 4', etc). Maintain all original row/column structure and include all source footnotes, captions, and section headers in the appropriate positions for context. \n\nHeader Row Formatting: Bold, fill light gray (RGB 230,230,230), font size 11. Freeze top row in every sheet. Wrap text in all columns if content overflows. Maintain all cell alignments as close to original as possible. \n\nInsert a cover sheet named 'Extracted Sections Index' that lists every sheet name, the original page number/range, and a short description ('Income Statement – p. 23 – Consolidated company-wide income', etc). Do not perform or add any numerical calculations or analytics. The focus is pure, lossless data extraction and organization."
14
+ },
15
+ {
16
+ "id": "extract_all_tables_with_charts",
17
+ "title": "Extract All Tables & Sections (Add Simple Charts)",
18
+ "icon": "📊",
19
+ "description": "Extract all tables and structured content, with optional basic Excel charts for major financial statements, but no derived calculations.",
20
+ "prompt": "Extract every table and section of structured data from the annual report into a multi-sheet Excel (.xlsx) file. Sheet names should match those of the tables' original titles in the report (e.g., 'Cash Flow Statement', 'Product Sales', 'Management Table 2'). For the three core statements ('Income Statement', 'Balance Sheet', 'Cash Flow Statement'), create a second sheet with the same name plus ' Chart' (e.g. 'Income Statement Chart'), placing a default bar or line chart visualizing the table's top-level rows by year (with no extra calculations or commentary—just raw data charted as-is). \n\nAll other sheet formatting rules: Header row bold, pale blue fill (RGB 217,228,240), font 11. Freeze top row. Wrap text in all columns. Add a first sheet called 'Sections Directory' with a table listing all subsequent sheet names, their corresponding report page(s), and a short summary for user navigation. No calculated fields or analytics—output is strictly direct report extraction with optional reference charts only for core statements."
21
+ }
22
+ ]
23
+ }
24
+ },
25
+ "metadata": {
26
+ "version": "1.0-simple",
27
+ "last_updated": "2025-07-18",
28
+ "description": "Intuitive and simple financial document extraction prompts: choose lossless structure-only or add basic charts—no calculations."
29
+ }
30
+ }
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ agno>=1.7.4
2
+ gradio
3
+ google-generativeai
4
+ PyPDF2
5
+ Pillow
6
+ python-dotenv
7
+ pandas
8
+ matplotlib
9
+ openpyxl
10
+ python-docx
11
+ lxml
12
+ markdown
13
+ requests
14
+ google-genai
15
+ seaborn
16
+ sqlalchemy
17
+ websockets
settings.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from dotenv import load_dotenv
4
+
5
+ load_dotenv()
6
+
7
+
8
class Settings:
    """Central application configuration sourced from environment variables.

    Values are read once at import time; call ``validate_config()`` before
    use to ensure required settings are present and the temp dir exists.
    """

    # Required at runtime; validated in validate_config().
    GOOGLE_AI_API_KEY = os.getenv("GOOGLE_AI_API_KEY")
    # Maximum accepted upload size, in megabytes.
    MAX_FILE_SIZE_MB = 50
    # File extensions (without the leading dot) accepted by the uploader.
    SUPPORTED_FILE_TYPES = [
        "pdf",
        "txt",
        "png",
        "jpg",
        "jpeg",
        "docx",
        "xlsx",
        "csv",
        "md",
        "json",
        "xml",
        "html",
        "py",
        "js",
        "ts",
        "doc",
        "xls",
        "ppt",
        "pptx",
    ]
    # Use /tmp for temporary files on Hugging Face Spaces (or override with TEMP_DIR env var)
    TEMP_DIR = Path(os.getenv("TEMP_DIR", "/tmp/data_extractor_temp"))
    DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "python:3.12-slim")
    # Per-agent model names; each can be overridden independently via env.
    COORDINATOR_MODEL = os.getenv("COORDINATOR_MODEL", "gemini-2.5-pro")
    PROMPT_ENGINEER_MODEL = os.getenv("PROMPT_ENGINEER_MODEL", "gemini-2.5-pro")
    DATA_EXTRACTOR_MODEL = os.getenv("DATA_EXTRACTOR_MODEL", "gemini-2.5-pro")
    DATA_ARRANGER_MODEL = os.getenv("DATA_ARRANGER_MODEL", "gemini-2.5-pro")
    CODE_GENERATOR_MODEL = os.getenv("CODE_GENERATOR_MODEL", "gemini-2.5-pro")

    # Per-agent thinking-budget token limits.
    # NOTE(review): -1 presumably means "unlimited / provider default" --
    # confirm against the model client's API.
    COORDINATOR_MODEL_THINKING_BUDGET = 2048
    PROMPT_ENGINEER_MODEL_THINKING_BUDGET = 2048
    DATA_EXTRACTOR_MODEL_THINKING_BUDGET = -1
    DATA_ARRANGER_MODEL_THINKING_BUDGET = 3072
    CODE_GENERATOR_MODEL_THINKING_BUDGET = 3072

    @classmethod
    def validate_config(cls):
        """Fail fast on missing required config and create the temp dir.

        Raises:
            ValueError: if GOOGLE_AI_API_KEY is not set.
        """
        if not cls.GOOGLE_AI_API_KEY:
            raise ValueError("GOOGLE_AI_API_KEY required")
        # parents=True: a TEMP_DIR override nested under a missing parent
        # previously raised FileNotFoundError from mkdir(exist_ok=True).
        cls.TEMP_DIR.mkdir(parents=True, exist_ok=True)
52
+
53
+
54
+ settings = Settings()
static/terminal.html ADDED
@@ -0,0 +1,588 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Manus AI Terminal</title>
7
+ <style>
8
+ * {
9
+ margin: 0;
10
+ padding: 0;
11
+ box-sizing: border-box;
12
+ }
13
+
14
+ body {
15
+ font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
16
+ background: #0d1117;
17
+ color: #c9d1d9;
18
+ height: 100vh;
19
+ overflow: hidden;
20
+ }
21
+
22
+ .terminal-container {
23
+ display: flex;
24
+ flex-direction: column;
25
+ height: 100vh;
26
+ background: linear-gradient(135deg, #0d1117 0%, #161b22 100%);
27
+ border: 1px solid #30363d;
28
+ }
29
+
30
+ .terminal-header {
31
+ display: flex;
32
+ align-items: center;
33
+ justify-content: space-between;
34
+ padding: 12px 16px;
35
+ background: #161b22;
36
+ border-bottom: 1px solid #30363d;
37
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3);
38
+ }
39
+
40
+ .terminal-title {
41
+ display: flex;
42
+ align-items: center;
43
+ gap: 8px;
44
+ font-size: 14px;
45
+ font-weight: 600;
46
+ color: #f0f6fc;
47
+ }
48
+
49
+ .terminal-icon {
50
+ width: 16px;
51
+ height: 16px;
52
+ background: #238636;
53
+ border-radius: 50%;
54
+ position: relative;
55
+ }
56
+
57
+ .terminal-icon::after {
58
+ content: '>';
59
+ position: absolute;
60
+ top: 50%;
61
+ left: 50%;
62
+ transform: translate(-50%, -50%);
63
+ font-size: 10px;
64
+ color: white;
65
+ font-weight: bold;
66
+ }
67
+
68
+ .terminal-controls {
69
+ display: flex;
70
+ gap: 8px;
71
+ }
72
+
73
+ .control-btn {
74
+ width: 12px;
75
+ height: 12px;
76
+ border-radius: 50%;
77
+ border: none;
78
+ cursor: pointer;
79
+ transition: opacity 0.2s;
80
+ }
81
+
82
+ .control-btn:hover {
83
+ opacity: 0.8;
84
+ }
85
+
86
+ .close { background: #ff5f56; }
87
+ .minimize { background: #ffbd2e; }
88
+ .maximize { background: #27ca3f; }
89
+
90
+ .terminal-body {
91
+ flex: 1;
92
+ display: flex;
93
+ flex-direction: column;
94
+ overflow: hidden;
95
+ }
96
+
97
+ .terminal-output {
98
+ flex: 1;
99
+ padding: 16px;
100
+ overflow-y: auto;
101
+ font-size: 13px;
102
+ line-height: 1.4;
103
+ background: #0d1117;
104
+ scrollbar-width: thin;
105
+ scrollbar-color: #30363d #0d1117;
106
+ }
107
+
108
+ .terminal-output::-webkit-scrollbar {
109
+ width: 8px;
110
+ }
111
+
112
+ .terminal-output::-webkit-scrollbar-track {
113
+ background: #0d1117;
114
+ }
115
+
116
+ .terminal-output::-webkit-scrollbar-thumb {
117
+ background: #30363d;
118
+ border-radius: 4px;
119
+ }
120
+
121
+ .terminal-output::-webkit-scrollbar-thumb:hover {
122
+ background: #484f58;
123
+ }
124
+
125
+ .terminal-line {
126
+ margin-bottom: 2px;
127
+ white-space: pre-wrap;
128
+ word-wrap: break-word;
129
+ }
130
+
131
+ .command-line {
132
+ color: #58a6ff;
133
+ font-weight: 600;
134
+ }
135
+
136
+ .output-line {
137
+ color: #c9d1d9;
138
+ }
139
+
140
+ .error-line {
141
+ color: #f85149;
142
+ }
143
+
144
+ .success-line {
145
+ color: #56d364;
146
+ }
147
+
148
+ .system-line {
149
+ color: #ffa657;
150
+ font-style: italic;
151
+ }
152
+
153
+ .timestamp {
154
+ color: #7d8590;
155
+ font-size: 11px;
156
+ margin-right: 8px;
157
+ }
158
+
159
+ .terminal-input {
160
+ display: flex;
161
+ align-items: center;
162
+ padding: 12px 16px;
163
+ background: #161b22;
164
+ border-top: 1px solid #30363d;
165
+ }
166
+
167
+ .prompt {
168
+ color: #58a6ff;
169
+ margin-right: 8px;
170
+ font-weight: 600;
171
+ }
172
+
173
+ .input-field {
174
+ flex: 1;
175
+ background: transparent;
176
+ border: none;
177
+ color: #c9d1d9;
178
+ font-family: inherit;
179
+ font-size: 13px;
180
+ outline: none;
181
+ }
182
+
183
+ .input-field::placeholder {
184
+ color: #7d8590;
185
+ }
186
+
187
+ .status-indicator {
188
+ display: flex;
189
+ align-items: center;
190
+ gap: 8px;
191
+ margin-left: 12px;
192
+ }
193
+
194
+ .status-dot {
195
+ width: 8px;
196
+ height: 8px;
197
+ border-radius: 50%;
198
+ background: #7d8590;
199
+ transition: background-color 0.3s;
200
+ }
201
+
202
+ .status-dot.connected {
203
+ background: #56d364;
204
+ box-shadow: 0 0 8px rgba(86, 211, 100, 0.5);
205
+ }
206
+
207
+ .status-dot.running {
208
+ background: #ffa657;
209
+ animation: pulse 1.5s infinite;
210
+ }
211
+
212
+ .status-dot.error {
213
+ background: #f85149;
214
+ }
215
+
216
+ @keyframes pulse {
217
+ 0%, 100% { opacity: 1; }
218
+ 50% { opacity: 0.5; }
219
+ }
220
+
221
+ .typing-indicator {
222
+ display: none;
223
+ color: #7d8590;
224
+ font-style: italic;
225
+ animation: blink 1s infinite;
226
+ }
227
+
228
+ @keyframes blink {
229
+ 0%, 50% { opacity: 1; }
230
+ 51%, 100% { opacity: 0; }
231
+ }
232
+
233
+ .command-history {
234
+ position: absolute;
235
+ bottom: 60px;
236
+ left: 16px;
237
+ right: 16px;
238
+ background: #21262d;
239
+ border: 1px solid #30363d;
240
+ border-radius: 6px;
241
+ max-height: 200px;
242
+ overflow-y: auto;
243
+ display: none;
244
+ z-index: 1000;
245
+ }
246
+
247
+ .history-item {
248
+ padding: 8px 12px;
249
+ cursor: pointer;
250
+ border-bottom: 1px solid #30363d;
251
+ transition: background-color 0.2s;
252
+ }
253
+
254
+ .history-item:hover {
255
+ background: #30363d;
256
+ }
257
+
258
+ .history-item:last-child {
259
+ border-bottom: none;
260
+ }
261
+
262
+ /* Responsive design */
263
+ @media (max-width: 768px) {
264
+ .terminal-header {
265
+ padding: 8px 12px;
266
+ }
267
+
268
+ .terminal-output {
269
+ padding: 12px;
270
+ font-size: 12px;
271
+ }
272
+
273
+ .terminal-input {
274
+ padding: 8px 12px;
275
+ }
276
+ }
277
+ </style>
278
+ </head>
279
+ <body>
280
+ <div class="terminal-container">
281
+ <div class="terminal-header">
282
+ <div class="terminal-title">
283
+ <div class="terminal-icon"></div>
284
+ <span>Manus AI Terminal</span>
285
+ </div>
286
+ <div class="terminal-controls">
287
+ <button class="control-btn close" onclick="closeTerminal()"></button>
288
+ <button class="control-btn minimize" onclick="minimizeTerminal()"></button>
289
+ <button class="control-btn maximize" onclick="maximizeTerminal()"></button>
290
+ </div>
291
+ </div>
292
+
293
+ <div class="terminal-body">
294
+ <div class="terminal-output" id="output"></div>
295
+ <div class="command-history" id="history"></div>
296
+
297
+ <div class="terminal-input">
298
+ <span class="prompt">$</span>
299
+ <input type="text" class="input-field" id="commandInput"
300
+ placeholder="Type a command and press Enter..."
301
+ autocomplete="off" spellcheck="false">
302
+ <div class="status-indicator">
303
+ <div class="status-dot" id="statusDot"></div>
304
+ <span id="statusText">Disconnected</span>
305
+ </div>
306
+ </div>
307
+ </div>
308
+ </div>
309
+
310
+ <script>
311
class ManusTerminal {
    // Browser-side terminal UI: connects to the WebSocket server at
    // ws://localhost:8765 (matching the default of start_websocket_server in
    // terminal_stream.py) and renders streamed command output.

    constructor() {
        this.ws = null;
        // Cached DOM nodes used throughout the class.
        this.output = document.getElementById('output');
        this.input = document.getElementById('commandInput');
        this.statusDot = document.getElementById('statusDot');
        this.statusText = document.getElementById('statusText');
        // NOTE(review): this.history (the dropdown element) is cached but never
        // used by any method below — confirm before removing.
        this.history = document.getElementById('history');

        this.commandHistory = [];   // previously executed commands
        this.historyIndex = -1;     // cursor into commandHistory for arrow keys
        this.isConnected = false;   // WebSocket connection state
        this.isRunning = false;     // whether a remote command is executing

        this.init();
    }

    // Wire up events, open the socket, and print the welcome banner.
    init() {
        this.setupEventListeners();
        this.connect();
        this.addWelcomeMessage();
    }

    setupEventListeners() {
        this.input.addEventListener('keydown', (e) => this.handleKeyDown(e));
        this.input.addEventListener('keyup', (e) => this.handleKeyUp(e));

        // Auto-reconnect on window focus
        window.addEventListener('focus', () => {
            if (!this.isConnected) {
                this.connect();
            }
        });
    }

    // Open the WebSocket and install lifecycle handlers.
    // On close, a reconnect attempt is scheduled every 3 seconds.
    connect() {
        try {
            this.ws = new WebSocket('ws://localhost:8765');

            this.ws.onopen = () => {
                this.isConnected = true;
                this.updateStatus('connected', 'Connected');
                this.addSystemMessage('🚀 Connected to terminal server');
            };

            this.ws.onmessage = (event) => {
                const data = JSON.parse(event.data);
                this.handleMessage(data);
            };

            this.ws.onclose = () => {
                this.isConnected = false;
                this.isRunning = false;
                this.updateStatus('error', 'Disconnected');
                this.addSystemMessage('❌ Connection lost. Attempting to reconnect...');

                // Auto-reconnect after 3 seconds
                setTimeout(() => this.connect(), 3000);
            };

            this.ws.onerror = (error) => {
                this.addSystemMessage('⚠️ Connection error. Check if the server is running.');
            };

        } catch (error) {
            this.addSystemMessage('❌ Failed to connect to terminal server');
        }
    }

    // Dispatch one server message (shapes produced by
    // TerminalStreamManager.broadcast in terminal_stream.py).
    handleMessage(data) {
        // NOTE(review): timestamp is computed but never used below — addLine()
        // stamps lines with the local time instead; confirm before removing.
        const timestamp = new Date(data.timestamp).toLocaleTimeString();

        switch (data.type) {
            case 'connected':
                this.addSystemMessage(data.message);
                break;

            case 'command_start':
                this.isRunning = true;
                this.updateStatus('running', 'Running');
                this.addCommandLine(data.message);
                break;

            case 'output':
                this.addOutputLine(data.data, data.stream);
                break;

            case 'command_complete':
                this.isRunning = false;
                this.updateStatus('connected', 'Connected');
                this.addSystemMessage(`Process completed with exit code ${data.exit_code}`);
                break;

            case 'error':
                this.addErrorLine(data.data);
                break;

            case 'interrupted':
                this.isRunning = false;
                this.updateStatus('connected', 'Connected');
                this.addSystemMessage(data.message);
                break;
        }
    }

    // Keyboard handling: Enter submits, arrows browse history, Ctrl+C interrupts.
    handleKeyDown(e) {
        switch (e.key) {
            case 'Enter':
                e.preventDefault();
                this.executeCommand();
                break;

            case 'ArrowUp':
                e.preventDefault();
                this.navigateHistory(-1);
                break;

            case 'ArrowDown':
                e.preventDefault();
                this.navigateHistory(1);
                break;

            case 'Tab':
                e.preventDefault();
                // TODO: Implement command completion
                break;

            case 'c':
                if (e.ctrlKey) {
                    e.preventDefault();
                    this.interruptCommand();
                }
                break;
        }
    }

    handleKeyUp(e) {
        // Show typing indicator
        if (e.target.value.length > 0) {
            // TODO: Implement typing indicator
        }
    }

    // Send the current input line to the server and record it in history.
    executeCommand() {
        const command = this.input.value.trim();
        if (!command || !this.isConnected) return;

        // Add to history (immediate duplicates are skipped)
        if (this.commandHistory[this.commandHistory.length - 1] !== command) {
            this.commandHistory.push(command);
        }
        this.historyIndex = this.commandHistory.length;

        // Send command
        this.ws.send(JSON.stringify({
            'type': 'command',
            'command': command
        }));

        // Clear input
        this.input.value = '';
    }

    // Ask the server to terminate the running process (bound to Ctrl+C).
    interruptCommand() {
        if (this.isRunning && this.isConnected) {
            this.ws.send(JSON.stringify({
                'type': 'interrupt'
            }));
        }
    }

    // Move through command history; direction is -1 (older) or +1 (newer).
    navigateHistory(direction) {
        if (this.commandHistory.length === 0) return;

        this.historyIndex += direction;

        if (this.historyIndex < 0) {
            this.historyIndex = 0;
        } else if (this.historyIndex >= this.commandHistory.length) {
            // Walked past the newest entry: clear the input line.
            this.historyIndex = this.commandHistory.length;
            this.input.value = '';
            return;
        }

        this.input.value = this.commandHistory[this.historyIndex] || '';
    }

    // Update the status dot (via its CSS class) and the label text.
    updateStatus(status, text) {
        this.statusDot.className = `status-dot ${status}`;
        this.statusText.textContent = text;
    }

    addWelcomeMessage() {
        this.addSystemMessage('🎯 Manus AI Terminal - Ready for commands');
        this.addSystemMessage('💡 Use Ctrl+C to interrupt running commands');
        this.addSystemMessage('📚 Use ↑/↓ arrows to navigate command history');
    }

    // Convenience wrappers around addLine() with the matching CSS class.
    addCommandLine(text) {
        this.addLine(text, 'command-line');
    }

    addOutputLine(text, stream = 'stdout') {
        const className = stream === 'stderr' ? 'error-line' : 'output-line';
        this.addLine(text, className);
    }

    addErrorLine(text) {
        this.addLine(text, 'error-line');
    }

    addSystemMessage(text) {
        this.addLine(text, 'system-line');
    }

    // Append one timestamped line to the output pane and keep it scrolled down.
    addLine(text, className = 'output-line') {
        const line = document.createElement('div');
        line.className = `terminal-line ${className}`;

        const timestamp = document.createElement('span');
        timestamp.className = 'timestamp';
        timestamp.textContent = new Date().toLocaleTimeString();

        const content = document.createElement('span');
        content.textContent = text;

        line.appendChild(timestamp);
        line.appendChild(content);

        this.output.appendChild(line);
        this.scrollToBottom();
    }

    scrollToBottom() {
        this.output.scrollTop = this.output.scrollHeight;
    }

    // Wipe the output pane and reprint the welcome banner (bound to Ctrl+L).
    clear() {
        this.output.innerHTML = '';
        this.addWelcomeMessage();
    }
}
553
+
554
// Terminal control functions
function closeTerminal() {
    // Ask before closing, since this discards the current session view.
    const shouldClose = confirm('Are you sure you want to close the terminal?');
    if (shouldClose) {
        window.close();
    }
}
560
+
561
function minimizeTerminal() {
    // TODO: minimize is not implemented; log only for now.
    console.log('Minimize terminal');
}
565
+
566
function maximizeTerminal() {
    // Toggle fullscreen: request it on the page root when inactive, exit otherwise.
    const fullscreenActive = Boolean(document.fullscreenElement);
    if (!fullscreenActive) {
        document.documentElement.requestFullscreen();
    } else {
        document.exitFullscreen();
    }
}
573
+
574
// Initialize terminal when page loads
document.addEventListener('DOMContentLoaded', () => {
    // Exposed globally so the keydown shortcut below (and debugging) can reach it.
    window.terminal = new ManusTerminal();
});

// Add global commands
window.addEventListener('keydown', (e) => {
    // Ctrl+L clears the terminal, mirroring common shell behavior.
    if (e.ctrlKey && e.key === 'l') {
        e.preventDefault();
        window.terminal.clear();
    }
});
586
+ </script>
587
+ </body>
588
+ </html>
terminal_stream.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import websockets
4
+ from typing import Dict, Any, Set
5
+ import subprocess
6
+ import shlex
7
+ from queue import Queue
8
+ import threading
9
+ import time
10
+ import logging
11
+ from datetime import datetime
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
class TerminalStreamManager:
    """Manages real-time terminal streaming with WebSocket connections.

    One command runs at a time; its stdout/stderr are broadcast line-by-line
    as JSON messages to every connected WebSocket client.
    """

    def __init__(self):
        # Connected clients that receive every broadcast message.
        self.clients: Set[websockets.WebSocketServerProtocol] = set()
        # NOTE(review): command_queue is never read or written by this class —
        # it looks like dead state; confirm before relying on it.
        self.command_queue = Queue()
        self.is_running = False
        # subprocess.Popen of the currently executing command, or None when idle.
        self.current_process = None

    async def register_client(self, websocket):
        """Register a new WebSocket client and send it a greeting message."""
        self.clients.add(websocket)
        await websocket.send(json.dumps({
            'type': 'connected',
            'message': '🚀 Terminal connected successfully',
            'timestamp': datetime.now().isoformat()
        }))
        logger.info(f"Terminal client connected. Total clients: {len(self.clients)}")

    async def unregister_client(self, websocket):
        """Unregister a WebSocket client (no-op if it was never registered)."""
        self.clients.discard(websocket)
        logger.info(f"Terminal client disconnected. Total clients: {len(self.clients)}")

    async def broadcast(self, message: Dict[str, Any]):
        """Broadcast *message* (stamped with the current time) to all clients.

        Clients whose connection fails mid-send are dropped from the set.
        """
        if self.clients:
            disconnected = set()
            message['timestamp'] = datetime.now().isoformat()

            for client in self.clients:
                try:
                    await client.send(json.dumps(message))
                except websockets.exceptions.ConnectionClosed:
                    disconnected.add(client)
                except Exception as e:
                    logger.error(f"Error broadcasting to client: {e}")
                    disconnected.add(client)

            # Clean up disconnected clients
            for client in disconnected:
                self.clients.discard(client)

    async def execute_command(self, command: str):
        """Execute *command* and stream its output to all clients in real time.

        Failures (unparseable command, spawn errors, ...) are broadcast as an
        'error' message rather than raised to the caller.
        """
        await self.broadcast({
            'type': 'command_start',
            'command': command,
            'message': f'$ {command}'
        })

        try:
            # Security: shell=False semantics — tokenize with shlex rather than
            # handing the raw string to a shell.
            safe_command = shlex.split(command)

            self.current_process = subprocess.Popen(
                safe_command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                bufsize=1,  # line-buffered (text mode)
                universal_newlines=True
            )

            # Hoisted out of the streaming loop: re-importing on every
            # iteration was pure overhead. Only select.select() on pipes can
            # fail (unsupported on some platforms); that case is handled by
            # the fallback below.
            import select

            # Stream output until the process exits.
            while True:
                if self.current_process.poll() is not None:
                    # Process finished; flush any remaining buffered output.
                    remaining_stdout = self.current_process.stdout.read()
                    remaining_stderr = self.current_process.stderr.read()

                    if remaining_stdout:
                        await self.broadcast({
                            'type': 'output',
                            'data': remaining_stdout,
                            'stream': 'stdout'
                        })

                    if remaining_stderr:
                        await self.broadcast({
                            'type': 'output',
                            'data': remaining_stderr,
                            'stream': 'stderr'
                        })

                    break

                try:
                    # Wait up to 100ms for either pipe to become readable.
                    # NOTE(review): readline() is still a blocking call on the
                    # event-loop thread; fine for line-oriented output but it
                    # can stall on a partial line — confirm acceptable.
                    ready, _, _ = select.select(
                        [self.current_process.stdout, self.current_process.stderr], [], [], 0.1
                    )

                    for stream in ready:
                        if stream == self.current_process.stdout:
                            line = stream.readline()
                            if line:
                                await self.broadcast({
                                    'type': 'output',
                                    'data': line,
                                    'stream': 'stdout'
                                })
                        elif stream == self.current_process.stderr:
                            line = stream.readline()
                            if line:
                                await self.broadcast({
                                    'type': 'output',
                                    'data': line,
                                    'stream': 'stderr'
                                })
                except (OSError, ValueError):
                    # Fix: the original bare `except:` swallowed *everything*,
                    # including asyncio.CancelledError, which could leave this
                    # loop spinning forever on task cancellation. Only the
                    # expected select/pipe errors (e.g. selecting on pipes is
                    # unsupported on some platforms) fall back to polling.
                    await asyncio.sleep(0.1)

            # Report the child's exit status to all clients.
            await self.broadcast({
                'type': 'command_complete',
                'exit_code': self.current_process.returncode,
                'message': f'Process exited with code {self.current_process.returncode}'
            })

        except Exception as e:
            await self.broadcast({
                'type': 'error',
                'data': str(e),
                'stream': 'system'
            })
        finally:
            self.current_process = None

    async def handle_client(self, websocket, path):
        """Handle one WebSocket client connection for its entire lifetime.

        Understands two message types: {'type': 'command', 'command': ...}
        and {'type': 'interrupt'}.
        """
        await self.register_client(websocket)
        try:
            async for message in websocket:
                try:
                    data = json.loads(message)

                    if data.get('type') == 'command':
                        command = data.get('command', '').strip()
                        if command:
                            await self.execute_command(command)

                    elif data.get('type') == 'interrupt':
                        if self.current_process:
                            self.current_process.terminate()
                            await self.broadcast({
                                'type': 'interrupted',
                                'message': 'Process interrupted by user'
                            })

                except json.JSONDecodeError:
                    await websocket.send(json.dumps({
                        'type': 'error',
                        'message': 'Invalid JSON message'
                    }))

        except websockets.exceptions.ConnectionClosed:
            pass
        finally:
            await self.unregister_client(websocket)
177
+
178
# Global terminal manager instance
terminal_manager = TerminalStreamManager()

async def start_websocket_server(host='localhost', port=8765):
    """Start the WebSocket server that streams terminal sessions.

    Returns the server object created by ``websockets.serve``.
    """
    logger.info(f"Starting terminal WebSocket server on {host}:{port}")

    async def _delegate(websocket, path):
        # Every connection is forwarded to the shared manager instance.
        await terminal_manager.handle_client(websocket, path)

    return await websockets.serve(_delegate, host, port)
189
+
190
def run_websocket_server():
    """Launch the terminal WebSocket server on a daemon thread.

    Returns the started ``threading.Thread`` so callers can keep a handle on it.
    """
    def _serve_forever():
        # Each thread needs its own event loop.
        event_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(event_loop)

        try:
            server = event_loop.run_until_complete(start_websocket_server())
            logger.info("Terminal WebSocket server started successfully")
            event_loop.run_forever()
        except Exception as e:
            logger.error(f"Error starting WebSocket server: {e}")

    worker = threading.Thread(target=_serve_forever, daemon=True)
    worker.start()
    return worker
utils/__init__.py ADDED
File without changes
utils/file_handler.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
import shutil
import tempfile
import time
import uuid
from pathlib import Path
from typing import Dict

import PyPDF2
from PIL import Image

from config.settings import settings
8
+
9
class FileHandler:
    """Validate, persist, preview, and clean up user-uploaded files.

    Files are stored under ``settings.TEMP_DIR/<session_id>/input/``.
    """

    def __init__(self):
        # Root temp directory and maximum accepted upload size (MB), from settings.
        self.temp_dir = Path(settings.TEMP_DIR)
        self.max_size_mb = settings.MAX_FILE_SIZE_MB

    def validate_file(self, uploaded_file) -> Dict:
        """Validate an upload for presence, size, and supported extension.

        Returns:
            dict with keys ``valid`` (bool), ``error`` (str or None) and
            ``file_info`` (dict with name/size_mb/type, or None on failure).
        """
        validation = {"valid": False, "error": None, "file_info": None}
        if not uploaded_file:
            validation["error"] = "No file"
            return validation
        file_size_mb = len(uploaded_file.getbuffer()) / (1024 * 1024)
        if file_size_mb > self.max_size_mb:
            validation["error"] = "File too large"
            return validation
        file_extension = uploaded_file.name.split('.')[-1].lower()
        if file_extension not in settings.SUPPORTED_FILE_TYPES:
            validation["error"] = "Unsupported type"
            return validation
        validation["valid"] = True
        # uploaded_file.name may be a full Gradio temp path; show only the basename.
        filename = os.path.basename(uploaded_file.name)
        validation["file_info"] = {"name": filename, "size_mb": file_size_mb, "type": file_extension}
        return validation

    def save_uploaded_file(self, uploaded_file, session_id: str) -> str:
        """Copy an upload into the session's input directory and return its path.

        A random short session id is generated when *session_id* is falsy.
        """
        if not session_id:
            session_id = str(uuid.uuid4())[:8]

        # Create session directory in temp
        session_dir = self.temp_dir / session_id / "input"
        session_dir.mkdir(parents=True, exist_ok=True)

        logger = logging.getLogger(__name__)

        # uploaded_file.name holds the full Gradio temp path; keep just the filename.
        filename = os.path.basename(uploaded_file.name)
        file_path = session_dir / filename

        logger.info(f"Moving file from Gradio temp: {uploaded_file.name}")
        logger.info(f"To session directory: {file_path}")

        with open(file_path, "wb") as f:
            # Support the different upload object flavors Gradio may hand us.
            if hasattr(uploaded_file, 'getbuffer'):
                f.write(uploaded_file.getbuffer())
            elif hasattr(uploaded_file, 'read'):
                f.write(uploaded_file.read())
            else:
                # NamedString and similar objects: re-read from the temp path itself.
                with open(uploaded_file.name, 'rb') as src:
                    f.write(src.read())
        return str(file_path)

    def get_file_preview(self, file_path: str, file_type: str) -> str:
        """Return up to 500 characters of preview text for PDF or TXT files.

        Other types (and any read/parse failure) yield a human-readable
        "not available" string instead of raising.
        """
        if file_type == 'pdf':
            try:
                with open(file_path, 'rb') as file:
                    reader = PyPDF2.PdfReader(file)
                    if len(reader.pages) > 0:
                        text = reader.pages[0].extract_text()
                        return text[:500] + "..." if len(text) > 500 else text
            except Exception:
                return "PDF preview not available"
        elif file_type == 'txt':
            try:
                with open(file_path, 'r', encoding='utf-8') as file:
                    text = file.read()
                    return text[:500] + "..." if len(text) > 500 else text
            except Exception:
                return "Text preview not available"
        # Similar for image types could be added
        return "Preview not available"

    def cleanup_temp_files(self):
        """Best-effort removal of session directories older than 24 hours."""
        try:
            current_time = time.time()
            for session_dir in self.temp_dir.iterdir():
                if session_dir.is_dir():
                    dir_age = current_time - session_dir.stat().st_mtime
                    if dir_age > 24 * 3600:  # 24 hours in seconds
                        shutil.rmtree(session_dir)
        except OSError:
            # Cleanup is opportunistic; missing dirs or permission errors are ignored.
            pass
utils/logger.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from datetime import datetime
3
+ from pathlib import Path
4
+
5
class AgentLogger:
    """Structured logger for agent workflow events, writing to console and a daily file."""

    def __init__(self, log_dir="logs"):
        """Create (or reuse) the shared ``agent_logger`` with console + file handlers.

        Args:
            log_dir: Directory where daily log files are written (created if missing).
        """
        self.log_dir = Path(log_dir)
        self.log_dir.mkdir(exist_ok=True)
        self.logger = logging.getLogger("agent_logger")
        self.logger.setLevel(logging.DEBUG)
        # Fix: logging.getLogger returns a process-wide singleton, so the
        # original code attached two more handlers on every AgentLogger
        # instantiation, duplicating each record. Attach handlers only once.
        if not self.logger.handlers:
            formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
            console_handler = logging.StreamHandler()
            console_handler.setLevel(logging.INFO)
            console_handler.setFormatter(formatter)
            file_handler = logging.FileHandler(
                self.log_dir / f"agents_{datetime.now().strftime('%Y%m%d')}.log"
            )
            file_handler.setLevel(logging.DEBUG)
            file_handler.setFormatter(formatter)
            self.logger.addHandler(console_handler)
            self.logger.addHandler(file_handler)

    def log_workflow_step(self, agent_name, message):
        """Log a high-level workflow step at INFO level."""
        self.logger.info(f"{agent_name}: {message}")

    def log_agent_output(self, agent_name, output, method, duration):
        """Log an agent's raw output at DEBUG level, including call method and duration."""
        self.logger.debug(f"{agent_name} {method} output: {output} ({duration}s)")

    def log_inter_agent_pass(self, from_agent, to_agent, data_size):
        """Log a data hand-off between two agents at INFO level."""
        self.logger.info(f"🔗 PASS: {from_agent} → {to_agent} | Size: {data_size}")

# Shared module-level logger instance.
agent_logger = AgentLogger()
workflow/__init__.py ADDED
File without changes
workflow/financial_workflow.py ADDED
@@ -0,0 +1,505 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Financial Document Analysis Workflow using Agno Workflows
3
+ Clean, pure-python implementation with structured outputs to avoid JSON parsing issues
4
+ """
5
+
6
+ import json
7
+ from pathlib import Path
8
+ from typing import Dict, List, Optional, Iterator
9
+ from pydantic import BaseModel, Field
10
+
11
+ from agno.agent import Agent, RunResponse
12
+ from agno.models.google import Gemini
13
+ from agno.media import File
14
+ from agno.tools.file import FileTools
15
+ from agno.tools.python import PythonTools
16
+ from agno.workflow import Workflow
17
+ from agno.utils.log import logger
18
+ from agno.tools.shell import ShellTools
19
+ from config.settings import settings
20
+
21
+
22
# Structured Output Models to avoid JSON parsing issues
class DataPoint(BaseModel):
    """One financial data point extracted from a document."""
    field_name: str = Field(..., description="Name of the financial data field")
    value: str = Field(..., description="Value of the field")
    category: str = Field(..., description="Financial category (revenue, expenses, assets, etc.)")
    # Optional metadata; empty string means "not stated in the document".
    period: str = Field(default="", description="Time period if applicable")
    unit: str = Field(default="", description="Currency or measurement unit")
    # Extraction confidence in [0, 1]; defaults to 0.9 when the model does not set it.
    confidence: float = Field(default=0.9, description="Confidence score 0-1")
31
+
32
class ExtractedFinancialData(BaseModel):
    """Structured output of the data-extraction phase."""
    company_name: str = Field(default="", description="Company name")
    document_type: str = Field(..., description="Type of financial document")
    reporting_period: str = Field(default="", description="Reporting period")
    # Flat list of every extracted value; organization into categories happens later.
    data_points: List[DataPoint] = Field(..., description="All extracted financial data points")
    summary: str = Field(..., description="Brief summary of extracted data")
39
+
40
class FinancialCategory(BaseModel):
    """A single category of organized financial data."""
    category_name: str = Field(..., description="Name of the financial category")
    description: str = Field(..., description="Description of what this category contains")
    data_items: Dict[str, str] = Field(..., description="Key-value pairs of financial data")
    # Totals are optional; an empty dict means none were computed for this category.
    totals: Dict[str, str] = Field(default_factory=dict, description="Any calculated totals")
46
+
47
class ArrangedFinancialData(BaseModel):
    """Structured output of the data-arrangement phase."""
    categories: List[FinancialCategory] = Field(..., description="Organized financial categories")
    # Metrics and insights default to empty rather than being required fields.
    key_metrics: Dict[str, str] = Field(default_factory=dict, description="Key financial metrics")
    insights: List[str] = Field(default_factory=list, description="Financial insights and analysis")
    summary: str = Field(..., description="Summary of arranged data")
53
+
54
class GeneratedCode(BaseModel):
    """Structured output of the code-generation phase: Python code for Excel creation."""
    code: str = Field(..., description="Generated Python code for Excel creation")
    description: str = Field(..., description="Description of what the code does")
    output_filename: str = Field(..., description="Expected output filename")
    execution_notes: str = Field(default="", description="Notes about code execution")
60
+
61
+
62
+ class FinancialDocumentWorkflow(Workflow):
63
+ """
64
+ Pure Python workflow for financial document analysis
65
+ Uses structured outputs to eliminate JSON parsing issues
66
+ """
67
+
68
+ description: str = "Financial document analysis workflow with data extraction, organization, and Excel generation"
69
+
70
+
71
+
72
+ # Data Extractor Agent - Structured output eliminates JSON parsing issues
73
+ data_extractor: Agent = Agent(
74
+ model=Gemini(id=settings.DATA_EXTRACTOR_MODEL,thinking_budget=settings.DATA_EXTRACTOR_MODEL_THINKING_BUDGET,api_key=settings.GOOGLE_AI_API_KEY),
75
+ description="Expert financial data extraction specialist",
76
+ instructions=[
77
+ "Extract comprehensive financial data from documents with these priorities:",
78
+ "Identify and classify the document type: Income Statement, Balance Sheet, Cash Flow Statement, 10-K, 10-Q, Annual Report, Quarterly/Interim Report, Prospectus, Earnings Release, Proxy Statement, Investor Presentation, Press Release, or other",
79
+ "Extract report version: audited, unaudited, restated, pro forma",
80
+ "Capture language, country/jurisdiction, and file format (PDF, XLSX, HTML, etc.)",
81
+ "Extract company name and unique identifiers: LEI, CIK, ISIN, Ticker",
82
+ "Extract reporting entity: consolidated, subsidiary, segment",
83
+ "Extract fiscal year and period covered (start and end dates)",
84
+ "Extract all reporting, publication, and filing dates",
85
+ "Extract currency and any currency translation notes",
86
+ "Extract auditors name, if present",
87
+ "Identify financial statement presentation style: single-step, multi-step, consolidated, segmental",
88
+ "Capture table and note references for each data point",
89
+ "Extract total revenue/net sales (with by-product/service, segment, and geography breakdowns if disclosed)",
90
+ "Extract COGS or cost of sales",
91
+ "Extract gross profit and gross margin",
92
+ "Extract operating expenses: R&D, SG&A, advertising, depreciation, amortization",
93
+ "Extract operating income (EBIT) and EBIT margin",
94
+ "Extract non-operating items: interest income/expense, other income/expenses",
95
+ "Extract pretax income, income tax expense, and net income (with breakdowns: continuing, discontinued ops)",
96
+ "Extract basic and diluted EPS",
97
+ "Extract comprehensive and other comprehensive income items",
98
+ "Extract YoY and sequential income comparisons (if available)",
99
+ "Extract current assets: cash and equivalents, marketable securities, accounts receivable (gross/net), inventory (raw, WIP, finished), prepaid expenses, other",
100
+ "Extract non-current assets: PP&E (gross/net), intangible assets, goodwill, LT investments, deferred tax assets, right-of-use assets, other",
101
+ "Extract current liabilities: accounts payable, accrued expenses, short-term debt, lease liabilities, taxes payable, other",
102
+ "Extract non-current liabilities: long-term debt, deferred tax liabilities, pensions, lease obligations, other",
103
+ "Extract total shareholders equity: common/ordinary stock, retained earnings, additional paid-in capital, treasury stock, accumulated OCI, minority interest",
104
+ "Extract book value per share",
105
+ "Extract cash flows: net cash from operating, investing, and financing activities",
106
+ "Extract key cash flow line items: net cash from ops, capex, acquisitions/disposals, dividends, share buybacks, debt activities",
107
+ "Extract non-cash adjustments: depreciation, amortization, SBC, deferred taxes, impairments, gain/loss on sale",
108
+ "Extract profitability ratios: gross margin, operating margin, net margin, EBITDA margin",
109
+ "Extract return ratios: ROE, ROA, ROIC",
110
+ "Extract liquidity/solvency: current ratio, quick ratio, debt/equity, interest coverage",
111
+ "Extract efficiency: asset turnover, inventory turnover, receivables turnover",
112
+ "Extract per-share metrics: EPS (basic/diluted), BVPS, FCF per share",
113
+ "Extract segmental/geographical/operational ratios and breakdowns",
114
+ "Extract shares outstanding, share class details, voting rights",
115
+ "Extract dividends declared/paid (amount, dates)",
116
+ "Extract buyback authorization/utilization details",
117
+ "Extract employee count (average, period-end)",
118
+ "Extract store/branch/office count",
119
+ "Extract customer/user/subscriber numbers (active/paying, ARPU, churn, MAU/DAU)",
120
+ "Extract units shipped/sold, production volumes, operational stats",
121
+ "Extract key management guidance/forecasts if present",
122
+ "Extract risk factors, uncertainties, and forward-looking statements",
123
+ "Extract ESG/sustainability data where available (emissions, board diversity, etc.)",
124
+ "Flag any restatements, adjustments, or one-off items",
125
+ "Highlight material non-recurring, extraordinary, or unusual items (gains/losses, litigation, impairments, restructuring)",
126
+ "Identify related-party transactions and accounting policy changes",
127
+ "For each data point, provide a confidence score (0–1) based on clarity and documentation",
128
+ "Include table/note reference numbers where possible",
129
+ "Note any ambiguity or extraction limitations for specific data",
130
+ "List all units, scales (millions, thousands), and any conversion performed",
131
+ "Normalize date and currency formats across extracted data",
132
+ "Validate calculations (e.g., assets = liabilities + equity), and flag inconsistencies",
133
+ "Return data in a structured format (JSON/table), with reference and confidence annotation"
134
+ ],
135
+ response_model=ExtractedFinancialData,
136
+ structured_outputs=True,
137
+ debug_mode=True,
138
+ )
139
+
140
+
141
+
142
+ # Data Arranger Agent - Organizes data into categories for Excel
143
+ data_arranger: Agent = Agent(
144
+ model=Gemini(id=settings.DATA_ARRANGER_MODEL,thinking_budget=settings.DATA_ARRANGER_MODEL_THINKING_BUDGET,api_key=settings.GOOGLE_AI_API_KEY),
145
+ description="Financial data organization and analysis expert",
146
+ instructions=[
147
+ 'Organize the extracted financial data into logical categories based on financial statement types (Income Statement, Balance Sheet, Cash Flow Statement, etc.).',
148
+ 'Group related financial items together (e.g., all revenue items, all expense items, all asset items).',
149
+ 'Ensure each category has a clear, descriptive name that would work as an Excel worksheet tab.',
150
+ 'Always add appropriate headers for Excel templates including: Years (e.g., 2021, 2022, 2023, 2024), Company names or entity identifiers, Financial line item names, and Units of measurement (e.g., "in millions", "in thousands").',
151
+ 'Create column headers that clearly identify what each data column represents.',
152
+ 'Include row headers that clearly identify each financial line item.',
153
+ 'Design categories suitable for comprehensive Excel worksheets, such as: Income Statement Data, Balance Sheet Data, Cash Flow Data, Key Metrics, and Company Information.',
154
+ 'Maintain data integrity - do not modify, calculate, or analyze the original data values.',
155
+ 'Preserve original data formats and units.',
156
+ 'Ensure data is organized in a tabular format suitable for Excel import.',
157
+ 'Include metadata about data sources and reporting periods where available.',
158
+ 'Package everything into a JSON object with the fields: categories (object containing organized data by category), headers (object containing appropriate headers for each category), and metadata (object containing information about data sources, periods, and units).',
159
+ 'Never perform any analysis on the data.',
160
+ 'Do not calculate ratios, growth rates, or trends.',
161
+ 'Do not provide insights or interpretations.',
162
+ 'Do not modify the actual data values.',
163
+ 'Focus solely on organization and proper formatting.',
164
+ 'Save this JSON as \'arranged_financial_data.json\' using the save_file tool.',
165
+ 'Run list_files to verify that the file now exists in the working directory.',
166
+ 'Use read_file to ensure the JSON content was written correctly.',
167
+ 'If the file is missing or the content is incorrect, debug, re-save, and repeat steps',
168
+ 'Only report success after the files presence and validity are fully confirmed.'
169
+ ],
170
+ tools=[FileTools()], # FileTools for saving arranged data
171
+ # NOTE: Cannot use structured_outputs with tools in Gemini - choosing tools over structured outputs
172
+ markdown=True,
173
+ debug_mode=True,
174
+ add_memory_references=True,
175
+ add_session_summary_references=True,
176
+ exponential_backoff=True,
177
+ retries=10,
178
+ )
179
+
180
+ # Code Generator Agent - Creates Excel generation code
181
+ code_generator = Agent(
182
+ model=Gemini(
183
+ id=settings.CODE_GENERATOR_MODEL,
184
+ thinking_budget=settings.CODE_GENERATOR_MODEL_THINKING_BUDGET,
185
+ api_key=settings.GOOGLE_AI_API_KEY
186
+ ),
187
+ description="Excel report generator that analyzes JSON data and creates formatted workbooks using shell execution on any OS",
188
+ goal="Generate a professional Excel report from arranged_financial_data.json with multiple worksheets, formatting, and charts",
189
+ instructions=[
190
+ "EXECUTION RULE: Always use run_shell_command() for Python execution. Never use save_to_file_and_run().",
191
+ "",
192
+ "CRITICAL: Always read the file to understand the struction of the JSON First"
193
+ "FIRST, use read_file tool to load 'arranged_financial_data.json'.",
194
+ "SECOND, analyze its structure deeply. Identify all keys, data types, nested structures, and any inconsistencies.",
195
+ "THIRD, create analysis.py to programmatically examine the JSON. Execute using run_shell_command().",
196
+ "FOURTH, based on the analysis, design your Excel structure. Plan worksheets, formatting, and charts needed.",
197
+ "FIFTH, implement generate_excel_report.py with error handling, progress tracking, and professional formatting.",
198
+ "",
199
+ "CRITICAL: Always start Python scripts with:",
200
+ "import os",
201
+ "os.chdir(os.path.dirname(os.path.abspath(__file__)) or '.')",
202
+ "This ensures the script runs in the correct directory regardless of OS.",
203
+ "",
204
+ "Available Tools:",
205
+ "- FileTools: read_file, save_file, list_files",
206
+ "- PythonTools: pip_install_package (ONLY for package installation)",
207
+ "- ShellTools: run_shell_command (PRIMARY execution tool)",
208
+ "",
209
+ "Cross-Platform Execution:",
210
+ "- Try: run_shell_command('python script.py 2>&1')",
211
+ "- If fails on Windows: run_shell_command('python.exe script.py 2>&1')",
212
+ "- PowerShell alternative: run_shell_command('powershell -Command \"python script.py\" 2>&1')",
213
+ "",
214
+ "Verification Commands (Linux/Mac):",
215
+ "- run_shell_command('ls -la *.xlsx')",
216
+ "- run_shell_command('file Financial_Report*.xlsx')",
217
+ "- run_shell_command('du -h *.xlsx')",
218
+ "",
219
+ "Verification Commands (Windows/PowerShell):",
220
+ "- run_shell_command('dir *.xlsx')",
221
+ "- run_shell_command('powershell -Command \"Get-ChildItem *.xlsx\"')",
222
+ "- run_shell_command('powershell -Command \"(Get-Item *.xlsx).Length\"')",
223
+ "",
224
+ "Debug Commands (Cross-Platform):",
225
+ "- Current directory: run_shell_command('pwd') or run_shell_command('cd')",
226
+ "- Python location: run_shell_command('where python') or run_shell_command('which python')",
227
+ "- List files: run_shell_command('dir') or run_shell_command('ls')",
228
+ "",
229
+ "Package Installation:",
230
+ "- pip_install_package('openpyxl')",
231
+ "- Or via shell: run_shell_command('pip install openpyxl')",
232
+ "- Windows: run_shell_command('python -m pip install openpyxl')",
233
+ "",
234
+ "Success Criteria: Excel file exists, size >5KB, no errors in output."
235
+ ],
236
+ expected_output="A Financial_Report_YYYYMMDD_HHMMSS.xlsx file containing formatted data from the JSON with multiple worksheets, professional styling, and relevant charts",
237
+ additional_context="This agent must work on Windows, Mac, and Linux. Always use os.path for file operations and handle path separators correctly. Include proper error handling for cross-platform compatibility.",
238
+ tools=[
239
+ ShellTools(),
240
+ FileTools(save_files=True, read_files=True, list_files=True),
241
+ PythonTools(pip_install=True, save_and_run=False, run_code=False)
242
+ ],
243
+ markdown=True,
244
+ show_tool_calls=True,
245
+ debug_mode=True,
246
+ retries=10,
247
+ add_datetime_to_instructions=True,
248
+ delay_between_retries=10
249
+ )
250
+
251
+ def __init__(self, session_id: str = None, **kwargs):
252
+ super().__init__(session_id=session_id, **kwargs)
253
+ self.session_id = session_id or f"financial_workflow_{int(__import__('time').time())}"
254
+ self.session_output_dir = Path(settings.TEMP_DIR) / self.session_id / "output"
255
+ self.session_output_dir.mkdir(parents=True, exist_ok=True)
256
+
257
+ # Configure tools with correct base directories after initialization
258
+ self._configure_agent_tools()
259
+
260
+ logger.info(f"FinancialDocumentWorkflow initialized with session: {self.session_id}")
261
+
262
+ def _configure_agent_tools(self):
263
+ """Configure agent tools with the correct base directories"""
264
+ # Configure data arranger's FileTools with session output directory
265
+ if hasattr(self.data_arranger, 'tools') and self.data_arranger.tools:
266
+ for tool in self.data_arranger.tools:
267
+ if isinstance(tool, FileTools):
268
+ tool.base_dir = self.session_output_dir
269
+
270
+ # Configure code generator's tools with session output directory
271
+ if hasattr(self.code_generator, 'tools') and self.code_generator.tools:
272
+ for tool in self.code_generator.tools:
273
+ if isinstance(tool, FileTools):
274
+ tool.base_dir = self.session_output_dir
275
+ elif isinstance(tool, PythonTools):
276
+ tool.base_dir = self.session_output_dir
277
+
278
+ def run(self, file_path: str, use_cache: bool = True) -> RunResponse:
279
+ """
280
+ Pure Python workflow execution - no streaming, no JSON parsing issues
281
+ """
282
+ logger.info(f"Processing financial document: {file_path}")
283
+
284
+ # Check cache first if enabled
285
+ if use_cache and "final_results" in self.session_state:
286
+ logger.info("Returning cached results")
287
+ return RunResponse(
288
+ run_id=self.run_id,
289
+ content=self.session_state["final_results"]
290
+ )
291
+
292
+ try:
293
+ # Step 1: Extract Financial Data
294
+ logger.info("Step 1: Extracting financial data...")
295
+
296
+ # Check for cached extraction
297
+ if use_cache and "extracted_data" in self.session_state:
298
+ extracted_data = ExtractedFinancialData.model_validate(
299
+ self.session_state["extracted_data"]
300
+ )
301
+ logger.info("Using cached extraction data")
302
+ else:
303
+ document = File(filepath=file_path)
304
+ extraction_prompt = f"""
305
+ Analyze this financial document and extract all relevant financial data points.
306
+
307
+ Focus on:
308
+ - Company identification, including company name, entity identifiers (e.g., Ticker, CIK, ISIN, LEI), and reporting entity type (consolidated/subsidiary/segment).
309
+ - All reporting period information: fiscal year, period start and end dates, reporting date, publication date, and currency used.
310
+ - Revenue data: total revenue/net sales, breakdown by product/service, segment, and geography if available, and year-over-year growth rates.
311
+ - Expense data: COGS, operating expenses (R&D, SG&A, advertising, depreciation/amortization), interest expenses, taxes, and any non-operating items.
312
+ - Profit data: gross profit, operating income (EBIT/EBITDA), pretax profit, net income, basic and diluted earnings per share (EPS), comprehensive income.
313
+ - Balance sheet items: current assets (cash, securities, receivables, inventories), non-current assets (PP&E, intangibles, goodwill), current liabilities, non-current liabilities, and all categories of shareholders’ equity.
314
+ - Cash flow details: cash from operations, investing, and financing; capex, dividends, buybacks; non-cash adjustments (depreciation, SBC, etc.).
315
+ - Financial ratios: profitability (gross margin, operating margin, net margin), return (ROE, ROA, ROIC), liquidity (current/quick ratio), leverage (debt/equity, interest coverage), efficiency (asset/inventory/receivables turnover), per-share metrics.
316
+ - Capital and shareholder information: shares outstanding, share class details, dividends, and buyback information.
317
+ - Non-financial and operational metrics: employee, store, customer/user counts, production volumes, and operational breakdowns.
318
+ - Extract any additional material metrics, key management guidance, risks, uncertainties, ESG indicators, or forward-looking statements.
319
+ - Flag/annotate any unusual or non-recurring items, restatements, or related-party transactions.
320
+ - For each data point, provide a confidence score (0–1) and, where possible, include reference identifiers (table/note numbers).
321
+ - If units or currencies differ throughout, normalize and annotate the data accordingly.
322
+ Return your extraction in a structured, machine-readable format with references and confidence levels for each field.
323
+ Document path: {file_path}
324
+ """
325
+
326
+ extraction_response: RunResponse = self.data_extractor.run(
327
+ extraction_prompt,
328
+ files=[document]
329
+ )
330
+ extracted_data: ExtractedFinancialData = extraction_response.content
331
+
332
+ # Cache the result
333
+ self.session_state["extracted_data"] = extracted_data.model_dump()
334
+ logger.info(f"Extracted {len(extracted_data.data_points)} data points")
335
+
336
+ # Step 2: Arrange and Organize Data
337
+ logger.info("Step 2: Organizing financial data...")
338
+
339
+ if use_cache and "arrangement_response" in self.session_state:
340
+ arrangement_content = self.session_state["arrangement_response"]
341
+ logger.info("Using cached arrangement data")
342
+ else:
343
+ arrangement_prompt = f"""
344
+ You are given raw, extracted financial data. Your task is to reorganize it and prepare it for Excel-based reporting.
345
+
346
+ ========== WHAT TO DELIVER ==========
347
+ • A single JSON object saved as arranged_financial_data.json
348
+ • Fields required: categories, headers, metadata
349
+
350
+ ========== HOW TO ORGANIZE ==========
351
+ Create distinct, Excel-ready categories (one worksheet each) for logical grouping of financial data. Examples include:
352
+ 1. Income Statement Data
353
+ 2. Balance Sheet Data
354
+ 3. Cash Flow Data
355
+ 4. Company Information / General Data
356
+
357
+ ========== STEP-BY-STEP ==========
358
+ 1. Map every data point into the most appropriate category above.
359
+ 2. For each category, identify and include all necessary headers for an Excel template, such as years, company names, financial line item names, and units of measurement (e.g., "in millions").
360
+ 3. Ensure data integrity by not modifying, calculating, or analyzing the original data values.
361
+ 4. Preserve original data formats and units.
362
+ 5. Organize data in a tabular format suitable for direct Excel import.
363
+ 6. Include metadata about data sources and reporting periods where available.
364
+ 7. Assemble everything into the JSON schema described under “WHAT TO DELIVER.”
365
+ 8. Save the JSON as arranged_financial_data.json via save_file.
366
+ 9. Use list_files to confirm the file exists, then read_file to validate its content.
367
+ 10. If the file is missing or malformed, fix the issue and repeat steps 8 – 9.
368
+ 11. Only report success after the file passes both existence and content checks.
369
+
370
+ ========== IMPORTANT RESTRICTIONS ==========
371
+ - Never perform any analysis on the data.
372
+ - Do not calculate ratios, growth rates, or trends.
373
+ - Do not provide insights or interpretations.
374
+ - Do not modify the actual data values.
375
+ - Focus solely on organization and proper formatting.
376
+
377
+ Extracted Data: {extracted_data.model_dump_json(indent=2)}
378
+ """
379
+
380
+ arrangement_response: RunResponse = self.data_arranger.run(arrangement_prompt)
381
+ arrangement_content = arrangement_response.content
382
+
383
+ # Cache the result
384
+ self.session_state["arrangement_response"] = arrangement_content
385
+ logger.info("Data organization completed - check output directory for arranged_financial_data.json")
386
+
387
+ # Step 3: Generate and Execute Excel Code
388
+ logger.info("Step 3: Generating and executing Excel code...")
389
+
390
+ if use_cache and "code_generation_response" in self.session_state:
391
+ code_generation_content = self.session_state["code_generation_response"]
392
+ execution_success = self.session_state.get("execution_success", False)
393
+ logger.info("Using cached code generation results")
394
+ else:
395
+ code_prompt = f"""
396
+ Your objective: Turn the organized JSON data into a polished, multi-sheet Excel report—and prove that it works.
397
+
398
+ ========== INPUT ==========
399
+ File: arranged_financial_data.json
400
+ Tool to read it: read_file
401
+
402
+ ========== WHAT THE PYTHON SCRIPT MUST DO ==========
403
+ 1. Load arranged_financial_data.json and parse its contents.
404
+ 2. For each category in the JSON, create a dedicated worksheet using openpyxl.
405
+ 3. Apply professional touches:
406
+ • Bold, centered headers
407
+ • Appropriate number formats
408
+ • Column-width auto-sizing
409
+ • Borders, cell styles, and freeze panes
410
+ 4. Insert charts (bar, line, or pie) wherever the data lends itself to visualisation.
411
+ 5. Embed key metrics and summary notes prominently in the Executive Summary sheet.
412
+ 6. Name the workbook: Financial_Report_<YYYYMMDD_HHMMSS>.xlsx.
413
+ 7. Wrap every file and workbook operation in robust try/except blocks.
414
+ 8. Log all major steps and any exceptions for easy debugging.
415
+ 9. Save the script via save_to_file_and_run and execute it immediately.
416
+ 10. After execution, use list_files to ensure the Excel file was created.
417
+ 11. Optionally inspect the file (e.g., size or first bytes via read_file) to confirm it is not empty.
418
+ 12. If the workbook is missing or corrupted, refine the code, re-save, and re-run until success.
419
+
420
+ ========== OUTPUT ==========
421
+ • A fully formatted Excel workbook in the working directory.
422
+ • A concise summary of what ran, any issues encountered, and confirmation that the file exists and opens without error.
423
+ """
424
+
425
+ code_response: RunResponse = self.code_generator.run(code_prompt)
426
+ code_generation_content = code_response.content
427
+
428
+ # Simple check for execution success based on response content
429
+ execution_success = (
430
+ "error" not in code_generation_content.lower() or
431
+ "success" in code_generation_content.lower() or
432
+ "completed" in code_generation_content.lower()
433
+ )
434
+
435
+ # Cache the results
436
+ self.session_state["code_generation_response"] = code_generation_content
437
+ self.session_state["execution_success"] = execution_success
438
+
439
+ logger.info(f"Code generation and execution completed: {'✅ Success' if execution_success else '❌ Failed'}")
440
+
441
+ # Prepare final results
442
+ # List actual output files
443
+ output_files = []
444
+ if self.session_output_dir.exists():
445
+ output_files = [f.name for f in self.session_output_dir.iterdir() if f.is_file()]
446
+
447
+ results_summary = f"""
448
+ # Financial Document Analysis Complete
449
+
450
+ ## Document Information
451
+ - **Company**: {extracted_data.company_name or 'Not specified'}
452
+ - **Document Type**: {extracted_data.document_type}
453
+ - **Reporting Period**: {extracted_data.reporting_period or 'Not specified'}
454
+
455
+ ## Processing Summary
456
+ - **Data Points Extracted**: {len(extracted_data.data_points)}
457
+ - **Data Organization**: {'✅ Completed' if arrangement_content else '❌ Failed'}
458
+ - **Excel Creation**: {'✅ Success' if execution_success else '❌ Failed'}
459
+
460
+ ## Data Organization Results
461
+ {arrangement_content[:500] + '...' if arrangement_content and len(arrangement_content) > 500 else arrangement_content or 'No arrangement data available'}
462
+
463
+ ## Tool Execution Summary
464
+ **Data Arranger**: Used FileTools to save organized data to JSON
465
+ **Code Generator**: Used PythonTools and FileTools for Excel generation
466
+
467
+ ## Code Generation Results
468
+ {code_generation_content[:500] + '...' if code_generation_content and len(code_generation_content) > 500 else code_generation_content or 'No code generation results available'}
469
+
470
+ ## Generated Files ({len(output_files)} files)
471
+ {chr(10).join(f"- **{file}**" for file in output_files) if output_files else "- No files generated"}
472
+
473
+ ## Output Directory
474
+ 📁 `{self.session_output_dir}`
475
+
476
+ ---
477
+ *Generated using Agno Workflows with FileTools and PythonTools integration*
478
+ *Note: Due to Gemini limitations, structured outputs were used for data extraction only*
479
+ """
480
+
481
+ # Cache final results
482
+ self.session_state["final_results"] = results_summary
483
+
484
+ return RunResponse(
485
+ run_id=self.run_id,
486
+ content=results_summary
487
+ )
488
+
489
+ except Exception as e:
490
+ error_message = f"❌ Workflow failed: {str(e)}"
491
+ logger.error(f"Financial workflow error: {e}", exc_info=True)
492
+ return RunResponse(
493
+ run_id=self.run_id,
494
+ content=error_message
495
+ )
496
+
497
+ def get_processing_status(self) -> Dict[str, str]:
498
+ """Get the current processing status"""
499
+ status = {
500
+ "extraction": "completed" if "extracted_data" in self.session_state else "pending",
501
+ "arrangement": "completed" if "arranged_data" in self.session_state else "pending",
502
+ "code_generation": "completed" if "generated_code" in self.session_state else "pending",
503
+ "final_results": "completed" if "final_results" in self.session_state else "pending"
504
+ }
505
+ return status