methunraj committed on
Commit
cfeb3a6
·
1 Parent(s): e09cfd6

feat: initialize project structure with core components

Browse files

build: add Dockerfile and CI/CD configuration
docs: update README with installation and usage instructions
style: format code and add consistent file structure

.claude/settings.local.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(mkdir:*)",
5
+ "Bash(python test:*)",
6
+ "Bash(/usr/local/bin/python3:*)",
7
+ "Bash(ls:*)",
8
+ "Bash(rm:*)",
9
+ "Bash(python:*)",
10
+ "Bash(find:*)",
11
+ "mcp__zen__analyze",
12
+ "Bash(pkill:*)",
13
+ "Bash(touch:*)",
14
+ "Bash(docker build:*)",
15
+ "Bash(/dev/null)"
16
+ ],
17
+ "deny": []
18
+ }
19
+ }
.dockerignore ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Environment
24
+ .env
25
+ .venv
26
+ env/
27
+ venv/
28
+ ENV/
29
+ env.bak/
30
+ venv.bak/
31
+
32
+ # IDE
33
+ .vscode/
34
+ .idea/
35
+ *.swp
36
+ *.swo
37
+ *~
38
+
39
+ # OS
40
+ .DS_Store
41
+ .DS_Store?
42
+ ._*
43
+ .Spotlight-V100
44
+ .Trashes
45
+ ehthumbs.db
46
+ Thumbs.db
47
+
48
+ # Logs
49
+ *.log
50
+ logs/
51
+ app.log
52
+
53
+ # Cache
54
+ .ruff_cache/
55
+ .pytest_cache/
56
+ .coverage
57
+
58
+ # Temporary files
59
+ temp/
60
+ tmp/
61
+
62
+ # Git
63
+ .git/
64
+ .gitignore
65
+
66
+ # Documentation
67
+ README.md
68
+ *.md
69
+
70
+ # Claude
71
+ .claude/
.gitignore ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ pip-wheel-metadata/
20
+ share/python-wheels/
21
+ *.egg-info/
22
+ .installed.cfg
23
+ *.egg
24
+ MANIFEST
25
+
26
+ # PyInstaller
27
+ *.manifest
28
+ *.spec
29
+
30
+ # Installer logs
31
+ pip-log.txt
32
+ pip-delete-this-directory.txt
33
+
34
+ # Unit test / coverage reports
35
+ htmlcov/
36
+ .tox/
37
+ .nox/
38
+ .coverage
39
+ .coverage.*
40
+ .cache
41
+ nosetests.xml
42
+ coverage.xml
43
+ *.cover
44
+ *.py,cover
45
+ .hypothesis/
46
+ .pytest_cache/
47
+
48
+ # Translations
49
+ *.mo
50
+ *.pot
51
+
52
+ # Django stuff:
53
+ *.log
54
+ local_settings.py
55
+ db.sqlite3
56
+ db.sqlite3-journal
57
+
58
+ # Flask stuff:
59
+ instance/
60
+ .webassets-cache
61
+
62
+ # Scrapy stuff:
63
+ .scrapy
64
+
65
+ # Sphinx documentation
66
+ docs/_build/
67
+
68
+ # PyBuilder
69
+ target/
70
+
71
+ # Jupyter Notebook
72
+ .ipynb_checkpoints
73
+
74
+ # IPython
75
+ profile_default/
76
+ ipython_config.py
77
+
78
+ # pyenv
79
+ .python-version
80
+
81
+ # pipenv
82
+ Pipfile.lock
83
+
84
+ # PEP 582
85
+ __pypackages__/
86
+
87
+ # Celery stuff
88
+ celerybeat-schedule
89
+ celerybeat.pid
90
+
91
+ # SageMath parsed files
92
+ *.sage.py
93
+
94
+ # Environments
95
+ .env
96
+ .venv
97
+ env/
98
+ venv/
99
+ ENV/
100
+ env.bak/
101
+ venv.bak/
102
+ .env.production
103
+
104
+ # Spyder project settings
105
+ .spyderproject
106
+ .spyproject
107
+
108
+ # Rope project settings
109
+ .ropeproject
110
+
111
+ # mkdocs documentation
112
+ /site
113
+
114
+ # mypy
115
+ .mypy_cache/
116
+ .dmypy.json
117
+ dmypy.json
118
+
119
+ # Pyre type checker
120
+ .pyre/
121
+
122
+ # macOS
123
+ .DS_Store
124
+ .AppleDouble
125
+ .LSOverride
126
+
127
+ # Thumbnails
128
+ ._*
129
+
130
+ # Files that might appear in the root of a volume
131
+ .DocumentRevisions-V100
132
+ .fseventsd
133
+ .Spotlight-V100
134
+ .TemporaryItems
135
+ .Trashes
136
+ .VolumeIcon.icns
137
+ .com.apple.timemachine.donotpresent
138
+
139
+ # Directories potentially created on remote AFP share
140
+ .AppleDB
141
+ .AppleDesktop
142
+ Network Trash Folder
143
+ Temporary Items
144
+ .apdisk
145
+
146
+ # Application specific
147
+ logs/
148
+ temp/
149
+ *.log
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
.vercel/project.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"projectName":"trae_tl66rxeh"}
Dockerfile ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use the official Python 3.11 slim image for better compatibility
2
+ FROM python:3.11-slim
3
+
4
+ # Set environment variables for optimal Python and Gradio behavior
5
+ ENV PYTHONUNBUFFERED=1
6
+ ENV PYTHONDONTWRITEBYTECODE=1
7
+ ENV PIP_NO_CACHE_DIR=1
8
+ ENV PIP_DISABLE_PIP_VERSION_CHECK=1
9
+ ENV DEBIAN_FRONTEND=noninteractive
10
+
11
+ # Create app user for security (but run as root for HF Spaces compatibility)
12
+ RUN useradd --create-home --shell /bin/bash app
13
+
14
+ # Set the working directory inside the container
15
+ WORKDIR /app
16
+
17
+ # Install system dependencies required for multi-user AI application
18
+ RUN apt-get update && apt-get install -y --no-install-recommends \
19
+ # Build tools
20
+ build-essential \
21
+ gcc \
22
+ g++ \
23
+ make \
24
+ cmake \
25
+ pkg-config \
26
+ # Network and download tools
27
+ curl \
28
+ wget \
29
+ git \
30
+ # Development libraries
31
+ libffi-dev \
32
+ libssl-dev \
33
+ # Image processing libraries
34
+ libjpeg-dev \
35
+ libpng-dev \
36
+ libfreetype6-dev \
37
+ libtiff5-dev \
38
+ libopenjp2-7-dev \
39
+ # Document processing libraries
40
+ libxml2-dev \
41
+ libxslt1-dev \
42
+ zlib1g-dev \
43
+ # OCR and PDF processing
44
+ tesseract-ocr \
45
+ tesseract-ocr-eng \
46
+ poppler-utils \
47
+ # SQLite for session storage
48
+ sqlite3 \
49
+ libsqlite3-dev \
50
+ # Cleanup
51
+ && apt-get clean \
52
+ && rm -rf /var/lib/apt/lists/* \
53
+ && rm -rf /var/cache/apt/*
54
+
55
+ # Upgrade pip to latest version
56
+ RUN python -m pip install --upgrade pip setuptools wheel
57
+
58
+ # Set pip configuration for better performance and reliability
59
+ RUN pip config set global.trusted-host "pypi.org files.pythonhosted.org pypi.python.org" \
60
+ && pip config set global.no-cache-dir true \
61
+ && pip config set global.disable-pip-version-check true
62
+
63
+ # Copy the requirements file first to leverage Docker's build cache
64
+ COPY requirements.txt .
65
+
66
+ # Install Python dependencies with optimizations for concurrent usage
67
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt \
68
+ && pip install --no-cache-dir \
69
+ # Additional packages for multi-user support
70
+ gunicorn \
71
+ uvloop \
72
+ # Performance monitoring
73
+ psutil \
74
+ && pip list --outdated
75
+
76
+ # Copy the rest of the application code
77
+ COPY . .
78
+
79
+ # Create comprehensive directory structure for multi-user application
80
+ RUN mkdir -p \
81
+ # Core application directories
82
+ temp logs uploads downloads cache \
83
+ # Multi-user session directories (will be created dynamically)
84
+ /tmp/data_extractor_temp \
85
+ # WebSocket and terminal directories
86
+ static \
87
+ # Database directory for session storage
88
+ data \
89
+ && chmod -R 755 /app
90
+
91
+ # Set optimized permissions for multi-user concurrent access
92
+ RUN chmod -R 777 temp logs uploads downloads cache /tmp \
93
+ && chmod -R 755 static \
94
+ && chmod 755 app.py \
95
+ && chmod -R 755 config utils workflow models
96
+
97
+ # Create non-root user but keep root permissions for HF Spaces
98
+ RUN chown -R app:app /app \
99
+ && chown -R app:app /tmp/data_extractor_temp
100
+
101
+ # Set comprehensive environment variables for multi-user application
102
+ ENV PYTHONPATH=/app
103
+ ENV GRADIO_SERVER_NAME=0.0.0.0
104
+ ENV GRADIO_SERVER_PORT=7860
105
+ ENV GRADIO_SHARE=False
106
+ ENV GRADIO_DEBUG=False
107
+
108
+ # Matplotlib configuration for headless operation
109
+ ENV MPLBACKEND=Agg
110
+ ENV MPLCONFIGDIR=/tmp/mpl_cache
111
+
112
+ # Optimize for multi-user concurrent access
113
+ ENV GRADIO_QUEUE_DEFAULT_CONCURRENCY=10
114
+ ENV GRADIO_MAX_THREADS=20
115
+
116
+ # WebSocket and networking configuration
117
+ ENV WEBSOCKET_HOST=0.0.0.0
118
+ ENV WEBSOCKET_PORT=8765
119
+
120
+ # Session and temporary file configuration
121
+ ENV TEMP_DIR=/tmp/data_extractor_temp
122
+ ENV SESSION_TIMEOUT=1800
123
+ ENV MAX_FILE_SIZE_MB=50
124
+
125
+ # AI model configuration (will be overridden by user env vars)
126
+ ENV COORDINATOR_MODEL=gemini-2.5-pro
127
+ ENV DATA_EXTRACTOR_MODEL=gemini-2.5-pro
128
+ ENV DATA_ARRANGER_MODEL=gemini-2.5-pro
129
+ ENV CODE_GENERATOR_MODEL=gemini-2.5-pro
130
+
131
+ # Security and performance settings
132
+ ENV PYTHONSAFEPATH=1
133
+ ENV PYTHONHASHSEED=random
134
+
135
+ # Expose the port that the Gradio application will run on
136
+ EXPOSE 7860
137
+ EXPOSE 8765
138
+
139
+ # Health check for container monitoring
140
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
141
+ CMD curl -f http://localhost:7860/ || exit 1
142
+
143
+ # Run as root for Hugging Face Spaces compatibility
144
+ USER root
145
+
146
+ # Create startup script for better error handling and logging
147
+ RUN echo '#!/bin/bash\n\
148
+ set -e\n\
149
+ echo "🚀 Starting Data Extractor Multi-User Application..."\n\
150
+ echo "📊 Python version: $(python --version)"\n\
151
+ echo "🌐 Server: 0.0.0.0:7860"\n\
152
+ echo "👥 Multi-user concurrency: Enabled"\n\
153
+ echo "🔒 Session isolation: Active"\n\
154
+ echo "💾 Temp directory: $TEMP_DIR"\n\
155
+ \n\
156
+ # Create runtime directories\n\
157
+ mkdir -p "$TEMP_DIR"\n\
158
+ mkdir -p /tmp/mpl_cache\n\
159
+ chmod 777 "$TEMP_DIR" /tmp/mpl_cache\n\
160
+ \n\
161
+ # Start the application\n\
162
+ exec python app.py\n\
163
+ ' > /app/start.sh && chmod +x /app/start.sh
164
+
165
+ # The command to run when the container starts
166
+ CMD ["/app/start.sh"]
README.md CHANGED
@@ -1,11 +1,162 @@
1
  ---
2
- title: Data Extractor Using Gemini
3
- emoji: 🐢
4
- colorFrom: yellow
5
- colorTo: blue
6
  sdk: docker
7
  pinned: false
8
- short_description: Document processing application built with Agno v1.7.4 featu
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Agno Document Analysis
3
+ emoji: 📄
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
+ license: mit
9
  ---
10
 
11
+ # Agno Document Analysis Workflow
12
+
13
+ A sophisticated document processing application built with Agno v1.7.4 featuring a multi-agent workflow for intelligent document analysis and data extraction.
14
+
15
+ ## Features
16
+
17
+ - **5-Agent Workflow**: Coordinator, Prompt Engineer, Data Extractor, Data Arranger, Code Generator
18
+ - **Multi-format Support**: PDF, TXT, PNG, JPG, JPEG, DOCX, XLSX, CSV, MD, JSON, XML, HTML, PY, JS, TS, DOC, XLS, PPT, PPTX
19
+ - **Real-time Processing**: Streaming interface with live updates
20
+ - **Sandboxed Execution**: Safe code execution environment
21
+ - **Beautiful UI**: Modern Gradio interface with custom animations
22
+
23
+ ## Quick Start
24
+
25
+ ### Automated Installation
26
+
27
+ ```bash
28
+ # Clone the repository
29
+ git clone <repository-url>
30
+ cd Data_Extractor
31
+
32
+ # Quick installation (recommended)
33
+ ./install.sh
34
+
35
+ # Or use Python setup script
36
+ python setup.py
37
+ ```
38
+
39
+ ### Manual Installation
40
+
41
+ ```bash
42
+ # Create virtual environment
43
+ python -m venv data_extractor_env
44
+ source data_extractor_env/bin/activate # On Windows: data_extractor_env\Scripts\activate
45
+
46
+ # Install dependencies
47
+ pip install -r requirements.txt
48
+
49
+ # Create environment file
50
+ cp .env.example .env # Update with your API keys
51
+
52
+ # Run the application
53
+ python app.py
54
+ ```
55
+
56
+ ## Installation Options
57
+
58
+ ### Requirements Files
59
+
60
+ - **`requirements-minimal.txt`**: Essential dependencies only (~50 packages)
61
+ ```bash
62
+ pip install -r requirements-minimal.txt
63
+ ```
64
+
65
+ - **`requirements.txt`**: Complete feature set (~200+ packages)
66
+ ```bash
67
+ pip install -r requirements.txt
68
+ ```
69
+
70
+ - **`requirements-dev.txt`**: Development dependencies with testing tools
71
+ ```bash
72
+ pip install -r requirements-dev.txt
73
+ ```
74
+
75
+ ### System Dependencies
76
+
77
+ Some features require system-level dependencies:
78
+
79
+ **macOS:**
80
+ ```bash
81
+ brew install tesseract imagemagick poppler
82
+ ```
83
+
84
+ **Ubuntu/Debian:**
85
+ ```bash
86
+ sudo apt-get install tesseract-ocr libmagickwand-dev poppler-utils
87
+ ```
88
+
89
+ **Windows:**
90
+ ```bash
91
+ choco install tesseract imagemagick poppler
92
+ ```
93
+
94
+ ## Usage
95
+
96
+ 1. **Setup Environment**: Follow installation instructions above
97
+ 2. **Configure API Keys**: Update `.env` file with your API keys
98
+ 3. **Upload Document**: Support for 20+ file formats
99
+ 4. **Select Analysis**: Choose from predefined types or custom prompts
100
+ 5. **Process**: Watch the multi-agent workflow in real-time
101
+ 6. **Download Results**: Get structured data and generated Excel reports
102
+
103
+ ## Environment Variables
104
+
105
+ Create a `.env` file with the following variables:
106
+
107
+ ```bash
108
+ # Required API Keys
109
+ GOOGLE_API_KEY=your_google_api_key_here
110
+ OPENAI_API_KEY=your_openai_api_key_here # Optional
111
+
112
+ # Application Settings
113
+ DEBUG=False
114
+ LOG_LEVEL=INFO
115
+ SESSION_TIMEOUT=3600
116
+
117
+ # File Processing
118
+ MAX_FILE_SIZE=50MB
119
+ SUPPORTED_FORMATS=pdf,docx,xlsx,txt
120
+
121
+ # Database (Optional)
122
+ DATABASE_URL=sqlite:///data_extractor.db
123
+ ```
124
+
125
+ ## Advanced Features
126
+
127
+ ### Financial Document Processing
128
+ - Comprehensive financial data extraction
129
+ - 13-category data organization
130
+ - Excel report generation with charts
131
+ - XBRL and SEC filing support
132
+
133
+ ### OCR and Image Processing
134
+ - EasyOCR and PaddleOCR integration
135
+ - Tesseract OCR support
136
+ - Advanced image preprocessing
137
+
138
+ ### Machine Learning Integration
139
+ - TensorFlow and PyTorch support
140
+ - Scikit-learn for data analysis
141
+ - XGBoost and LightGBM for predictions
142
+
143
+ ## Troubleshooting
144
+
145
+ For detailed troubleshooting and installation issues, see:
146
+ - [`INSTALLATION.md`](INSTALLATION.md) - Comprehensive installation guide
147
+ - [`FIXES_SUMMARY.md`](FIXES_SUMMARY.md) - Known issues and solutions
148
+
149
+ ### Common Issues
150
+
151
+ 1. **Import Errors**: Try minimal installation first
152
+ 2. **OCR Issues**: Install system dependencies
153
+ 3. **Memory Issues**: Use smaller batch sizes
154
+ 4. **API Errors**: Verify API keys in `.env` file
155
+
156
+ ## Docker Support
157
+
158
+ ```dockerfile
159
+ # Build and run with Docker
160
+ docker build -t data-extractor .
161
+ docker run -p 7860:7860 --env-file .env data-extractor
162
+ ```
TERMINAL_README.md ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 Manus AI-Style Terminal Integration
2
+
3
+ This document explains the real-time terminal streaming functionality added to the Data Extractor application.
4
+
5
+ ## 📋 Overview
6
+
7
+ The terminal integration provides a **Manus AI-style terminal interface** with real-time command execution and streaming output, seamlessly integrated into the existing Gradio application.
8
+
9
+ ## 🏗️ Architecture
10
+
11
+ ### Components
12
+
13
+ 1. **WebSocket Server** (`terminal_stream.py`)
14
+ - Handles real-time communication between frontend and backend
15
+ - Manages command execution with streaming output
16
+ - Supports multiple concurrent connections
17
+ - Runs on port 8765
18
+
19
+ 2. **Frontend Terminal** (`static/terminal.html`)
20
+ - Beautiful Manus AI-inspired terminal interface
21
+ - Real-time output streaming via WebSocket
22
+ - Command history navigation
23
+ - Keyboard shortcuts and controls
24
+
25
+ 3. **Gradio Integration** (Modified `app.py`)
26
+ - Added terminal tab to existing interface
27
+ - Embedded terminal as iframe component
28
+ - Auto-starts WebSocket server on application launch
29
+
30
+ ## 🎨 Features
31
+
32
+ ### Terminal Interface
33
+ - **Real-time Streaming**: Live command output as it happens
34
+ - **Command History**: Navigate with ↑/↓ arrow keys
35
+ - **Interrupt Support**: Ctrl+C to stop running commands
36
+ - **Auto-reconnect**: Automatically reconnects on connection loss
37
+ - **Status Indicators**: Visual connection and execution status
38
+ - **Responsive Design**: Works on desktop and mobile
39
+
40
+ ### Security
41
+ - **Command Sanitization**: Uses `shlex.split()` for safe command parsing
42
+ - **Process Isolation**: Commands run in separate processes
43
+ - **Error Handling**: Robust error handling and logging
44
+
45
+ ## 🚀 Usage
46
+
47
+ ### Starting the Application
48
+ ```bash
49
+ python app.py
50
+ ```
51
+
52
+ The terminal WebSocket server automatically starts on port 8765.
53
+
54
+ ### Accessing the Terminal
55
+ 1. Open the Gradio interface (usually http://localhost:7860)
56
+ 2. Click on the "💻 Terminal" tab
57
+ 3. Start typing commands in the terminal interface
58
+
59
+ ### Keyboard Shortcuts
60
+ - **Enter**: Execute command
61
+ - **↑/↓**: Navigate command history
62
+ - **Ctrl+C**: Interrupt running command
63
+ - **Ctrl+L**: Clear terminal screen
64
+ - **Tab**: Command completion (planned feature)
65
+
66
+ ## 🔧 Configuration
67
+
68
+ ### WebSocket Server Settings
69
+ ```python
70
+ # In terminal_stream.py
71
+ WEBSOCKET_HOST = 'localhost'
72
+ WEBSOCKET_PORT = 8765
73
+ ```
74
+
75
+ ### Terminal Appearance
76
+ Customize the terminal appearance by modifying the CSS in `static/terminal.html`:
77
+
78
+ ```css
79
+ /* Main terminal colors */
80
+ .terminal-container {
81
+ background: linear-gradient(135deg, #0d1117 0%, #161b22 100%);
82
+ }
83
+
84
+ /* Command prompt */
85
+ .prompt {
86
+ color: #58a6ff;
87
+ }
88
+ ```
89
+
90
+ ## 📡 WebSocket API
91
+
92
+ ### Client → Server Messages
93
+
94
+ #### Execute Command
95
+ ```json
96
+ {
97
+ "type": "command",
98
+ "command": "ls -la"
99
+ }
100
+ ```
101
+
102
+ #### Interrupt Command
103
+ ```json
104
+ {
105
+ "type": "interrupt"
106
+ }
107
+ ```
108
+
109
+ ### Server → Client Messages
110
+
111
+ #### Command Output
112
+ ```json
113
+ {
114
+ "type": "output",
115
+ "data": "file1.txt\nfile2.txt",
116
+ "stream": "stdout",
117
+ "timestamp": "2024-01-01T12:00:00.000Z"
118
+ }
119
+ ```
120
+
121
+ #### Command Completion
122
+ ```json
123
+ {
124
+ "type": "command_complete",
125
+ "exit_code": 0,
126
+ "message": "Process exited with code 0",
127
+ "timestamp": "2024-01-01T12:00:00.000Z"
128
+ }
129
+ ```
130
+
131
+ ## 🛠️ Development
132
+
133
+ ### Adding New Features
134
+
135
+ 1. **Server-side**: Modify `terminal_stream.py`
136
+ 2. **Client-side**: Update `static/terminal.html`
137
+ 3. **Integration**: Adjust `app.py` if needed
138
+
139
+ ### Testing
140
+
141
+ ```bash
142
+ # Test WebSocket server independently
143
+ python -c "from terminal_stream import run_websocket_server; run_websocket_server()"
144
+
145
+ # Test terminal interface
146
+ # Open static/terminal.html in browser
147
+ ```
148
+
149
+ ## 🔍 Troubleshooting
150
+
151
+ ### Common Issues
152
+
153
+ 1. **WebSocket Connection Failed**
154
+ - Check if port 8765 is available
155
+ - Verify firewall settings
156
+ - Check server logs for errors
157
+
158
+ 2. **Commands Not Executing**
159
+ - Verify WebSocket connection status
160
+ - Check terminal logs for errors
161
+ - Ensure proper command syntax
162
+
163
+ 3. **Terminal Not Loading**
164
+ - Check if `static/terminal.html` exists
165
+ - Verify Gradio file serving configuration
166
+ - Check browser console for errors
167
+
168
+ ### Debug Mode
169
+
170
+ Enable debug logging:
171
+ ```python
172
+ import logging
173
+ logging.getLogger('terminal_stream').setLevel(logging.DEBUG)
174
+ ```
175
+
176
+ ## 🚀 Advanced Usage
177
+
178
+ ### Custom Commands
179
+
180
+ Add custom command handlers in `terminal_stream.py`:
181
+
182
+ ```python
183
+ async def handle_custom_command(self, command):
184
+ if command.startswith('custom:'):
185
+ # Handle custom command
186
+ await self.broadcast({
187
+ 'type': 'output',
188
+ 'data': 'Custom command executed',
189
+ 'stream': 'stdout'
190
+ })
191
+ return True
192
+ return False
193
+ ```
194
+
195
+ ### Integration with Workflow
196
+
197
+ Stream workflow logs to terminal:
198
+
199
+ ```python
200
+ # In workflow code
201
+ from terminal_stream import terminal_manager
202
+
203
+ async def log_to_terminal(message):
204
+ await terminal_manager.broadcast({
205
+ 'type': 'output',
206
+ 'data': message,
207
+ 'stream': 'workflow'
208
+ })
209
+ ```
210
+
211
+ ## 📚 Dependencies
212
+
213
+ - `websockets`: WebSocket server implementation
214
+ - `asyncio`: Async programming support
215
+ - `subprocess`: Command execution
216
+ - `shlex`: Safe command parsing
217
+
218
+ ## 🎯 Future Enhancements
219
+
220
+ - [ ] Command auto-completion
221
+ - [ ] File upload/download via terminal
222
+ - [ ] Terminal themes and customization
223
+ - [ ] Multi-session support
224
+ - [ ] Terminal recording and playback
225
+ - [ ] Integration with workflow logging
226
+ - [ ] SSH/remote terminal support
227
+
228
+ ## 📄 License
229
+
230
+ This terminal implementation is part of the Data Extractor project and follows the same license terms.
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,2367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import asyncio
3
+ import json
4
+ import time
5
+ import os
6
+ # Silence Matplotlib cache warnings on read-only filesystems
7
+ os.environ.setdefault("MPLCONFIGDIR", "/tmp/mpl_cache")
8
+ import logging
9
+ from pathlib import Path
10
+ import uuid
11
+ from workflow.financial_workflow import FinancialDocumentWorkflow
12
+ from agno.storage.sqlite import SqliteStorage
13
+ from utils.file_handler import FileHandler
14
+ from config.settings import settings
15
+ import threading
16
+ from queue import Queue
17
+ import signal
18
+ import sys
19
+ import atexit
20
+ from datetime import datetime, timedelta
21
+ from terminal_stream import terminal_manager, run_websocket_server
22
+ from collections import deque
23
+
24
+ # Configure logging - Only INFO level and above, no httpcore/debug details
25
+ # Use /tmp for file logging on Hugging Face Spaces or disable file logging if not writable
26
+ import tempfile
27
+ import os
28
+
29
+ try:
30
+ # Try to create log file in /tmp directory (works on Hugging Face Spaces)
31
+ log_dir = "/tmp"
32
+ log_file = os.path.join(log_dir, "app.log")
33
+ logging.basicConfig(
34
+ level=logging.INFO,
35
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
36
+ handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
37
+ )
38
+ except (PermissionError, OSError):
39
+ # Fallback to console-only logging if file logging fails
40
+ logging.basicConfig(
41
+ level=logging.INFO,
42
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
43
+ handlers=[logging.StreamHandler()],
44
+ )
45
+
46
+ # Disable httpcore and other verbose loggers
47
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
48
+ logging.getLogger("urllib3").setLevel(logging.WARNING)
49
+ logging.getLogger("requests").setLevel(logging.WARNING)
50
+ logging.getLogger("google").setLevel(logging.WARNING)
51
+ logging.getLogger("google.auth").setLevel(logging.WARNING)
52
+ logging.getLogger("google.api_core").setLevel(logging.WARNING)
53
+
54
+ logger = logging.getLogger(__name__)
55
+
56
+ # Auto-shutdown configuration
57
+ INACTIVITY_TIMEOUT_MINUTES = 30 # Shutdown after 30 minutes of inactivity
58
+ CHECK_INTERVAL_SECONDS = 60 # Check every minute
59
+
60
class AutoShutdownManager:
    """Manages automatic shutdown of the Gradio application.

    A repeating ``threading.Timer`` wakes every ``CHECK_INTERVAL_SECONDS`` and
    compares the current time against ``last_activity``; once the configured
    timeout is exceeded the process is terminated.  SIGINT/SIGTERM and normal
    interpreter exit are also hooked so the timer is cancelled first.
    """

    def __init__(self, timeout_minutes=INACTIVITY_TIMEOUT_MINUTES):
        self.timeout_minutes = timeout_minutes
        self.last_activity = datetime.now()
        self.shutdown_timer = None
        self.app_instance = None
        self.is_shutting_down = False

        # Setup signal handlers for graceful shutdown.
        # NOTE(review): signal.signal() only works from the main thread, so
        # this class must be instantiated there.
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

        # Register cleanup function so a normal exit also cancels the timer.
        atexit.register(self._cleanup)

        logger.info(f"AutoShutdownManager initialized with {timeout_minutes} minute timeout")

    def _signal_handler(self, signum, frame):
        """Handle shutdown signals gracefully."""
        logger.info(f"Received signal {signum}, initiating graceful shutdown...")
        self._shutdown_server()
        sys.exit(0)

    def _cleanup(self):
        """Cleanup function called on exit (registered via atexit)."""
        if not self.is_shutting_down:
            logger.info("Application cleanup initiated")
            self._shutdown_server()

    def update_activity(self):
        """Update the last activity timestamp (call on every user request)."""
        self.last_activity = datetime.now()
        logger.debug(f"Activity updated: {self.last_activity}")

    def start_monitoring(self, app_instance):
        """Start monitoring *app_instance* for inactivity."""
        self.app_instance = app_instance
        self._start_inactivity_timer()
        logger.info("Inactivity monitoring started")

    def _start_inactivity_timer(self):
        """Start or restart the inactivity timer."""
        if self.shutdown_timer:
            self.shutdown_timer.cancel()

        def check_inactivity():
            if self.is_shutting_down:
                return

            time_since_activity = datetime.now() - self.last_activity
            if time_since_activity > timedelta(minutes=self.timeout_minutes):
                logger.info(f"No activity for {self.timeout_minutes} minutes, shutting down...")
                self._shutdown_server()
            else:
                # Not idle long enough yet: schedule the next check.
                self._start_inactivity_timer()

        self.shutdown_timer = threading.Timer(CHECK_INTERVAL_SECONDS, check_inactivity)
        # Fix: a non-daemon Timer thread keeps the interpreter alive after the
        # main thread exits; mark it daemon so it can never block shutdown.
        self.shutdown_timer.daemon = True
        self.shutdown_timer.start()

    def _shutdown_server(self):
        """Shutdown the Gradio server gracefully."""
        if self.is_shutting_down:
            return

        self.is_shutting_down = True
        logger.info("Initiating server shutdown...")

        try:
            if self.shutdown_timer:
                self.shutdown_timer.cancel()

            if self.app_instance:
                # Gradio doesn't have a direct shutdown method, so we'll use os._exit
                # (os is imported at module level; the local re-import was redundant).
                logger.info("Shutting down Gradio application")
                os._exit(0)
        except Exception as e:
            logger.error(f"Error during shutdown: {e}")
            os._exit(1)
143
+
144
# Global shutdown manager instance.
# Side effects at import time: installs SIGINT/SIGTERM handlers and an
# atexit hook (see AutoShutdownManager.__init__), so this module must be
# imported from the main thread.
shutdown_manager = AutoShutdownManager()
146
+
147
+ # Terminal Log Handler
148
class TerminalLogHandler(logging.Handler):
    """Custom logging handler that captures logs for terminal display.

    Keeps a bounded global buffer plus bounded per-session buffers, and can
    render the captured entries as HTML for the in-app terminal widget.
    """

    def __init__(self):
        super().__init__()
        self.logs = deque(maxlen=1000)  # Keep last 1000 log entries
        self.session_logs = {}  # Per-session logs: session_id -> deque(maxlen=500)

    def emit(self, record):
        """Capture a log record into the in-memory buffers."""
        try:
            # Skip noisy third-party DEBUG logs but keep important ones.
            if record.levelname == 'DEBUG' and record.name in ('httpcore', 'urllib3', 'requests'):
                return

            # Format the log message
            message = record.getMessage()

            # Skip empty or very short messages
            if not message or len(message.strip()) < 3:
                return

            log_entry = {
                'timestamp': datetime.fromtimestamp(record.created).strftime('%H:%M:%S'),
                'level': record.levelname,
                'message': message,
                'logger': record.name,
                'module': getattr(record, 'module', ''),
                'funcName': getattr(record, 'funcName', ''),
            }

            # Add to global logs
            self.logs.append(log_entry)

            # Mirror into the per-session buffer when the record is tagged
            # with a session_id (via LoggerAdapter / extra=...).
            session_id = getattr(record, 'session_id', None)
            if session_id:
                self.session_logs.setdefault(session_id, deque(maxlen=500)).append(log_entry)

        except Exception as e:
            # Prevent logging errors from breaking the application
            print(f"TerminalLogHandler error: {e}")

    def get_logs(self, session_id=None, limit=50):
        """Get recent logs, optionally filtered by session."""
        if session_id and session_id in self.session_logs:
            return list(self.session_logs[session_id])[-limit:]
        return list(self.logs)[-limit:]

    def get_logs_as_html(self, session_id=None, limit=50):
        """Get logs formatted as HTML for terminal display."""
        import html as _html  # local import: escape user-controlled log text

        logs = self.get_logs(session_id, limit)
        html_lines = []

        for log in logs:
            level_class = {
                'DEBUG': 'system-line',
                'INFO': 'output-line',
                'WARNING': 'system-line',
                'ERROR': 'error-line',
                'CRITICAL': 'error-line'
            }.get(log['level'], 'output-line')

            # Fix: escape message and logger name so raw '<', '>' or '&' in
            # log text cannot break the markup or inject HTML into the view.
            safe_logger = _html.escape(str(log['logger']))
            safe_message = _html.escape(str(log['message']))
            html_lines.append(f'''
            <div class="terminal-line {level_class}">
                <span class="timestamp">{log['timestamp']}</span>
                <span>[{log['level']}] {safe_logger}: {safe_message}</span>
            </div>
            ''')

        return ''.join(html_lines)
224
+
225
# Global terminal log handler, shared by every logger configured below.
terminal_log_handler = TerminalLogHandler()

# The handler itself accepts everything; each logger sets its own threshold.
terminal_log_handler.setLevel(logging.DEBUG)


def _wire_terminal_logger(name, level):
    """Attach the shared terminal handler to a logger and set its level."""
    target = logging.getLogger(name) if name else logging.getLogger()
    target.addHandler(terminal_log_handler)
    target.setLevel(level)
    return target


# Root logger first so all log records reach the terminal by default.
root_logger = _wire_terminal_logger(None, logging.DEBUG)

# Workflow-related loggers at DEBUG for maximum detail in the terminal.
workflow_logger = _wire_terminal_logger('workflow', logging.DEBUG)
agno_logger = _wire_terminal_logger('agno', logging.DEBUG)
utils_logger = _wire_terminal_logger('utils', logging.DEBUG)

# Keep httpx and google at INFO level to avoid spam.
httpx_logger = _wire_terminal_logger('httpx', logging.INFO)
google_logger = _wire_terminal_logger('google', logging.INFO)
257
+
258
+ # Prompt Gallery Loader
259
class PromptGallery:
    """Manages loading and accessing prompt gallery from JSON configuration."""

    def __init__(self):
        self.prompts = {}
        self.load_prompts()

    def load_prompts(self):
        """Load prompts from JSON configuration file."""
        try:
            prompt_file = Path(settings.TEMP_DIR).parent / "config" / "prompt_gallery.json"
            if not prompt_file.exists():
                logger.warning(f"Prompt gallery file not found: {prompt_file}")
                self.prompts = {"categories": {}}
                return
            with open(prompt_file, 'r', encoding='utf-8') as f:
                self.prompts = json.load(f)
            logger.info(f"Loaded prompt gallery with {len(self.prompts.get('categories', {}))} categories")
        except Exception as e:
            logger.error(f"Error loading prompt gallery: {e}")
            self.prompts = {"categories": {}}

    def get_categories(self):
        """Get all available prompt categories."""
        return self.prompts.get('categories', {})

    def get_prompts_for_category(self, category_id):
        """Get all prompts for a specific category."""
        return self.get_categories().get(category_id, {}).get('prompts', [])

    def get_prompt_by_id(self, category_id, prompt_id):
        """Get a specific prompt by category and prompt ID (None if absent)."""
        candidates = self.get_prompts_for_category(category_id)
        return next((p for p in candidates if p.get('id') == prompt_id), None)
296
+
297
# Global prompt gallery instance.
# Side effect at import time: reads config/prompt_gallery.json from disk
# (falls back to an empty gallery if missing or unreadable).
prompt_gallery = PromptGallery()
299
+
300
+ # Custom CSS for beautiful multi-agent streaming interface
301
+ custom_css = """
302
+ /* Main container styling */
303
+ .main-container {
304
+ max-width: 1400px;
305
+ margin: 0 auto;
306
+ }
307
+
308
+ /* Dynamic Single-Panel Workflow Layout */
309
+ .workflow-progress-nav {
310
+ display: flex;
311
+ justify-content: space-between;
312
+ align-items: center;
313
+ background: var(--background-fill-secondary);
314
+ border: 1px solid var(--border-color-primary);
315
+ border-radius: 12px;
316
+ padding: 16px;
317
+ margin: 16px 0;
318
+ gap: 8px;
319
+ }
320
+
321
+ .progress-nav-item {
322
+ display: flex;
323
+ flex-direction: column;
324
+ align-items: center;
325
+ padding: 12px 16px;
326
+ border-radius: 8px;
327
+ cursor: pointer;
328
+ transition: all 0.3s ease;
329
+ flex: 1;
330
+ text-align: center;
331
+ position: relative;
332
+ }
333
+
334
+ .progress-nav-item.pending {
335
+ background: rgba(107, 114, 128, 0.1);
336
+ color: var(--body-text-color-subdued);
337
+ }
338
+
339
+ .progress-nav-item.active {
340
+ background: rgba(59, 130, 246, 0.1);
341
+ color: #3b82f6;
342
+ border: 2px solid #3b82f6;
343
+ }
344
+
345
+ .progress-nav-item.current {
346
+ background: rgba(102, 126, 234, 0.2);
347
+ color: #667eea;
348
+ border: 2px solid #667eea;
349
+ transform: scale(1.05);
350
+ }
351
+
352
+ .progress-nav-item.completed {
353
+ background: rgba(16, 185, 129, 0.1);
354
+ color: #10b981;
355
+ border: 2px solid #10b981;
356
+ }
357
+
358
+ .progress-nav-item.clickable:hover {
359
+ transform: translateY(-2px);
360
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
361
+ }
362
+
363
+ .nav-icon {
364
+ font-size: 24px;
365
+ margin-bottom: 8px;
366
+ }
367
+
368
+ .nav-label {
369
+ font-size: 12px;
370
+ font-weight: 600;
371
+ margin-bottom: 4px;
372
+ }
373
+
374
+ .nav-status {
375
+ font-size: 10px;
376
+ opacity: 0.7;
377
+ }
378
+
379
+ .active-agent-panel {
380
+ background: var(--background-fill-secondary);
381
+ border: 2px solid var(--border-color-primary);
382
+ border-radius: 16px;
383
+ margin: 16px 0;
384
+ overflow: hidden;
385
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1);
386
+ transition: all 0.3s ease;
387
+ }
388
+
389
+ .agent-panel-header {
390
+ display: flex;
391
+ justify-content: space-between;
392
+ align-items: center;
393
+ padding: 20px 24px;
394
+ background: linear-gradient(135deg, var(--background-fill-primary) 0%, var(--background-fill-secondary) 100%);
395
+ border-bottom: 1px solid var(--border-color-primary);
396
+ }
397
+
398
+ .agent-info {
399
+ display: flex;
400
+ align-items: center;
401
+ gap: 16px;
402
+ }
403
+
404
+ .agent-icon-large {
405
+ font-size: 32px;
406
+ padding: 12px;
407
+ background: var(--background-fill-primary);
408
+ border-radius: 12px;
409
+ border: 2px solid var(--border-color-accent);
410
+ }
411
+
412
+ .agent-details h3.agent-title {
413
+ margin: 0 0 4px 0;
414
+ font-size: 20px;
415
+ font-weight: 700;
416
+ color: var(--body-text-color);
417
+ }
418
+
419
+ .agent-details p.agent-description {
420
+ margin: 0;
421
+ font-size: 14px;
422
+ color: var(--body-text-color-subdued);
423
+ }
424
+
425
+ .agent-status-badge {
426
+ padding: 8px 16px;
427
+ border-radius: 20px;
428
+ color: white;
429
+ font-weight: 600;
430
+ font-size: 12px;
431
+ text-transform: uppercase;
432
+ letter-spacing: 0.5px;
433
+ }
434
+
435
+ .agent-content-area {
436
+ padding: 24px;
437
+ min-height: 200px;
438
+ max-height: 400px;
439
+ overflow-y: auto;
440
+ }
441
+
442
+ .agent-content {
443
+ font-family: var(--font-mono);
444
+ font-size: 14px;
445
+ line-height: 1.6;
446
+ color: var(--body-text-color);
447
+ white-space: pre-wrap;
448
+ word-wrap: break-word;
449
+ }
450
+
451
+ .agent-content.streaming {
452
+ border-left: 3px solid #3b82f6;
453
+ padding-left: 12px;
454
+ background: rgba(59, 130, 246, 0.02);
455
+ }
456
+
457
+ .agent-waiting,
458
+ .agent-starting,
459
+ .agent-empty {
460
+ display: flex;
461
+ align-items: center;
462
+ justify-content: center;
463
+ height: 120px;
464
+ color: var(--body-text-color-subdued);
465
+ font-style: italic;
466
+ font-size: 16px;
467
+ }
468
+
469
+ .typing-cursor {
470
+ animation: blink 1s infinite;
471
+ color: #3b82f6;
472
+ font-weight: bold;
473
+ }
474
+
475
+ /* Legacy Multi-Agent Workflow Layout (kept for compatibility) */
476
+ .workflow-container {
477
+ display: grid;
478
+ grid-template-columns: 1fr;
479
+ gap: 12px;
480
+ margin: 16px 0;
481
+ }
482
+
483
+ .agent-panel {
484
+ background: var(--background-fill-secondary);
485
+ border: 2px solid var(--border-color-primary);
486
+ border-radius: 12px;
487
+ padding: 16px;
488
+ margin: 8px 0;
489
+ transition: all 0.3s ease;
490
+ position: relative;
491
+ overflow: hidden;
492
+ }
493
+
494
+ .agent-panel.active {
495
+ border-color: var(--color-accent);
496
+ box-shadow: 0 4px 20px rgba(102, 126, 234, 0.2);
497
+ transform: translateY(-2px);
498
+ }
499
+
500
+ .agent-panel.completed {
501
+ border-color: var(--color-success);
502
+ background: rgba(17, 153, 142, 0.05);
503
+ }
504
+
505
+ .agent-panel.streaming {
506
+ border-color: var(--color-accent);
507
+ background: rgba(102, 126, 234, 0.05);
508
+ }
509
+
510
+ .agent-header {
511
+ display: flex;
512
+ align-items: center;
513
+ justify-content: space-between;
514
+ margin-bottom: 12px;
515
+ padding-bottom: 8px;
516
+ border-bottom: 1px solid var(--border-color-primary);
517
+ }
518
+
519
+ .agent-info {
520
+ display: flex;
521
+ align-items: center;
522
+ gap: 12px;
523
+ }
524
+
525
+ .agent-icon {
526
+ font-size: 24px;
527
+ animation: pulse 2s infinite;
528
+ }
529
+
530
+ .agent-icon.active {
531
+ animation: bounce 1s infinite;
532
+ }
533
+
534
+ .agent-name {
535
+ font-size: 18px;
536
+ font-weight: 600;
537
+ color: var(--body-text-color);
538
+ }
539
+
540
+ .agent-description {
541
+ font-size: 14px;
542
+ color: var(--body-text-color-subdued);
543
+ margin-top: 4px;
544
+ }
545
+
546
+ .agent-status {
547
+ display: flex;
548
+ align-items: center;
549
+ gap: 8px;
550
+ font-size: 14px;
551
+ font-weight: 500;
552
+ }
553
+
554
+ .status-indicator {
555
+ width: 12px;
556
+ height: 12px;
557
+ border-radius: 50%;
558
+ animation: pulse 2s infinite;
559
+ }
560
+
561
+ .status-indicator.pending {
562
+ background: var(--color-neutral);
563
+ }
564
+
565
+ .status-indicator.starting {
566
+ background: var(--color-warning);
567
+ animation: flash 1s infinite;
568
+ }
569
+
570
+ .status-indicator.streaming {
571
+ background: var(--color-accent);
572
+ animation: pulse 1s infinite;
573
+ }
574
+
575
+ .status-indicator.completed {
576
+ background: var(--color-success);
577
+ animation: none;
578
+ }
579
+
580
+ .agent-thinking {
581
+ background: var(--background-fill-primary);
582
+ border: 1px solid var(--border-color-primary);
583
+ border-radius: 8px;
584
+ padding: 12px;
585
+ min-height: 120px;
586
+ max-height: 300px;
587
+ overflow-y: auto;
588
+ font-family: var(--font-mono);
589
+ font-size: 13px;
590
+ line-height: 1.5;
591
+ color: var(--body-text-color);
592
+ white-space: pre-wrap;
593
+ word-wrap: break-word;
594
+ }
595
+
596
+ .agent-thinking.streaming {
597
+ border-color: var(--color-accent);
598
+ background: rgba(102, 126, 234, 0.02);
599
+ }
600
+
601
+ .agent-thinking.empty {
602
+ display: flex;
603
+ align-items: center;
604
+ justify-content: center;
605
+ color: var(--body-text-color-subdued);
606
+ font-style: italic;
607
+ }
608
+
609
+ .thinking-cursor {
610
+ display: inline-block;
611
+ width: 2px;
612
+ height: 16px;
613
+ background: var(--color-accent);
614
+ margin-left: 2px;
615
+ animation: blink 1s infinite;
616
+ }
617
+
618
+ /* Workflow Progress Overview */
619
+ .workflow-progress {
620
+ display: flex;
621
+ justify-content: space-between;
622
+ align-items: center;
623
+ background: var(--background-fill-secondary);
624
+ border: 1px solid var(--border-color-primary);
625
+ border-radius: 8px;
626
+ padding: 16px;
627
+ margin: 16px 0;
628
+ }
629
+
630
+ .progress-step-mini {
631
+ display: flex;
632
+ flex-direction: column;
633
+ align-items: center;
634
+ gap: 8px;
635
+ flex: 1;
636
+ position: relative;
637
+ }
638
+
639
+ .progress-step-mini::after {
640
+ content: '';
641
+ position: absolute;
642
+ top: 12px;
643
+ right: -50%;
644
+ width: 100%;
645
+ height: 2px;
646
+ background: var(--border-color-primary);
647
+ z-index: 1;
648
+ }
649
+
650
+ .progress-step-mini:last-child::after {
651
+ display: none;
652
+ }
653
+
654
+ .mini-icon {
655
+ font-size: 20px;
656
+ padding: 8px;
657
+ border-radius: 50%;
658
+ background: var(--background-fill-primary);
659
+ border: 2px solid var(--border-color-primary);
660
+ z-index: 2;
661
+ position: relative;
662
+ }
663
+
664
+ .mini-icon.active {
665
+ border-color: var(--color-accent);
666
+ background: var(--color-accent);
667
+ color: white;
668
+ animation: pulse 1s infinite;
669
+ }
670
+
671
+ .mini-icon.completed {
672
+ border-color: var(--color-success);
673
+ background: var(--color-success);
674
+ color: white;
675
+ }
676
+
677
+ .mini-label {
678
+ font-size: 12px;
679
+ font-weight: 500;
680
+ color: var(--body-text-color);
681
+ text-align: center;
682
+ }
683
+
684
+ /* Animations */
685
+ @keyframes bounce {
686
+ 0%, 20%, 50%, 80%, 100% { transform: translateY(0); }
687
+ 40% { transform: translateY(-10px); }
688
+ 60% { transform: translateY(-5px); }
689
+ }
690
+
691
+ @keyframes flash {
692
+ 0%, 50%, 100% { opacity: 1; }
693
+ 25%, 75% { opacity: 0.5; }
694
+ }
695
+
696
+ @keyframes blink {
697
+ 0%, 50% { opacity: 1; }
698
+ 51%, 100% { opacity: 0; }
699
+ }
700
+
701
+ @keyframes typewriter {
702
+ from { width: 0; }
703
+ to { width: 100%; }
704
+ }
705
+
706
+ /* Single step container styling */
707
+ .single-step-container {
708
+ background: var(--background-fill-secondary);
709
+ border: 1px solid var(--border-color-primary);
710
+ border-radius: 8px;
711
+ padding: 16px;
712
+ margin: 8px 0;
713
+ font-family: var(--font-mono);
714
+ }
715
+
716
+ .steps-overview {
717
+ display: flex;
718
+ flex-wrap: wrap;
719
+ gap: 8px;
720
+ margin-bottom: 16px;
721
+ padding-bottom: 12px;
722
+ border-bottom: 1px solid var(--border-color-primary);
723
+ }
724
+
725
+ .step-overview-item {
726
+ padding: 4px 8px;
727
+ border-radius: 4px;
728
+ font-size: 12px;
729
+ font-weight: 500;
730
+ background: var(--background-fill-primary);
731
+ border: 1px solid var(--border-color-primary);
732
+ }
733
+
734
+ .step-overview-item.current-step {
735
+ background: var(--color-accent);
736
+ color: white;
737
+ border-color: var(--color-accent);
738
+ }
739
+
740
+ .step-overview-item.completed-step {
741
+ background: var(--color-success);
742
+ color: white;
743
+ border-color: var(--color-success);
744
+ cursor: pointer;
745
+ transition: all 0.2s ease;
746
+ }
747
+
748
+ .step-overview-item.completed-step:hover {
749
+ transform: translateY(-1px);
750
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
751
+ }
752
+
753
+ .step-overview-item.clickable {
754
+ cursor: pointer;
755
+ user-select: none;
756
+ }
757
+
758
+ .step-overview-item.other-step {
759
+ opacity: 0.7;
760
+ }
761
+
762
+ /* Content formatting styles */
763
+ .code-content, .json-content, .text-content {
764
+ background: var(--background-fill-primary);
765
+ border: 1px solid var(--border-color-primary);
766
+ border-radius: 4px;
767
+ margin: 8px 0;
768
+ }
769
+
770
+ .code-header, .content-header {
771
+ display: flex;
772
+ justify-content: space-between;
773
+ align-items: center;
774
+ background: var(--background-fill-secondary);
775
+ padding: 8px 12px;
776
+ border-bottom: 1px solid var(--border-color-primary);
777
+ font-size: 12px;
778
+ font-weight: 600;
779
+ }
780
+
781
+ .code-label, .content-label {
782
+ color: var(--body-text-color);
783
+ }
784
+
785
+ .code-language, .content-type {
786
+ background: var(--color-accent);
787
+ color: white;
788
+ padding: 2px 6px;
789
+ border-radius: 3px;
790
+ font-size: 10px;
791
+ }
792
+
793
+ .code-block, .json-block, .text-block {
794
+ margin: 0;
795
+ padding: 12px;
796
+ font-family: var(--font-mono);
797
+ font-size: 12px;
798
+ line-height: 1.4;
799
+ overflow-x: auto;
800
+ background: var(--background-fill-primary);
801
+ color: var(--body-text-color);
802
+ }
803
+
804
+ .empty-content {
805
+ padding: 20px;
806
+ text-align: center;
807
+ color: var(--body-text-color-subdued);
808
+ font-style: italic;
809
+ }
810
+
811
+ /* New step content wrapper styles */
812
+ .step-content-wrapper {
813
+ background: var(--background-fill-primary);
814
+ border: 1px solid var(--border-color-primary);
815
+ border-radius: 8px;
816
+ margin: 12px 0;
817
+ overflow: hidden;
818
+ }
819
+
820
+ .step-content-header {
821
+ background: var(--background-fill-secondary);
822
+ padding: 12px 16px;
823
+ border-bottom: 1px solid var(--border-color-primary);
824
+ display: flex;
825
+ align-items: center;
826
+ gap: 8px;
827
+ font-weight: 600;
828
+ font-size: 14px;
829
+ }
830
+
831
+ .step-icon {
832
+ font-size: 18px;
833
+ }
834
+
835
+ .step-label {
836
+ color: var(--body-text-color);
837
+ }
838
+
839
+ .step-content-body {
840
+ padding: 16px;
841
+ line-height: 1.6;
842
+ }
843
+
844
+ .markdown-content {
845
+ font-family: var(--font-sans);
846
+ color: var(--body-text-color);
847
+ }
848
+
849
+ .markdown-content h1, .markdown-content h2, .markdown-content h3,
850
+ .markdown-content h4, .markdown-content h5, .markdown-content h6 {
851
+ margin: 16px 0 8px 0;
852
+ font-weight: 600;
853
+ color: var(--body-text-color);
854
+ }
855
+
856
+ .markdown-content h1 { font-size: 24px; }
857
+ .markdown-content h2 { font-size: 20px; }
858
+ .markdown-content h3 { font-size: 18px; }
859
+ .markdown-content h4 { font-size: 16px; }
860
+ .markdown-content h5 { font-size: 14px; }
861
+ .markdown-content h6 { font-size: 12px; }
862
+
863
+ .markdown-content p {
864
+ margin: 8px 0;
865
+ color: var(--body-text-color);
866
+ }
867
+
868
+ .markdown-content li {
869
+ margin: 4px 0;
870
+ padding-left: 8px;
871
+ list-style-type: disc;
872
+ color: var(--body-text-color);
873
+ }
874
+
875
+ .markdown-content ul {
876
+ margin: 8px 0;
877
+ padding-left: 20px;
878
+ }
879
+
880
+ .markdown-content ol {
881
+ margin: 8px 0;
882
+ padding-left: 20px;
883
+ }
884
+
885
+ .markdown-content strong {
886
+ font-weight: 600;
887
+ color: var(--body-text-color);
888
+ }
889
+
890
+ .markdown-content em {
891
+ font-style: italic;
892
+ color: var(--body-text-color-subdued);
893
+ }
894
+
895
+ .markdown-content code {
896
+ background: var(--background-fill-secondary);
897
+ padding: 2px 4px;
898
+ border-radius: 3px;
899
+ font-family: var(--font-mono);
900
+ font-size: 13px;
901
+ color: var(--body-text-color);
902
+ }
903
+
904
+ .formatted-content {
905
+ font-family: var(--font-sans);
906
+ line-height: 1.6;
907
+ color: var(--body-text-color);
908
+ }
909
+
910
+ .error-content {
911
+ background: #fee;
912
+ border: 1px solid #fcc;
913
+ border-radius: 4px;
914
+ padding: 12px;
915
+ color: #c33;
916
+ font-family: var(--font-mono);
917
+ font-size: 12px;
918
+ }
919
+
920
+ /* Step type specific styling */
921
+ .code-step .step-content-header {
922
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
923
+ color: white;
924
+ }
925
+
926
+ .data-step .step-content-header {
927
+ background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
928
+ color: white;
929
+ }
930
+
931
+ .prompts-step .step-content-header {
932
+ background: linear-gradient(135deg, #ff6b6b 0%, #feca57 100%);
933
+ color: white;
934
+ }
935
+
936
+ .default-step .step-content-header {
937
+ background: linear-gradient(135deg, #74b9ff 0%, #0984e3 100%);
938
+ color: white;
939
+ }
940
+
941
+ .current-step-details {
942
+ background: var(--background-fill-primary);
943
+ border: 1px solid var(--border-color-primary);
944
+ border-radius: 4px;
945
+ padding: 12px;
946
+ }
947
+
948
+ .step-header {
949
+ display: flex;
950
+ justify-content: space-between;
951
+ align-items: center;
952
+ margin-bottom: 8px;
953
+ padding-bottom: 8px;
954
+ border-bottom: 1px solid var(--border-color-primary);
955
+ }
956
+
957
+ .step-title {
958
+ font-weight: 600;
959
+ font-size: 14px;
960
+ color: var(--body-text-color);
961
+ }
962
+
963
+ .step-progress {
964
+ font-size: 12px;
965
+ font-weight: 500;
966
+ color: var(--body-text-color-subdued);
967
+ }
968
+
969
+ .step-description {
970
+ font-size: 12px;
971
+ color: var(--body-text-color-subdued);
972
+ margin-bottom: 8px;
973
+ font-style: italic;
974
+ }
975
+
976
+ .step-content {
977
+ background: var(--background-fill-secondary);
978
+ border: 1px solid var(--border-color-primary);
979
+ border-radius: 4px;
980
+ padding: 12px;
981
+ margin-top: 8px;
982
+ max-height: 200px;
983
+ overflow-y: auto;
984
+ }
985
+
986
+ .step-content pre {
987
+ margin: 0;
988
+ font-family: var(--font-mono);
989
+ font-size: 12px;
990
+ line-height: 1.4;
991
+ color: var(--body-text-color);
992
+ white-space: pre-wrap;
993
+ word-wrap: break-word;
994
+ }
995
+
996
+ /* Progress bar styling */
997
+ .progress-container {
998
+ margin: 20px 0;
999
+ }
1000
+
1001
+ .progress-step {
1002
+ display: flex;
1003
+ align-items: center;
1004
+ margin: 10px 0;
1005
+ padding: 10px;
1006
+ border-radius: 10px;
1007
+ background: rgba(255, 255, 255, 0.05);
1008
+ transition: all 0.3s ease;
1009
+ }
1010
+
1011
+ .progress-step.active {
1012
+ background: rgba(102, 126, 234, 0.2);
1013
+ transform: scale(1.02);
1014
+ }
1015
+
1016
+ .progress-step.completed {
1017
+ background: rgba(17, 153, 142, 0.2);
1018
+ }
1019
+
1020
+ .step-icon {
1021
+ font-size: 24px;
1022
+ margin-right: 15px;
1023
+ animation: pulse 2s infinite;
1024
+ }
1025
+
1026
+ @keyframes pulse {
1027
+ 0% { transform: scale(1); }
1028
+ 50% { transform: scale(1.1); }
1029
+ 100% { transform: scale(1); }
1030
+ }
1031
+
1032
+ /* Fade in animation */
1033
+ .fade-in {
1034
+ animation: fadeIn 0.5s ease-in;
1035
+ }
1036
+
1037
+ @keyframes fadeIn {
1038
+ from { opacity: 0; transform: translateY(20px); }
1039
+ to { opacity: 1; transform: translateY(0); }
1040
+ }
1041
+
1042
+ /* Typing indicator */
1043
+ .typing-indicator {
1044
+ display: inline-block;
1045
+ width: 20px;
1046
+ height: 10px;
1047
+ }
1048
+
1049
+ .typing-indicator span {
1050
+ display: inline-block;
1051
+ width: 8px;
1052
+ height: 8px;
1053
+ border-radius: 50%;
1054
+ background: #667eea;
1055
+ margin: 0 2px;
1056
+ animation: typing 1.4s infinite ease-in-out;
1057
+ }
1058
+
1059
+ .typing-indicator span:nth-child(1) { animation-delay: -0.32s; }
1060
+ .typing-indicator span:nth-child(2) { animation-delay: -0.16s; }
1061
+
1062
+ @keyframes typing {
1063
+ 0%, 80%, 100% { transform: scale(0.8); opacity: 0.5; }
1064
+ 40% { transform: scale(1); opacity: 1; }
1065
+ }
1066
+
1067
+ /* Header styling */
1068
+ .header-title {
1069
+ font-size: 1.2rem;
1070
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
1071
+ -webkit-background-clip: text;
1072
+ -webkit-text-fill-color: transparent;
1073
+ background-clip: text;
1074
+ margin: 0;
1075
+ text-align: left;
1076
+ padding: 0.5rem 0;
1077
+ }
1078
+
1079
+ /* Status indicators */
1080
+ .status-success {
1081
+ color: #38ef7d;
1082
+ font-weight: bold;
1083
+ }
1084
+
1085
+ .status-error {
1086
+ color: #ff6b6b;
1087
+ font-weight: bold;
1088
+ }
1089
+
1090
+ .status-processing {
1091
+ color: #667eea;
1092
+ font-weight: bold;
1093
+ }
1094
+
1095
+ /* Download button styling */
1096
+ .download-section {
1097
+ text-align: center;
1098
+ margin: 20px 0;
1099
+ }
1100
+
1101
+ .download-btn {
1102
+ background: linear-gradient(135deg, #38ef7d, #11998e);
1103
+ color: white;
1104
+ border: none;
1105
+ padding: 12px 24px;
1106
+ border-radius: 8px;
1107
+ font-size: 16px;
1108
+ font-weight: 600;
1109
+ cursor: pointer;
1110
+ transition: all 0.3s ease;
1111
+ box-shadow: 0 4px 12px rgba(56, 239, 125, 0.3);
1112
+ }
1113
+
1114
+ .download-btn:hover {
1115
+ transform: translateY(-2px);
1116
+ box-shadow: 0 6px 16px rgba(56, 239, 125, 0.4);
1117
+ }
1118
+
1119
+ .download-btn:active {
1120
+ transform: translateY(2px);
1121
+ box-shadow: 0 2px 6px rgba(56, 239, 125, 0.2);
1122
+ }
1123
+
1124
+ /* Terminal Component Styling */
1125
+ .terminal-container {
1126
+ display: flex;
1127
+ flex-direction: column;
1128
+ height: 750px;
1129
+ background: linear-gradient(135deg, #0d1117 0%, #161b22 100%);
1130
+ border: 1px solid #30363d;
1131
+ border-radius: 8px;
1132
+ font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
1133
+ overflow: hidden;
1134
+ margin: 0;
1135
+ }
1136
+
1137
+ .terminal-header {
1138
+ display: flex;
1139
+ align-items: center;
1140
+ justify-content: space-between;
1141
+ padding: 12px 16px;
1142
+ background: #161b22;
1143
+ border-bottom: 1px solid #30363d;
1144
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3);
1145
+ }
1146
+
1147
+ .terminal-title {
1148
+ display: flex;
1149
+ align-items: center;
1150
+ gap: 8px;
1151
+ font-size: 14px;
1152
+ font-weight: 600;
1153
+ color: #f0f6fc;
1154
+ }
1155
+
1156
+ .terminal-icon {
1157
+ width: 16px;
1158
+ height: 16px;
1159
+ background: #238636;
1160
+ border-radius: 50%;
1161
+ position: relative;
1162
+ }
1163
+
1164
+ .terminal-icon::after {
1165
+ content: '>';
1166
+ position: absolute;
1167
+ top: 50%;
1168
+ left: 50%;
1169
+ transform: translate(-50%, -50%);
1170
+ font-size: 10px;
1171
+ color: white;
1172
+ font-weight: bold;
1173
+ }
1174
+
1175
+ .terminal-controls {
1176
+ display: flex;
1177
+ gap: 8px;
1178
+ }
1179
+
1180
+ .control-btn {
1181
+ width: 12px;
1182
+ height: 12px;
1183
+ border-radius: 50%;
1184
+ border: none;
1185
+ cursor: pointer;
1186
+ transition: opacity 0.2s;
1187
+ }
1188
+
1189
+ .control-btn:hover {
1190
+ opacity: 0.8;
1191
+ }
1192
+
1193
+ .close { background: #ff5f56; }
1194
+ .minimize { background: #ffbd2e; }
1195
+ .maximize { background: #27ca3f; }
1196
+
1197
+ .terminal-body {
1198
+ flex: 1;
1199
+ display: flex;
1200
+ flex-direction: column;
1201
+ overflow: hidden;
1202
+ }
1203
+
1204
+ .terminal-output {
1205
+ flex: 1;
1206
+ padding: 8px;
1207
+ overflow-y: auto;
1208
+ font-size: 10px;
1209
+ line-height: 1.2;
1210
+ background: #0d1117;
1211
+ color: #c9d1d9;
1212
+ scrollbar-width: thin;
1213
+ scrollbar-color: #30363d #0d1117;
1214
+ height: 100%;
1215
+ word-wrap: break-word;
1216
+ white-space: pre-wrap;
1217
+ }
1218
+
1219
+ .terminal-output::-webkit-scrollbar {
1220
+ width: 8px;
1221
+ }
1222
+
1223
+ .terminal-output::-webkit-scrollbar-track {
1224
+ background: #0d1117;
1225
+ }
1226
+
1227
+ .terminal-output::-webkit-scrollbar-thumb {
1228
+ background: #30363d;
1229
+ border-radius: 4px;
1230
+ }
1231
+
1232
+ .terminal-output::-webkit-scrollbar-thumb:hover {
1233
+ background: #484f58;
1234
+ }
1235
+
1236
+ .terminal-line {
1237
+ margin-bottom: 1px;
1238
+ white-space: pre-wrap;
1239
+ word-wrap: break-word;
1240
+ display: block;
1241
+ width: 100%;
1242
+ }
1243
+
1244
+ .command-line {
1245
+ color: #58a6ff;
1246
+ font-weight: 600;
1247
+ }
1248
+
1249
+ .output-line {
1250
+ color: #c9d1d9;
1251
+ }
1252
+
1253
+ .error-line {
1254
+ color: #f85149;
1255
+ }
1256
+
1257
+ .success-line {
1258
+ color: #56d364;
1259
+ }
1260
+
1261
+ .system-line {
1262
+ color: #ffa657;
1263
+ font-style: italic;
1264
+ }
1265
+
1266
+ .timestamp {
1267
+ color: #7d8590;
1268
+ font-size: 8px;
1269
+ margin-right: 4px;
1270
+ display: inline-block;
1271
+ min-width: 60px;
1272
+ }
1273
+
1274
+ .terminal-input {
1275
+ display: flex;
1276
+ align-items: center;
1277
+ padding: 12px 16px;
1278
+ background: #161b22;
1279
+ border-top: 1px solid #30363d;
1280
+ }
1281
+
1282
+ .prompt {
1283
+ color: #58a6ff;
1284
+ margin-right: 8px;
1285
+ font-weight: 600;
1286
+ }
1287
+
1288
+ .input-field {
1289
+ flex: 1;
1290
+ background: transparent;
1291
+ border: none;
1292
+ color: #c9d1d9;
1293
+ font-family: inherit;
1294
+ font-size: 11px;
1295
+ outline: none;
1296
+ }
1297
+
1298
+ .input-field::placeholder {
1299
+ color: #7d8590;
1300
+ }
1301
+
1302
+ .status-indicator {
1303
+ display: flex;
1304
+ align-items: center;
1305
+ gap: 8px;
1306
+ margin-left: 12px;
1307
+ }
1308
+
1309
+ .status-dot {
1310
+ width: 8px;
1311
+ height: 8px;
1312
+ border-radius: 50%;
1313
+ background: #7d8590;
1314
+ transition: background-color 0.3s;
1315
+ }
1316
+
1317
+ .status-dot.connected {
1318
+ background: #56d364;
1319
+ box-shadow: 0 0 8px rgba(86, 211, 100, 0.5);
1320
+ }
1321
+
1322
+ .status-dot.running {
1323
+ background: #ffa657;
1324
+ animation: pulse 1.5s infinite;
1325
+ }
1326
+
1327
+ .status-dot.error {
1328
+ background: #f85149;
1329
+ }
1330
+
1331
+ @keyframes terminal-pulse {
1332
+ 0%, 100% { opacity: 1; }
1333
+ 50% { opacity: 0.5; }
1334
+ }
1335
+
1336
+ /* Prompt Gallery Styling */
1337
+ .prompt-gallery {
1338
+ background: var(--background-fill-secondary);
1339
+ border: 1px solid var(--border-color-primary);
1340
+ border-radius: 8px;
1341
+ padding: 16px;
1342
+ margin: 8px 0;
1343
+ }
1344
+
1345
+ .prompt-card {
1346
+ background: var(--background-fill-primary);
1347
+ border: 1px solid var(--border-color-accent);
1348
+ border-radius: 6px;
1349
+ padding: 12px;
1350
+ margin: 8px 0;
1351
+ cursor: pointer;
1352
+ transition: all 0.3s ease;
1353
+ }
1354
+
1355
+ .prompt-card:hover {
1356
+ background: var(--background-fill-secondary);
1357
+ border-color: var(--color-accent);
1358
+ transform: translateY(-2px);
1359
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
1360
+ }
1361
+
1362
+ .prompt-card-header {
1363
+ display: flex;
1364
+ align-items: center;
1365
+ gap: 8px;
1366
+ margin-bottom: 8px;
1367
+ }
1368
+
1369
+ .prompt-card-title {
1370
+ font-weight: 600;
1371
+ color: var(--body-text-color);
1372
+ margin: 0;
1373
+ }
1374
+
1375
+ .prompt-card-description {
1376
+ color: var(--body-text-color-subdued);
1377
+ font-size: 0.9em;
1378
+ margin: 0;
1379
+ }
1380
+
1381
+ .prompt-preview {
1382
+ background: var(--background-fill-secondary);
1383
+ border: 1px solid var(--border-color-primary);
1384
+ border-radius: 4px;
1385
+ padding: 8px;
1386
+ margin-top: 8px;
1387
+ font-size: 0.85em;
1388
+ color: var(--body-text-color-subdued);
1389
+ max-height: 100px;
1390
+ overflow-y: auto;
1391
+ }
1392
+
1393
+ .gallery-category {
1394
+ margin-bottom: 16px;
1395
+ }
1396
+
1397
+ .category-header {
1398
+ display: flex;
1399
+ align-items: center;
1400
+ gap: 8px;
1401
+ margin-bottom: 12px;
1402
+ padding-bottom: 8px;
1403
+ border-bottom: 2px solid var(--border-color-accent);
1404
+ }
1405
+
1406
+ .category-title {
1407
+ font-size: 1.1em;
1408
+ font-weight: 600;
1409
+ color: var(--body-text-color);
1410
+ margin: 0;
1411
+ }
1412
+
1413
+ .use-prompt-btn {
1414
+ background: linear-gradient(135deg, #667eea, #764ba2);
1415
+ color: white;
1416
+ border: none;
1417
+ padding: 6px 12px;
1418
+ border-radius: 4px;
1419
+ font-size: 0.85em;
1420
+ cursor: pointer;
1421
+ transition: all 0.3s ease;
1422
+ margin-top: 8px;
1423
+ }
1424
+
1425
+ .use-prompt-btn:hover {
1426
+ background: linear-gradient(135deg, #764ba2, #667eea);
1427
+ transform: translateY(-1px);
1428
+ box-shadow: 0 2px 8px rgba(102, 126, 234, 0.3);
1429
+ }
1430
+ """
1431
+
1432
+
1433
class WorkflowUI:
    """Per-browser-session UI state.

    Bundles a FileHandler, a short random session id, and a
    FinancialDocumentWorkflow whose intermediate results are cached in a
    SQLite-backed store so repeated runs in the same session can skip
    already-completed steps.
    """

    def __init__(self):
        self.file_handler = FileHandler()
        self.session_id = str(uuid.uuid4())[:8]  # Generate our own session ID

        # Create workflow with database storage for caching.
        # NOTE(review): SqliteStorage opens/creates workflows.db under
        # settings.TEMP_DIR as a side effect of construction.
        self.workflow = FinancialDocumentWorkflow(
            session_id=self.session_id,
            storage=SqliteStorage(
                table_name="financial_workflows",
                db_file=str(Path(settings.TEMP_DIR) / "workflows.db")
            )
        )

        self.processing_started = False  # flipped once a processing run begins
        self.selected_prompt = None      # prompt chosen from the gallery, if any

        # Simple step configuration for UI display (keys mirror the four
        # workflow phases executed by process_file).
        self.steps_config = {
            "extraction": {
                "name": "Financial Data Extraction",
                "description": "Extracting financial data points from document",
                "icon": "🔍"
            },
            "arrangement": {
                "name": "Data Analysis & Organization",
                "description": "Organizing and analyzing extracted financial data",
                "icon": "📊"
            },
            "code_generation": {
                "name": "Excel Code Generation",
                "description": "Generating Python code for Excel reports",
                "icon": "💻"
            },
            "execution": {
                "name": "Excel Report Creation",
                "description": "Executing code to create Excel workbook",
                "icon": "📊"
            }
        }
1472
+ }
1473
+
1474
+ def validate_file(self, file_path):
1475
+ """Validate uploaded file."""
1476
+ logger.info(f"Validating file: {file_path}")
1477
+
1478
+ if not file_path:
1479
+ logger.warning("No file uploaded")
1480
+ return {"valid": False, "error": "No file uploaded"}
1481
+
1482
+ path = Path(file_path)
1483
+ if not path.exists():
1484
+ logger.error(f"File does not exist: {file_path}")
1485
+ return {"valid": False, "error": "File does not exist"}
1486
+
1487
+ file_extension = path.suffix.lower().lstrip(".")
1488
+
1489
+ if file_extension not in settings.SUPPORTED_FILE_TYPES:
1490
+ logger.error(f"Unsupported file type: {file_extension}")
1491
+ return {
1492
+ "valid": False,
1493
+ "error": f"Unsupported file type. Supported: {', '.join(settings.SUPPORTED_FILE_TYPES)}",
1494
+ }
1495
+
1496
+ file_size_mb = path.stat().st_size / (1024 * 1024)
1497
+ if file_size_mb > 50: # 50MB limit
1498
+ logger.error(f"File too large: {file_size_mb}MB")
1499
+ return {"valid": False, "error": "File too large (max 50MB)"}
1500
+
1501
+ logger.info(
1502
+ f"File validation successful: {path.name} ({file_extension}, {file_size_mb}MB)"
1503
+ )
1504
+ return {
1505
+ "valid": True,
1506
+ "file_info": {
1507
+ "name": path.name,
1508
+ "type": file_extension,
1509
+ "size_mb": round(file_size_mb, 2),
1510
+ },
1511
+ }
1512
+
1513
+ file_size_mb = path.stat().st_size / (1024 * 1024)
1514
+ if file_size_mb > 50: # 50MB limit
1515
+ return {"valid": False, "error": "File too large (max 50MB)"}
1516
+
1517
+ return {
1518
+ "valid": True,
1519
+ "file_info": {
1520
+ "name": path.name,
1521
+ "type": file_extension,
1522
+ "size_mb": round(file_size_mb, 2),
1523
+ },
1524
+ }
1525
+
1526
+ def get_file_preview(self, file_path):
1527
+ """Get file preview."""
1528
+ try:
1529
+ path = Path(file_path)
1530
+ if path.suffix.lower() in [".txt", ".md", ".py", ".json"]:
1531
+ with open(path, "r", encoding="utf-8") as f:
1532
+ content = f.read()
1533
+ return content[:1000] + "..." if len(content) > 1000 else content
1534
+ else:
1535
+ return f"Binary file: {path.name} ({path.suffix})"
1536
+ except Exception as e:
1537
+ return f"Error reading file: {str(e)}"
1538
+
1539
+ def get_prompt_text(self, category_id, prompt_id):
1540
+ """Get the full text of a specific prompt."""
1541
+ prompt = prompt_gallery.get_prompt_by_id(category_id, prompt_id)
1542
+ return prompt.get('prompt', '') if prompt else ''
1543
+
1544
    def download_processed_files(self):
        """Create a zip file of all processed files and return for download.

        Zips everything under the workflow's session output directory into
        the system temp directory and returns the absolute path for Gradio
        to serve. Returns None when there is nothing to zip or on any error
        (the error is logged, never raised to the UI).
        """
        # Update activity for auto-shutdown monitoring
        shutdown_manager.update_activity()

        try:
            import zipfile
            import tempfile
            import os
            import shutil  # NOTE(review): imported but unused in this method
            from datetime import datetime

            # Get session output directory - now using workflow's output directory
            session_output_dir = self.workflow.session_output_dir

            if not session_output_dir.exists():
                logger.warning(f"Output directory does not exist: {session_output_dir}")
                return None

            # Create a properly named zip file in a temporary location that Gradio can access
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            zip_filename = f"processed_files_{self.session_id}_{timestamp}.zip"

            # Use Python's tempfile to create a file in the system temp directory
            # This ensures Gradio can access it properly
            temp_dir = tempfile.gettempdir()
            zip_path = Path(temp_dir) / zip_filename

            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                # Add all files from output directory (recursively)
                file_count = 0
                for file_path in session_output_dir.rglob('*'):
                    if file_path.is_file():
                        # Calculate relative path for zip so the archive
                        # mirrors the output directory layout
                        arcname = file_path.relative_to(session_output_dir)
                        zipf.write(file_path, arcname)
                        file_count += 1
                        logger.debug(f"Added to zip: {arcname}")

            if file_count == 0:
                logger.warning("No files found to download")
                # Debug: List all files in session directory to help diagnose
                # why the output directory was empty
                session_dir = Path(settings.TEMP_DIR) / self.session_id
                if session_dir.exists():
                    logger.info(f"Session directory exists: {session_dir}")
                    for subdir in ['input', 'output', 'temp']:
                        subdir_path = session_dir / subdir
                        if subdir_path.exists():
                            files = list(subdir_path.glob('*'))
                            logger.info(f"{subdir} directory has {len(files)} files: {[f.name for f in files]}")
                        else:
                            logger.info(f"{subdir} directory does not exist")
                else:
                    logger.warning(f"Session directory does not exist: {session_dir}")
                # Clean up empty zip file (archive is already closed here)
                if zip_path.exists():
                    zip_path.unlink()
                return None

            logger.info(f"Created zip file with {file_count} files: {zip_path}")

            # Ensure the file exists and has content
            if zip_path.exists() and zip_path.stat().st_size > 0:
                # For Gradio file downloads, we need to return the file path in a specific way
                abs_path = str(zip_path.absolute())
                logger.info(f"Returning zip file path for download: {abs_path}")
                logger.info(f"File size: {zip_path.stat().st_size} bytes")

                # Try to make the file accessible by setting proper permissions
                os.chmod(abs_path, 0o644)

                # Return the file path for Gradio to handle
                # Make sure to return the path in a way Gradio can process
                return abs_path
            else:
                logger.error("Zip file was created but is empty or doesn't exist")
                return None

        except Exception as e:
            logger.error(f"Error creating download: {str(e)}")
            import traceback
            logger.error(f"Traceback: {traceback.format_exc()}")
            return None
1627
+
1628
+
1629
+ def create_gradio_app():
1630
+ """Create the main Gradio application."""
1631
+
1632
+ # Start WebSocket server for terminal streaming
1633
+ try:
1634
+ run_websocket_server()
1635
+ logger.info("Terminal WebSocket server started on port 8765")
1636
+ except Exception as e:
1637
+ logger.error(f"Failed to start terminal WebSocket server: {e}")
1638
+
1639
    def initialize_session():
        """Initialize a new session with fresh WorkflowUI instance.

        Used as the factory for Gradio's per-browser-session state.
        """
        return WorkflowUI()
1642
+
1643
+ def process_file(file, verbose_print, session_state, progress=gr.Progress()):
1644
+ """Process uploaded file with step-by-step execution and progress updates."""
1645
+ # Get or create session-specific UI instance
1646
+ if session_state is None:
1647
+ session_state = WorkflowUI()
1648
+
1649
+ ui = session_state
1650
+ logger.info(f"🚀 PROCESSING STARTED - File: {file.name if file else 'None'}, Verbose: {verbose_print}")
1651
+ logger.info(f"📋 Session ID: {ui.session_id}")
1652
+
1653
+ # Update activity for auto-shutdown monitoring
1654
+ shutdown_manager.update_activity()
1655
+
1656
+ if not file:
1657
+ logger.warning("Missing file")
1658
+ return "", "", "", None, session_state
1659
+
1660
+ # Validate file (file.name contains Gradio's temp path)
1661
+ logger.info(f"🔍 VALIDATING FILE: {file.name}")
1662
+ validation = ui.validate_file(file.name)
1663
+ logger.info(f"✅ File validation result: {validation}")
1664
+
1665
+ if not validation["valid"]:
1666
+ logger.error(f"❌ FILE VALIDATION FAILED: {validation['error']}")
1667
+ return "", "", "", None, session_state
1668
+
1669
+ # Save file to our session directory
1670
+ logger.info("💾 Saving uploaded file to session directory...")
1671
+ temp_path = ui.file_handler.save_uploaded_file(file, ui.session_id)
1672
+ logger.info(f"✅ File saved to: {temp_path}")
1673
+ logger.info(f"📊 File size: {validation.get('file_info', {}).get('size_mb', 'Unknown')} MB")
1674
+
1675
+ def create_step_html(current_step):
1676
+ """Create HTML for step progress display"""
1677
+ steps = [
1678
+ {"key": "extraction", "name": "Data Extraction", "icon": "🔍"},
1679
+ {"key": "arrangement", "name": "Organization", "icon": "📊"},
1680
+ {"key": "code_generation", "name": "Code Generation", "icon": "💻"},
1681
+ {"key": "execution", "name": "Excel Creation", "icon": "����"}
1682
+ ]
1683
+
1684
+ step_html = '<div style="display: flex; gap: 10px; margin-top: 15px;">'
1685
+
1686
+ for step in steps:
1687
+ if step["key"] == current_step:
1688
+ # Current step - blue with animation
1689
+ step_html += f'''
1690
+ <div style="padding: 10px; border-radius: 6px; background: rgba(59, 130, 246, 0.2); border: 2px solid #3b82f6; position: relative; overflow: hidden;">
1691
+ <div style="position: absolute; top: 0; left: -100%; width: 100%; height: 100%; background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent); animation: shimmer 2s infinite;"></div>
1692
+ {step["icon"]} {step["name"]} ⚡
1693
+ </div>
1694
+ '''
1695
+ elif any(s["key"] == step["key"] and steps.index(s) < steps.index(next(s for s in steps if s["key"] == current_step)) for s in steps):
1696
+ # Completed step - green
1697
+ step_html += f'''
1698
+ <div style="padding: 10px; border-radius: 6px; background: rgba(16, 185, 129, 0.1); border: 1px solid #10b981;">
1699
+ ✅ {step["name"]}
1700
+ </div>
1701
+ '''
1702
+ else:
1703
+ # Pending step - gray
1704
+ step_html += f'''
1705
+ <div style="padding: 10px; border-radius: 6px; background: rgba(107, 114, 128, 0.1); border: 1px solid #6b7280;">
1706
+ {step["icon"]} {step["name"]}
1707
+ </div>
1708
+ '''
1709
+
1710
+ step_html += '</div>'
1711
+
1712
+ return f'''
1713
+ <div style="padding: 20px; background: var(--background-fill-secondary); border-radius: 8px;">
1714
+ <h3>📊 Financial Document Analysis Workflow</h3>
1715
+ {step_html}
1716
+ <p style="margin-top: 15px; color: var(--body-text-color-subdued);">
1717
+ Current step: <strong>{next(s["name"] for s in steps if s["key"] == current_step)}</strong>
1718
+ </p>
1719
+ <style>
1720
+ @keyframes shimmer {{
1721
+ 0% {{ transform: translateX(-100%); }}
1722
+ 100% {{ transform: translateX(200%); }}
1723
+ }}
1724
+ </style>
1725
+ </div>
1726
+ '''
1727
+
1728
+ try:
1729
+ import time
1730
+ from pathlib import Path
1731
+ from agno.media import File
1732
+
1733
+ # Step 0: Initialize
1734
+ progress_html = "🚀 <strong>Initializing financial document processing...</strong>"
1735
+ logger.info(f"🎯 WORKFLOW INITIALIZATION - Session: {ui.session_id}")
1736
+ logger.info(f"📝 Document: {temp_path}")
1737
+ logger.info("⚡ Starting multi-step financial analysis workflow...")
1738
+ yield (progress_html, create_step_html("extraction"), "", gr.Column(visible=False))
1739
+
1740
+ time.sleep(1) # Brief pause for UI update
1741
+
1742
+ # Step 1: Data Extraction
1743
+ logger.info("=" * 60)
1744
+ logger.info("🔍 STEP 1/4: DATA EXTRACTION PHASE")
1745
+ logger.info("=" * 60)
1746
+ logger.info("📋 Initializing financial data extraction agent...")
1747
+ progress_html = "🔍 <strong>Step 1/4: Extracting financial data from document...</strong>"
1748
+ yield (progress_html, create_step_html("extraction"), "", gr.Column(visible=False))
1749
+
1750
+ # Check for cached extraction
1751
+ if "extracted_data" in ui.workflow.session_state:
1752
+ logger.info("💾 Using cached extraction data from previous run")
1753
+ logger.info("⏩ Skipping extraction step - data already available")
1754
+ time.sleep(0.5) # Brief pause to show step
1755
+ else:
1756
+ logger.info(f"🔄 Starting fresh data extraction from document: {temp_path}")
1757
+ logger.info("📄 Creating document object for analysis...")
1758
+ # Perform data extraction
1759
+ document = File(filepath=temp_path)
1760
+ logger.info("✅ Document object created successfully")
1761
+
1762
+ extraction_prompt = f"""
1763
+ Analyze this financial document and extract all relevant financial data points.
1764
+
1765
+ Focus on:
1766
+ - Company identification and reporting period
1767
+ - Revenue, expenses, profits, and losses
1768
+ - Assets, liabilities, and equity
1769
+ - Cash flows and financial ratios
1770
+ - Any other key financial metrics
1771
+
1772
+ Document path: {temp_path}
1773
+ """
1774
+
1775
+ logger.info("🤖 Calling data extractor agent with financial analysis prompt")
1776
+ logger.info("⏳ This may take 30-60 seconds depending on document complexity...")
1777
+
1778
+ extraction_response = ui.workflow.data_extractor.run(
1779
+ extraction_prompt,
1780
+ files=[document]
1781
+ )
1782
+ extracted_data = extraction_response.content
1783
+
1784
+ logger.info("🎉 Data extraction agent completed successfully!")
1785
+ logger.info(f"📊 Extracted {len(extracted_data.data_points)} financial data points")
1786
+
1787
+ # Cache the result
1788
+ ui.workflow.session_state["extracted_data"] = extracted_data.model_dump()
1789
+ logger.info(f"💾 Cached extraction results for session {ui.session_id}")
1790
+ logger.info("✅ Step 1 COMPLETED - Data extraction successful")
1791
+
1792
+ # Step 2: Data Arrangement
1793
+ logger.info("=" * 60)
1794
+ logger.info("📊 STEP 2/4: DATA ORGANIZATION PHASE")
1795
+ logger.info("=" * 60)
1796
+ progress_html = "📊 <strong>Step 2/4: Organizing and analyzing financial data...</strong>"
1797
+ yield (progress_html, create_step_html("arrangement"), "", gr.Column(visible=False))
1798
+
1799
+ if "arrangement_response" in ui.workflow.session_state:
1800
+ logger.info("💾 Using cached data arrangement from previous run")
1801
+ logger.info("⏩ Skipping organization step - data already structured")
1802
+ time.sleep(0.5) # Brief pause to show step
1803
+ else:
1804
+ logger.info("🔄 Starting fresh data organization and analysis")
1805
+ # Get extracted data for arrangement
1806
+ extracted_data_dict = ui.workflow.session_state["extracted_data"]
1807
+ logger.info(f"📋 Retrieved {len(extracted_data_dict.get('data_points', []))} data points for organization")
1808
+ logger.info("🏗️ Preparing to organize data into 12 financial categories...")
1809
+
1810
+ arrangement_prompt = f"""
1811
+ You are given raw, extracted financial data. Your task is to reorganize it and prepare it for Excel-based reporting.
1812
+
1813
+ ========== WHAT TO DELIVER ==========
1814
+ • A single JSON object saved as arranged_financial_data.json
1815
+ • Fields required: categories, key_metrics, insights, summary
1816
+
1817
+ ========== HOW TO ORGANIZE ==========
1818
+ Create 12 distinct, Excel-ready categories (one worksheet each):
1819
+ 1. Executive Summary & Key Metrics
1820
+ 2. Income Statement / P&L
1821
+ 3. Balance Sheet – Assets
1822
+ 4. Balance Sheet – Liabilities & Equity
1823
+ 5. Cash-Flow Statement
1824
+ 6. Financial Ratios & Analysis
1825
+ 7. Revenue Analysis
1826
+ 8. Expense Analysis
1827
+ 9. Profitability Analysis
1828
+ 10. Liquidity & Solvency
1829
+ 11. Operational Metrics
1830
+ 12. Risk Assessment & Notes
1831
+
1832
+ ========== STEP-BY-STEP ==========
1833
+ 1. Map every data point into the most appropriate category above.
1834
+ 2. Calculate or aggregate key financial metrics where possible.
1835
+ 3. Add concise insights for trends, anomalies, or red flags.
1836
+ 4. Write an executive summary that highlights the most important findings.
1837
+ 5. Assemble everything into the JSON schema described under "WHAT TO DELIVER."
1838
+ 6. Save the JSON as arranged_financial_data.json via save_file.
1839
+ 7. Use list_files to confirm the file exists, then read_file to validate its content.
1840
+ 8. If the file is missing or malformed, fix the issue and repeat steps 6 – 7.
1841
+ 9. Only report success after the file passes both existence and content checks.
1842
+ 10. Conclude with a short, plain-language summary of what was organized.
1843
+
1844
+ Extracted Data: {json.dumps(extracted_data_dict, indent=2)}
1845
+ """
1846
+
1847
+ logger.info("Calling data arranger to organize financial data into 12 categories")
1848
+ arrangement_response = ui.workflow.data_arranger.run(arrangement_prompt)
1849
+ arrangement_content = arrangement_response.content
1850
+
1851
+ # Cache the result
1852
+ ui.workflow.session_state["arrangement_response"] = arrangement_content
1853
+ logger.info("Data organization completed successfully - financial data categorized")
1854
+ logger.info(f"Cached arrangement results for session {ui.session_id}")
1855
+
1856
+ # Step 3: Code Generation
1857
+ logger.info("Step 3: Starting code generation...")
1858
+ progress_html = "💻 <strong>Step 3/4: Generating Python code for Excel reports...</strong>"
1859
+ yield (progress_html, create_step_html("code_generation"), "", gr.Column(visible=False))
1860
+
1861
+ if "code_generation_response" in ui.workflow.session_state:
1862
+ logger.info("Using cached code generation results from previous run")
1863
+ code_generation_content = ui.workflow.session_state["code_generation_response"]
1864
+ execution_success = ui.workflow.session_state.get("execution_success", False)
1865
+ logger.info(f"Previous execution status: {'Success' if execution_success else 'Failed'}")
1866
+ time.sleep(0.5) # Brief pause to show step
1867
+ else:
1868
+ logger.info("Starting fresh Python code generation for Excel report creation")
1869
+ code_prompt = f"""
1870
+ Your objective: Turn the organized JSON data into a polished, multi-sheet Excel report—and prove that it works.
1871
+
1872
+ ========== INPUT ==========
1873
+ File: arranged_financial_data.json
1874
+ Tool to read it: read_file
1875
+
1876
+ ========== WHAT THE PYTHON SCRIPT MUST DO ==========
1877
+ 1. Load arranged_financial_data.json and parse its contents.
1878
+ 2. For each category in the JSON, create a dedicated worksheet using openpyxl.
1879
+ 3. Apply professional touches:
1880
+ • Bold, centered headers
1881
+ • Appropriate number formats
1882
+ • Column-width auto-sizing
1883
+ • Borders, cell styles, and freeze panes
1884
+ 4. Insert charts (bar, line, or pie) wherever the data lends itself to visualisation.
1885
+ 5. Embed key metrics and summary notes prominently in the Executive Summary sheet.
1886
+ 6. Name the workbook: Financial_Report_<YYYYMMDD_HHMMSS>.xlsx.
1887
+ 7. Wrap every file and workbook operation in robust try/except blocks.
1888
+ 8. Log all major steps and any exceptions for easy debugging.
1889
+ 9. Save the script via save_to_file_and_run and execute it immediately.
1890
+ 10. After execution, use list_files to ensure the Excel file was created.
1891
+ 11. Optionally inspect the file (e.g., size or first bytes via read_file) to confirm it is not empty.
1892
+ 12. If the workbook is missing or corrupted, refine the code, re-save, and re-run until success.
1893
+
1894
+ ========== OUTPUT ==========
1895
+ • A fully formatted Excel workbook in the working directory.
1896
+ • A concise summary of what ran, any issues encountered, and confirmation that the file exists and opens without error.
1897
+ """
1898
+
1899
+ logger.info("Calling code generator to create Python Excel generation script")
1900
+ code_response = ui.workflow.code_generator.run(code_prompt)
1901
+ code_generation_content = code_response.content
1902
+
1903
+ # Simple check for execution success based on response content
1904
+ execution_success = (
1905
+ "error" not in code_generation_content.lower() or
1906
+ "success" in code_generation_content.lower() or
1907
+ "completed" in code_generation_content.lower()
1908
+ )
1909
+
1910
+ # Cache the results
1911
+ ui.workflow.session_state["code_generation_response"] = code_generation_content
1912
+ ui.workflow.session_state["execution_success"] = execution_success
1913
+
1914
+ logger.info(f"Code generation and execution completed: {'✅ Success' if execution_success else '❌ Failed'}")
1915
+ logger.info(f"Cached code generation results for session {ui.session_id}")
1916
+
1917
+ # Step 4: Final Results
1918
+ logger.info("Step 4: Preparing final results...")
1919
+ progress_html = "📊 <strong>Step 4/4: Creating final Excel report...</strong>"
1920
+ yield (progress_html, create_step_html("execution"), "", gr.Column(visible=False))
1921
+
1922
+ time.sleep(1) # Brief pause to show step
1923
+
1924
+ # Prepare final results
1925
+ logger.info("Scanning output directory for generated files")
1926
+ output_files = []
1927
+ if ui.workflow.session_output_dir.exists():
1928
+ output_files = [f.name for f in ui.workflow.session_output_dir.iterdir() if f.is_file()]
1929
+ logger.info(f"Found {len(output_files)} generated files: {', '.join(output_files)}")
1930
+ else:
1931
+ logger.warning(f"Output directory does not exist: {ui.workflow.session_output_dir}")
1932
+
1933
+ # Get cached data
1934
+ extracted_data_dict = ui.workflow.session_state["extracted_data"]
1935
+ arrangement_content = ui.workflow.session_state["arrangement_response"]
1936
+ code_generation_content = ui.workflow.session_state["code_generation_response"]
1937
+ execution_success = ui.workflow.session_state.get("execution_success", False)
1938
+
1939
+ results_summary = f"""
1940
+ # Financial Document Analysis Complete
1941
+
1942
+ ## Document Information
1943
+ - **Company**: {extracted_data_dict.get('company_name', 'Not specified') if extracted_data_dict else 'Not specified'}
1944
+ - **Document Type**: {extracted_data_dict.get('document_type', 'Unknown') if extracted_data_dict else 'Unknown'}
1945
+ - **Reporting Period**: {extracted_data_dict.get('reporting_period', 'Not specified') if extracted_data_dict else 'Not specified'}
1946
+
1947
+ ## Processing Summary
1948
+ - **Data Points Extracted**: {len(extracted_data_dict.get('data_points', [])) if extracted_data_dict else 0}
1949
+ - **Data Organization**: {'✅ Completed' if arrangement_content else '❌ Failed'}
1950
+ - **Excel Creation**: {'✅ Success' if execution_success else '❌ Failed'}
1951
+
1952
+ ## Data Organization Results
1953
+ {arrangement_content[:500] + '...' if arrangement_content and len(arrangement_content) > 500 else arrangement_content or 'No arrangement data available'}
1954
+
1955
+ ## Tool Execution Summary
1956
+ **Data Arranger**: Used FileTools to save organized data to JSON
1957
+ **Code Generator**: Used PythonTools and FileTools for Excel generation
1958
+
1959
+ ## Code Generation Results
1960
+ {code_generation_content[:500] + '...' if code_generation_content and len(code_generation_content) > 500 else code_generation_content or 'No code generation results available'}
1961
+
1962
+ ## Generated Files ({len(output_files)} files)
1963
+ {chr(10).join(f"- **{file}**" for file in output_files) if output_files else "- No files generated"}
1964
+
1965
+ ## Output Directory
1966
+ 📁 `{ui.workflow.session_output_dir}`
1967
+
1968
+ ---
1969
+ *Generated using Agno Workflows with step-by-step execution*
1970
+ *Note: Each step was executed individually with progress updates*
1971
+ """
1972
+
1973
+ # Cache final results
1974
+ ui.workflow.session_state["final_results"] = results_summary
1975
+ logger.info("Final results compiled and cached successfully")
1976
+ logger.info(f"Processing workflow completed for session {ui.session_id}")
1977
+
1978
+ # Create completion HTML
1979
+ final_progress_html = "✅ <strong>All steps completed successfully!</strong>"
1980
+ final_steps_html = '''
1981
+ <div style="padding: 20px; background: var(--background-fill-secondary); border-radius: 8px;">
1982
+ <h3>✅ Workflow Completed Successfully</h3>
1983
+ <div style="display: flex; gap: 10px; margin-top: 15px;">
1984
+ <div style="padding: 10px; border-radius: 6px; background: rgba(16, 185, 129, 0.1); border: 1px solid #10b981;">
1985
+ ✅ Data Extraction
1986
+ </div>
1987
+ <div style="padding: 10px; border-radius: 6px; background: rgba(16, 185, 129, 0.1); border: 1px solid #10b981;">
1988
+ ✅ Organization
1989
+ </div>
1990
+ <div style="padding: 10px; border-radius: 6px; background: rgba(16, 185, 129, 0.1); border: 1px solid #10b981;">
1991
+ ✅ Code Generation
1992
+ </div>
1993
+ <div style="padding: 10px; border-radius: 6px; background: rgba(16, 185, 129, 0.1); border: 1px solid #10b981;">
1994
+ ✅ Excel Creation
1995
+ </div>
1996
+ </div>
1997
+ <div style="margin-top: 15px; padding: 10px; background: rgba(16, 185, 129, 0.05); border-radius: 4px;">
1998
+ <strong>All steps executed successfully!</strong>
1999
+ <ul style="margin: 5px 0;">
2000
+ <li><strong>Data Extraction:</strong> Completed</li>
2001
+ <li><strong>Organization:</strong> Completed</li>
2002
+ <li><strong>Code Generation:</strong> Completed</li>
2003
+ <li><strong>Excel Creation:</strong> ''' + ('Completed' if execution_success else 'Partial') + '''</li>
2004
+ </ul>
2005
+ </div>
2006
+ </div>
2007
+ '''
2008
+
2009
+ logger.info("Financial document processing completed successfully")
2010
+ if verbose_print:
2011
+ logger.info("Final workflow response:\n" + results_summary)
2012
+
2013
+ # Return final results with updated session state
2014
+ yield (final_progress_html, final_steps_html, results_summary, gr.Column(visible=True), session_state)
2015
+
2016
+ except Exception as e:
2017
+ logger.error(f"Processing failed: {str(e)}", exc_info=True)
2018
+ error_progress = f"❌ <strong>Processing failed: {str(e)}</strong>"
2019
+ error_steps = f"""
2020
+ <div style="padding: 20px; background: rgba(239, 68, 68, 0.1); border: 1px solid #ef4444; border-radius: 8px;">
2021
+ <h3>❌ Processing Failed</h3>
2022
+ <p><strong>Error:</strong> {str(e)}</p>
2023
+ <p>Please check the file and try again. If the problem persists, check the logs for more details.</p>
2024
+ </div>
2025
+ """
2026
+ error_markdown = f"# ❌ Processing Error\n\n**Error:** {str(e)}\n\nPlease try again or check the logs for more details."
2027
+ yield (error_progress, error_steps, error_markdown, gr.Column(visible=True), session_state)
2028
+
2029
+
2030
def get_terminal_with_logs(session_state):
    """Render the terminal panel HTML populated with real backend logs.

    Args:
        session_state: Per-user session object exposing ``session_id``,
            or None before a session has been initialized.

    Returns:
        str: Self-contained HTML (terminal chrome plus inline JS that
        manages auto-scroll) showing up to 25 recent log entries. Falls
        back to the global log stream when the session has none, and to
        placeholder lines when no logs exist at all. On any failure, a
        minimal error panel is returned instead of raising.
    """
    try:
        # Prefer session-scoped logs; fall back to the shared stream.
        session_id = session_state.session_id if session_state else None
        logs = terminal_log_handler.get_logs(session_id=session_id, limit=25)
        if not logs:
            logs = terminal_log_handler.get_logs(session_id=None, limit=25)

        # Constant lookup table mapping log levels to CSS classes,
        # hoisted out of the per-entry loop.
        level_classes = {
            'DEBUG': 'system-line',
            'INFO': 'output-line',
            'WARNING': 'system-line',
            'ERROR': 'error-line',
            'CRITICAL': 'error-line',
        }

        def _escape(text):
            # Escape '&' FIRST so the '<'/'>' replacements are not
            # double-escaped. (Previously '&' was left raw, so log text
            # containing entities such as '&lt;' rendered incorrectly.)
            return (
                text.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;')
            )

        log_lines = []
        if not logs:
            # No logs yet: show friendly initialization placeholders.
            now = datetime.now().strftime("%H:%M:%S")
            log_lines = [
                f'<div class="terminal-line system-line"><span class="timestamp">{now}</span><span>🎯 Terminal initialized - Monitoring backend logs</span></div>',
                f'<div class="terminal-line system-line"><span class="timestamp">{now}</span><span>💡 Backend processing logs will appear here in real-time</span></div>',
                f'<div class="terminal-line system-line"><span class="timestamp">{now}</span><span>📚 Session ID: {session_id or "Not initialized"}</span></div>'
            ]
        else:
            for log in logs:
                level_class = level_classes.get(log['level'], 'output-line')
                # Escape HTML and preserve formatting
                message = _escape(log['message'])
                logger_name = _escape(log['logger'])
                log_lines.append(
                    f'<div class="terminal-line {level_class}"><span class="timestamp">{log["timestamp"]}</span><span>[{log["level"]}] {logger_name}: {message}</span></div>'
                )

        # Create the complete terminal HTML. Literal JS braces are doubled
        # ({{ }}) because this is an f-string.
        terminal_html = f"""
        <div class="terminal-container">
            <div class="terminal-header">
                <div class="terminal-title">
                    <div class="terminal-icon"></div>
                    <span>Terminal</span>
                </div>
                <div class="terminal-controls">
                    <button class="control-btn close" onclick="clearTerminal()"></button>
                    <button class="control-btn minimize" onclick="minimizeTerminal()"></button>
                    <button class="control-btn maximize" onclick="maximizeTerminal()"></button>
                </div>
            </div>

            <div class="terminal-body">
                <div class="terminal-output" id="terminalOutput">
                    {''.join(log_lines)}
                </div>
            </div>
        </div>

        <script>
        // Simple read-only terminal for backend log display
        class LogTerminal {{
            constructor() {{
                this.output = document.getElementById('terminalOutput');
                this.autoScroll = true;
                this.userScrolled = false;

                this.init();
            }}

            init() {{
                // Add scroll event listener to detect manual scrolling
                if (this.output) {{
                    this.output.addEventListener('scroll', (e) => this.handleScroll(e));
                }}

                this.scrollToBottom();
            }}

            handleScroll(e) {{
                const element = e.target;
                const isScrolledToBottom = element.scrollHeight - element.clientHeight <= element.scrollTop + 1;

                // If user scrolled away from bottom, disable auto-scroll
                if (!isScrolledToBottom && this.autoScroll) {{
                    this.userScrolled = true;
                    this.autoScroll = false;
                }} else if (isScrolledToBottom && !this.autoScroll) {{
                    // If user scrolled back to bottom, re-enable auto-scroll
                    this.userScrolled = false;
                    this.autoScroll = true;
                }}
            }}

            scrollToBottom() {{
                if (this.output && this.autoScroll) {{
                    this.output.scrollTop = this.output.scrollHeight;
                }}
            }}

            clear() {{
                if (this.output) {{
                    this.output.innerHTML = '';
                    this.autoScroll = true;
                    this.userScrolled = false;
                }}
            }}
        }}

        // Initialize terminal with auto-scroll preservation
        function initTerminal() {{
            if (window.logTerminal) {{
                // Preserve scroll state if terminal exists
                window.logTerminal.init();
            }} else {{
                window.logTerminal = new LogTerminal();
            }}

            // Enable auto-scroll for new content
            if (window.logTerminal && window.logTerminal.autoScroll) {{
                setTimeout(() => {{
                    window.logTerminal.scrollToBottom();
                }}, 100);
            }}
        }}

        // Initialize immediately and on DOM changes
        initTerminal();

        // Reinitialize when terminal content updates
        setTimeout(initTerminal, 200);

        // Terminal control functions
        function clearTerminal() {{
            if (window.logTerminal) {{
                window.logTerminal.clear();
            }}
        }}

        function minimizeTerminal() {{
            console.log('Minimize terminal');
        }}

        function maximizeTerminal() {{
            console.log('Maximize terminal');
        }}
        </script>
        """

        return terminal_html

    except Exception as e:
        logger.error(f"Error creating terminal with logs: {e}")
        return f"""
        <div class="terminal-container">
            <div class="terminal-line error-line">
                <span class="timestamp">{datetime.now().strftime('%H:%M:%S')}</span>
                <span>Error loading terminal: {str(e)}</span>
            </div>
        </div>
        """
2190
+
2191
def reset_session(session_state):
    """Discard the current session and start a fresh one.

    Returns a tuple that clears the progress, steps, and results
    displays and the download output, followed by the new session
    object and its ID for the session display.
    """
    fresh = WorkflowUI()
    logger.info(f"Session reset - New session ID: {fresh.session_id}")
    cleared_displays = ("", "", "")
    return cleared_displays + (None, fresh, fresh.session_id)
2197
+
2198
def update_session_display(session_state):
    """Return the current session ID, creating a session when missing."""
    active = session_state if session_state is not None else WorkflowUI()
    return active.session_id, active
2203
+
2204
+ # Create Gradio interface
2205
+ with gr.Blocks(css=custom_css, title="📊 Data Extractor Using Gemini") as app:
2206
+ # Session state to maintain per-user data
2207
+ session_state = gr.State()
2208
+
2209
+ # Header
2210
+ gr.HTML("""
2211
+ <div class="header-title">
2212
+ 📊 Data Extractor Using Gemini
2213
+ </div>
2214
+ """)
2215
+
2216
+ # Main interface with integrated terminal (Manus AI style)
2217
+ with gr.Row():
2218
+ # Left side - Main processing interface
2219
+ with gr.Column(scale=2):
2220
+ # Configuration Panel
2221
+ gr.Markdown("## ⚙️ Configuration")
2222
+
2223
+ # Session info - will be updated when session initializes
2224
+ session_info = gr.Textbox(
2225
+ label="Session ID", value="Initializing...", interactive=False
2226
+ )
2227
+
2228
+ # File upload
2229
+ gr.Markdown("### 📄 Upload Document")
2230
+ file_input = gr.File(
2231
+ label="Choose a file",
2232
+ file_types=[f".{ext}" for ext in settings.SUPPORTED_FILE_TYPES],
2233
+ )
2234
+
2235
+
2236
+ # Info about automated processing
2237
+ gr.Markdown("### 🎯 Automated Financial Data Extraction")
2238
+ gr.Markdown("This application automatically extracts financial data points from uploaded documents and generates comprehensive analysis reports. No additional input required!")
2239
+
2240
+ # Control buttons
2241
+ with gr.Row():
2242
+ process_btn = gr.Button(
2243
+ "🚀 Start Processing", variant="primary", scale=2
2244
+ )
2245
+ reset_btn = gr.Button("🔄 Reset Session", scale=1)
2246
+
2247
+ # Processing Panel
2248
+ gr.Markdown("## ⚡ Processing Status")
2249
+
2250
+ # Progress bar
2251
+ progress_display = gr.HTML(label="Progress")
2252
+
2253
+ # Steps display
2254
+ steps_display = gr.HTML(label="Processing Steps")
2255
+
2256
+ # Results - Hidden initially, shown when processing completes
2257
+ verbose_checkbox = gr.Checkbox(label="Print model response", value=False)
2258
+
2259
+ # Results section
2260
+ results_section = gr.Column(visible=False)
2261
+ with results_section:
2262
+ gr.Markdown("### 📊 Results")
2263
+ results_display = gr.Code(
2264
+ label="Final Results", language="markdown", lines=10
2265
+ )
2266
+
2267
+ # Download section
2268
+ gr.Markdown("### ⬇️ Download Processed Files")
2269
+ download_btn = gr.Button("📥 Download All Files", variant="primary")
2270
+ download_output = gr.File(
2271
+ label="Download Files",
2272
+ file_count="single",
2273
+ file_types=[".zip"],
2274
+ interactive=False,
2275
+ visible=True
2276
+ )
2277
+
2278
+ # Right side - Integrated Terminal Panel
2279
+ with gr.Column(scale=3):
2280
+ gr.Markdown("## 💻 Terminal")
2281
+
2282
+ # Terminal component with real backend logs
2283
+ terminal_html = gr.HTML()
2284
+
2285
+
2286
+ # Event handlers
2287
+ process_btn.click(
2288
+ fn=process_file,
2289
+ inputs=[file_input, verbose_checkbox, session_state],
2290
+ outputs=[progress_display, steps_display, results_display, results_section, session_state],
2291
+ )
2292
+
2293
def session_download(session_state):
    """Produce the downloadable archive for the given session, if any."""
    if session_state is None:
        return None
    archive = session_state.download_processed_files()
    return archive
2298
+
2299
+ download_btn.click(
2300
+ fn=session_download,
2301
+ inputs=[session_state],
2302
+ outputs=[download_output],
2303
+ show_progress=True
2304
+ )
2305
+
2306
+ reset_btn.click(
2307
+ fn=reset_session,
2308
+ inputs=[session_state],
2309
+ outputs=[progress_display, steps_display, results_display, download_output, session_state, session_info],
2310
+ )
2311
+
2312
+
2313
+ # Initialize session and terminal on load
2314
def initialize_app():
    """Create a fresh session and the initial terminal panel on app load."""
    session = WorkflowUI()
    return session, session.session_id, get_terminal_with_logs(session)
2319
+
2320
+ app.load(
2321
+ fn=initialize_app,
2322
+ outputs=[session_state, session_info, terminal_html],
2323
+ )
2324
+
2325
+ # Auto-refresh timer component (hidden)
2326
+ refresh_timer = gr.Timer(value=3.0, active=True) # Refresh every 3 seconds
2327
+
2328
+ # Timer event to auto-refresh terminal with session awareness
2329
+ refresh_timer.tick(
2330
+ fn=get_terminal_with_logs,
2331
+ inputs=[session_state],
2332
+ outputs=[terminal_html],
2333
+ )
2334
+
2335
+ return app
2336
+
2337
+
2338
def main():
    """Main application entry point: build the UI, arm auto-shutdown, serve."""
    gradio_app = create_gradio_app()

    # Begin inactivity monitoring before the server starts serving.
    shutdown_manager.start_monitoring(gradio_app)

    for note in (
        "Starting Gradio application with auto-shutdown enabled",
        f"Auto-shutdown timeout: {INACTIVITY_TIMEOUT_MINUTES} minutes",
        "Press Ctrl+C to stop the server manually",
    ):
        logger.info(note)

    try:
        # Launch the app (blocks until the server stops).
        gradio_app.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=True,
            debug=False,
            show_error=True,
        )
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt, shutting down...")
        shutdown_manager._shutdown_server()
    except Exception as e:
        logger.error(f"Error during app launch: {e}")
        shutdown_manager._shutdown_server()
2364
+
2365
+
2366
+ if __name__ == "__main__":
2367
+ main()
config/__init__.py ADDED
File without changes
config/prompt_gallery.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "categories": {
3
+ "financial": {
4
+ "name": "Financial Content Extraction (Simple Structure)",
5
+ "icon": "📊",
6
+ "description": "Extract all tables and sectioned data from annual reports, placing each type in separate Excel sheets, without calculations.",
7
+ "prompts": [
8
+ {
9
+ "id": "extract_all_tables_simple",
10
+ "title": "Extract All Tables & Sections (No Charts, No Calculations)",
11
+ "icon": "📄",
12
+ "description": "Extract every table and structured data section from the annual report PDF and organize into clearly named Excel sheets. No calculations or charts—just pure content.",
13
+ "prompt": "For the provided annual report, extract EVERY table and structured content section found (including financial statements, notes, schedules, management discussion tables, segmental/line/regional breakdowns, etc.) and output into an Excel (.xlsx) file. Each sheet should be named after the report section or table heading, matching the document (examples: 'Income Statement', 'Balance Sheet', 'Segment Information', 'Risk Table', 'Notes to FS - Table 4', etc). Maintain all original row/column structure and include all source footnotes, captions, and section headers in the appropriate positions for context. \n\nHeader Row Formatting: Bold, fill light gray (RGB 230,230,230), font size 11. Freeze top row in every sheet. Wrap text in all columns if content overflows. Maintain all cell alignments as close to original as possible. \n\nInsert a cover sheet named 'Extracted Sections Index' that lists every sheet name, the original page number/range, and a short description ('Income Statement – p. 23 – Consolidated company-wide income', etc). Do not perform or add any numerical calculations or analytics. The focus is pure, lossless data extraction and organization."
14
+ },
15
+ {
16
+ "id": "extract_all_tables_with_charts",
17
+ "title": "Extract All Tables & Sections (Add Simple Charts)",
18
+ "icon": "📊",
19
+ "description": "Extract all tables and structured content, with optional basic Excel charts for major financial statements, but no derived calculations.",
20
+ "prompt": "Extract every table and section of structured data from the annual report into a multi-sheet Excel (.xlsx) file. Sheet names should match those of the tables' original titles in the report (e.g., 'Cash Flow Statement', 'Product Sales', 'Management Table 2'). For the three core statements ('Income Statement', 'Balance Sheet', 'Cash Flow Statement'), create a second sheet with the same name plus ' Chart' (e.g. 'Income Statement Chart'), placing a default bar or line chart visualizing the table's top-level rows by year (with no extra calculations or commentary—just raw data charted as-is). \n\nAll other sheet formatting rules: Header row bold, pale blue fill (RGB 217,228,240), font 11. Freeze top row. Wrap text in all columns. Add a first sheet called 'Sections Directory' with a table listing all subsequent sheet names, their corresponding report page(s), and a short summary for user navigation. No calculated fields or analytics—output is strictly direct report extraction with optional reference charts only for core statements."
21
+ }
22
+ ]
23
+ }
24
+ },
25
+ "metadata": {
26
+ "version": "1.0-simple",
27
+ "last_updated": "2025-07-18",
28
+ "description": "Intuitive and simple financial document extraction prompts: choose lossless structure-only or add basic charts—no calculations."
29
+ }
30
+ }
config/settings.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from dotenv import load_dotenv
4
+
5
+ load_dotenv()
6
+
7
+
8
class Settings:
    """Central application configuration sourced from environment variables.

    Values are read once at import time; call ``validate_config()`` before
    use to ensure required settings are present and the temp dir exists.
    """

    # Required at runtime; validated in validate_config().
    GOOGLE_AI_API_KEY = os.getenv("GOOGLE_AI_API_KEY")
    # Maximum accepted upload size, in megabytes.
    MAX_FILE_SIZE_MB = 50
    # File extensions (without the leading dot) accepted by the uploader.
    SUPPORTED_FILE_TYPES = [
        "pdf",
        "txt",
        "png",
        "jpg",
        "jpeg",
        "docx",
        "xlsx",
        "csv",
        "md",
        "json",
        "xml",
        "html",
        "py",
        "js",
        "ts",
        "doc",
        "xls",
        "ppt",
        "pptx",
    ]
    # Use /tmp for temporary files on Hugging Face Spaces (or override with TEMP_DIR env var)
    TEMP_DIR = Path(os.getenv("TEMP_DIR", "/tmp/data_extractor_temp"))
    DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "python:3.12-slim")
    # Per-agent model names; each can be overridden independently via env.
    COORDINATOR_MODEL = os.getenv("COORDINATOR_MODEL", "gemini-2.5-pro")
    PROMPT_ENGINEER_MODEL = os.getenv("PROMPT_ENGINEER_MODEL", "gemini-2.5-pro")
    DATA_EXTRACTOR_MODEL = os.getenv("DATA_EXTRACTOR_MODEL", "gemini-2.5-pro")
    DATA_ARRANGER_MODEL = os.getenv("DATA_ARRANGER_MODEL", "gemini-2.5-pro")
    CODE_GENERATOR_MODEL = os.getenv("CODE_GENERATOR_MODEL", "gemini-2.5-pro")

    # Per-agent thinking-budget token limits.
    # NOTE(review): -1 presumably means "unlimited / provider default" --
    # confirm against the model client's API.
    COORDINATOR_MODEL_THINKING_BUDGET = 2048
    PROMPT_ENGINEER_MODEL_THINKING_BUDGET = 2048
    DATA_EXTRACTOR_MODEL_THINKING_BUDGET = -1
    DATA_ARRANGER_MODEL_THINKING_BUDGET = 3072
    CODE_GENERATOR_MODEL_THINKING_BUDGET = 3072

    @classmethod
    def validate_config(cls):
        """Fail fast on missing required config and create the temp dir.

        Raises:
            ValueError: if GOOGLE_AI_API_KEY is not set.
        """
        if not cls.GOOGLE_AI_API_KEY:
            raise ValueError("GOOGLE_AI_API_KEY required")
        # parents=True: a TEMP_DIR override nested under a missing parent
        # previously raised FileNotFoundError from mkdir(exist_ok=True).
        cls.TEMP_DIR.mkdir(parents=True, exist_ok=True)
52
+
53
+
54
+ settings = Settings()
models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Data models for structured agent communication
models/data_models.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Dict, Any, Optional
3
+ from datetime import datetime
4
+
5
+
6
class FileInfo(BaseModel):
    """Information about the file being processed."""
    name: str = Field(description="File name")
    # NOTE(review): "type" shadows the builtin name inside the class body;
    # kept as-is for schema compatibility with existing callers.
    type: str = Field(description="File type/extension")
    size_mb: float = Field(description="File size in MB")
    path: str = Field(description="Full file path")
12
+
13
+
14
class SimplifiedAgentConfig(BaseModel):
    """Simplified configuration for agent creation without complex nesting."""
    instructions: str = Field(description="Single string instructions")
    requirement_type: str = Field(default="standard", description="Type of requirements")
    # default_factory avoids a shared mutable default across instances.
    custom_notes: List[str] = Field(default_factory=list, description="Simple notes")
19
+
20
+
21
class ProcessingPlan(BaseModel):
    """Simplified processing plan for document analysis.

    Produced by the planning stage; flat strings/lists are used instead of
    nested models so the plan stays easy for LLM agents to emit.
    """
    # Basic plan information
    document_type: str = Field(description="Document type (financial, legal, technical, etc.)")
    analysis_objective: str = Field(description="Primary analysis objective")
    complexity: str = Field(default="moderate", description="Complexity level")
    processing_strategy: str = Field(description="Overall processing strategy")

    # Essential configurations (simplified)
    agent_configs: Dict[str, str] = Field(
        default_factory=dict,
        description="Simple agent configuration summaries"
    )

    # Simple schema suggestions using basic types
    data_fields: List[str] = Field(description="List of suggested data fields to extract")
    validation_rules: List[str] = Field(default_factory=list, description="Validation rules")
    output_formats: List[str] = Field(default_factory=list, description="Required output formats")

    # Simple notes and requirements
    requirements: List[str] = Field(default_factory=list, description="Processing requirements")
    notes: str = Field(default="", description="Additional notes")
43
+
44
+
45
class AgentConfiguration(BaseModel):
    """Configuration for a dynamically created agent."""
    instructions: List[str] = Field(description="Specific instructions for this agent")
    # Optional[...] with a "" default: callers may pass None or omit entirely.
    custom_prompt_template: Optional[str] = Field(default="", description="Custom prompt template for this agent")
    special_requirements: List[str] = Field(default_factory=list, description="Special requirements or constraints")
50
+
51
+
52
class DataPoint(BaseModel):
    """Individual data point extracted from document."""
    field_name: str = Field(description="Name of the data field")
    # All values are stored as strings; data_type records the logical type.
    value: str = Field(description="Value of the field")
    data_type: Optional[str] = Field(default="", description="Type of data (text, number, date, etc.)")
    category: Optional[str] = Field(default="", description="Category or section this data belongs to")
    unit: Optional[str] = Field(default="", description="Unit of measurement if applicable")
    period: Optional[str] = Field(default="", description="Time period if applicable")
    # NOTE(review): the documented 0-1 range is not enforced by a validator.
    confidence_score: float = Field(description="Confidence score for the extraction (0-1)")
    source_location: Optional[str] = Field(default="", description="Location in document where data was found")
62
+
63
+
64
class ExtractedData(BaseModel):
    """Structured data extracted from the document."""
    data_points: List[DataPoint] = Field(description="List of extracted data points")
    extraction_notes: str = Field(default="", description="Notes about the extraction process")
    confidence_score: float = Field(description="Overall confidence score for the extraction")
    # NOTE(review): datetime.now() is timezone-naive local time; consider
    # an aware UTC timestamp if records are compared across machines.
    extraction_timestamp: datetime = Field(default_factory=datetime.now, description="When extraction was performed")
    document_summary: Optional[str] = Field(default="", description="Brief summary of the document content")
71
+
72
+
73
class DataInsight(BaseModel):
    """Individual insight from data analysis."""
    insight_type: str = Field(description="Type of insight (trend, comparison, etc.)")
    description: str = Field(description="Description of the insight")
    supporting_data: List[str] = Field(description="Data points that support this insight")
    # Free-form string; expected values are "high", "medium", or "low".
    importance_level: str = Field(description="Importance level (high, medium, low)")
79
+
80
+
81
class DataCategory(BaseModel):
    """A category of organized data (name plus its key/value pairs)."""
    category_name: str = Field(description="Name of the data category")
    data_points: Dict[str, str] = Field(description="Key-value pairs of data in this category")
85
+
86
class ArrangedData(BaseModel):
    """Organized and analyzed data produced by the arrangement stage."""
    organized_categories: List[DataCategory] = Field(
        description="Data organized into logical categories"
    )
    insights: List[DataInsight] = Field(description="Insights generated from the data")
    summary: str = Field(description="Summary of the arranged data")
    arrangement_notes: str = Field(description="Notes about the arrangement process")
94
+
95
+
96
class CodeGenerationResult(BaseModel):
    """Result of code generation and execution."""
    generated_code: str = Field(description="The generated Python code")
    execution_result: str = Field(description="Result of code execution")
    output_files: List[str] = Field(description="List of output files created")
    # Check this flag before trusting execution_result / output_files.
    execution_success: bool = Field(description="Whether code execution was successful")
    error_messages: List[str] = Field(default_factory=list, description="Any error messages encountered")
103
+
104
+
105
class DocumentAnalysisResult(BaseModel):
    """Complete result of document analysis team workflow.

    Aggregates per-stage summaries (as plain strings) plus the final
    findings, output files, and overall success flag.
    """
    document_type: str = Field(description="Type of document analyzed")
    analysis_objective: str = Field(description="Original analysis objective")
    processing_summary: str = Field(description="Summary of the entire processing workflow")

    # Results from each stage
    planning_notes: str = Field(description="Notes from the planning stage")
    prompts_created: str = Field(description="Summary of prompts and schemas created")
    data_extracted: str = Field(description="Summary of data extraction results")
    data_arranged: str = Field(description="Summary of data arrangement and insights")
    code_generated: str = Field(description="Summary of code generation and execution")

    # Final outputs
    key_findings: List[str] = Field(description="Key findings from the analysis")
    output_files_created: List[str] = Field(description="List of output files created")
    success: bool = Field(description="Whether the analysis completed successfully")
    recommendations: List[str] = Field(default_factory=list, description="Recommendations based on analysis")
123
+
124
+
125
class ExtractionField(BaseModel):
    """Individual field specification for data extraction."""
    field_name: str = Field(description="Name of the field to extract")
    field_type: str = Field(description="Type of data (text, number, date, etc.)")
    description: str = Field(description="Description of what this field represents")
    # Fields default to required; extractors may skip optional ones.
    required: bool = Field(default=True, description="Whether this field is required")
131
+
132
class AgentPrompt(BaseModel):
    """Prompt configuration for a specific agent."""
    agent_name: str = Field(description="Name of the agent")
    specialized_instructions: List[str] = Field(description="Specialized instructions for this agent")
    input_requirements: List[str] = Field(description="What input this agent needs")
    output_requirements: List[str] = Field(description="What output this agent should produce")
    success_criteria: List[str] = Field(description="Criteria for successful completion")
139
+
140
class PromptsAndSchemas(BaseModel):
    """Prompts and schemas for all agents in the workflow."""
    # Data extraction specific
    extraction_prompt: str = Field(description="Optimized prompt for data extraction")
    extraction_fields: List[ExtractionField] = Field(
        description="List of fields to extract from the document"
    )
    arrangement_rules: List[str] = Field(description="Rules for organizing extracted data")
    validation_criteria: List[str] = Field(description="Criteria for validating extracted data")

    # All agent prompts
    agent_prompts: List[AgentPrompt] = Field(description="Specialized prompts for each agent")
    workflow_coordination: List[str] = Field(description="Instructions for coordinating between agents")
    quality_assurance: List[str] = Field(description="Quality assurance guidelines for all agents")
prompt_gallery.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "categories": {
3
+ "financial": {
4
+ "name": "Financial Content Extraction (Simple Structure)",
5
+ "icon": "📊",
6
+ "description": "Extract all tables and sectioned data from annual reports, placing each type in separate Excel sheets, without calculations.",
7
+ "prompts": [
8
+ {
9
+ "id": "extract_all_tables_simple",
10
+ "title": "Extract All Tables & Sections (No Charts, No Calculations)",
11
+ "icon": "📄",
12
+ "description": "Extract every table and structured data section from the annual report PDF and organize into clearly named Excel sheets. No calculations or charts—just pure content.",
13
+ "prompt": "For the provided annual report, extract EVERY table and structured content section found (including financial statements, notes, schedules, management discussion tables, segmental/line/regional breakdowns, etc.) and output into an Excel (.xlsx) file. Each sheet should be named after the report section or table heading, matching the document (examples: 'Income Statement', 'Balance Sheet', 'Segment Information', 'Risk Table', 'Notes to FS - Table 4', etc). Maintain all original row/column structure and include all source footnotes, captions, and section headers in the appropriate positions for context. \n\nHeader Row Formatting: Bold, fill light gray (RGB 230,230,230), font size 11. Freeze top row in every sheet. Wrap text in all columns if content overflows. Maintain all cell alignments as close to original as possible. \n\nInsert a cover sheet named 'Extracted Sections Index' that lists every sheet name, the original page number/range, and a short description ('Income Statement – p. 23 – Consolidated company-wide income', etc). Do not perform or add any numerical calculations or analytics. The focus is pure, lossless data extraction and organization."
14
+ },
15
+ {
16
+ "id": "extract_all_tables_with_charts",
17
+ "title": "Extract All Tables & Sections (Add Simple Charts)",
18
+ "icon": "📊",
19
+ "description": "Extract all tables and structured content, with optional basic Excel charts for major financial statements, but no derived calculations.",
20
+ "prompt": "Extract every table and section of structured data from the annual report into a multi-sheet Excel (.xlsx) file. Sheet names should match those of the tables' original titles in the report (e.g., 'Cash Flow Statement', 'Product Sales', 'Management Table 2'). For the three core statements ('Income Statement', 'Balance Sheet', 'Cash Flow Statement'), create a second sheet with the same name plus ' Chart' (e.g. 'Income Statement Chart'), placing a default bar or line chart visualizing the table's top-level rows by year (with no extra calculations or commentary—just raw data charted as-is). \n\nAll other sheet formatting rules: Header row bold, pale blue fill (RGB 217,228,240), font 11. Freeze top row. Wrap text in all columns. Add a first sheet called 'Sections Directory' with a table listing all subsequent sheet names, their corresponding report page(s), and a short summary for user navigation. No calculated fields or analytics—output is strictly direct report extraction with optional reference charts only for core statements."
21
+ }
22
+ ]
23
+ }
24
+ },
25
+ "metadata": {
26
+ "version": "1.0-simple",
27
+ "last_updated": "2025-07-18",
28
+ "description": "Intuitive and simple financial document extraction prompts: choose lossless structure-only or add basic charts—no calculations."
29
+ }
30
+ }
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ agno>=1.7.4
2
+ gradio
3
+ google-generativeai
4
+ PyPDF2
5
+ Pillow
6
+ python-dotenv
7
+ pandas
8
+ matplotlib
9
+ openpyxl
10
+ python-docx
11
+ lxml
12
+ markdown
13
+ requests
14
+ google-genai
15
+ seaborn
16
+ sqlalchemy
17
+ websockets
settings.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from dotenv import load_dotenv
4
+
5
+ load_dotenv()
6
+
7
+
8
class Settings:
    """Central application configuration sourced from environment variables.

    Values are read once at import time; call ``validate_config()`` before
    use to ensure required settings are present and the temp dir exists.
    """

    # Required at runtime; validated in validate_config().
    GOOGLE_AI_API_KEY = os.getenv("GOOGLE_AI_API_KEY")
    # Maximum accepted upload size, in megabytes.
    MAX_FILE_SIZE_MB = 50
    # File extensions (without the leading dot) accepted by the uploader.
    SUPPORTED_FILE_TYPES = [
        "pdf",
        "txt",
        "png",
        "jpg",
        "jpeg",
        "docx",
        "xlsx",
        "csv",
        "md",
        "json",
        "xml",
        "html",
        "py",
        "js",
        "ts",
        "doc",
        "xls",
        "ppt",
        "pptx",
    ]
    # Use /tmp for temporary files on Hugging Face Spaces (or override with TEMP_DIR env var)
    TEMP_DIR = Path(os.getenv("TEMP_DIR", "/tmp/data_extractor_temp"))
    DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "python:3.12-slim")
    # Per-agent model names; each can be overridden independently via env.
    COORDINATOR_MODEL = os.getenv("COORDINATOR_MODEL", "gemini-2.5-pro")
    PROMPT_ENGINEER_MODEL = os.getenv("PROMPT_ENGINEER_MODEL", "gemini-2.5-pro")
    DATA_EXTRACTOR_MODEL = os.getenv("DATA_EXTRACTOR_MODEL", "gemini-2.5-pro")
    DATA_ARRANGER_MODEL = os.getenv("DATA_ARRANGER_MODEL", "gemini-2.5-pro")
    CODE_GENERATOR_MODEL = os.getenv("CODE_GENERATOR_MODEL", "gemini-2.5-pro")

    # Per-agent thinking-budget token limits.
    # NOTE(review): -1 presumably means "unlimited / provider default" --
    # confirm against the model client's API.
    COORDINATOR_MODEL_THINKING_BUDGET = 2048
    PROMPT_ENGINEER_MODEL_THINKING_BUDGET = 2048
    DATA_EXTRACTOR_MODEL_THINKING_BUDGET = -1
    DATA_ARRANGER_MODEL_THINKING_BUDGET = 3072
    CODE_GENERATOR_MODEL_THINKING_BUDGET = 3072

    @classmethod
    def validate_config(cls):
        """Fail fast on missing required config and create the temp dir.

        Raises:
            ValueError: if GOOGLE_AI_API_KEY is not set.
        """
        if not cls.GOOGLE_AI_API_KEY:
            raise ValueError("GOOGLE_AI_API_KEY required")
        # parents=True: a TEMP_DIR override nested under a missing parent
        # previously raised FileNotFoundError from mkdir(exist_ok=True).
        cls.TEMP_DIR.mkdir(parents=True, exist_ok=True)
52
+
53
+
54
+ settings = Settings()
static/terminal.html ADDED
@@ -0,0 +1,588 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Manus AI Terminal</title>
7
+ <style>
8
+ * {
9
+ margin: 0;
10
+ padding: 0;
11
+ box-sizing: border-box;
12
+ }
13
+
14
+ body {
15
+ font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
16
+ background: #0d1117;
17
+ color: #c9d1d9;
18
+ height: 100vh;
19
+ overflow: hidden;
20
+ }
21
+
22
+ .terminal-container {
23
+ display: flex;
24
+ flex-direction: column;
25
+ height: 100vh;
26
+ background: linear-gradient(135deg, #0d1117 0%, #161b22 100%);
27
+ border: 1px solid #30363d;
28
+ }
29
+
30
+ .terminal-header {
31
+ display: flex;
32
+ align-items: center;
33
+ justify-content: space-between;
34
+ padding: 12px 16px;
35
+ background: #161b22;
36
+ border-bottom: 1px solid #30363d;
37
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3);
38
+ }
39
+
40
+ .terminal-title {
41
+ display: flex;
42
+ align-items: center;
43
+ gap: 8px;
44
+ font-size: 14px;
45
+ font-weight: 600;
46
+ color: #f0f6fc;
47
+ }
48
+
49
+ .terminal-icon {
50
+ width: 16px;
51
+ height: 16px;
52
+ background: #238636;
53
+ border-radius: 50%;
54
+ position: relative;
55
+ }
56
+
57
+ .terminal-icon::after {
58
+ content: '>';
59
+ position: absolute;
60
+ top: 50%;
61
+ left: 50%;
62
+ transform: translate(-50%, -50%);
63
+ font-size: 10px;
64
+ color: white;
65
+ font-weight: bold;
66
+ }
67
+
68
+ .terminal-controls {
69
+ display: flex;
70
+ gap: 8px;
71
+ }
72
+
73
+ .control-btn {
74
+ width: 12px;
75
+ height: 12px;
76
+ border-radius: 50%;
77
+ border: none;
78
+ cursor: pointer;
79
+ transition: opacity 0.2s;
80
+ }
81
+
82
+ .control-btn:hover {
83
+ opacity: 0.8;
84
+ }
85
+
86
+ .close { background: #ff5f56; }
87
+ .minimize { background: #ffbd2e; }
88
+ .maximize { background: #27ca3f; }
89
+
90
+ .terminal-body {
91
+ flex: 1;
92
+ display: flex;
93
+ flex-direction: column;
94
+ overflow: hidden;
95
+ }
96
+
97
+ .terminal-output {
98
+ flex: 1;
99
+ padding: 16px;
100
+ overflow-y: auto;
101
+ font-size: 13px;
102
+ line-height: 1.4;
103
+ background: #0d1117;
104
+ scrollbar-width: thin;
105
+ scrollbar-color: #30363d #0d1117;
106
+ }
107
+
108
+ .terminal-output::-webkit-scrollbar {
109
+ width: 8px;
110
+ }
111
+
112
+ .terminal-output::-webkit-scrollbar-track {
113
+ background: #0d1117;
114
+ }
115
+
116
+ .terminal-output::-webkit-scrollbar-thumb {
117
+ background: #30363d;
118
+ border-radius: 4px;
119
+ }
120
+
121
+ .terminal-output::-webkit-scrollbar-thumb:hover {
122
+ background: #484f58;
123
+ }
124
+
125
+ .terminal-line {
126
+ margin-bottom: 2px;
127
+ white-space: pre-wrap;
128
+ word-wrap: break-word;
129
+ }
130
+
131
+ .command-line {
132
+ color: #58a6ff;
133
+ font-weight: 600;
134
+ }
135
+
136
+ .output-line {
137
+ color: #c9d1d9;
138
+ }
139
+
140
+ .error-line {
141
+ color: #f85149;
142
+ }
143
+
144
+ .success-line {
145
+ color: #56d364;
146
+ }
147
+
148
+ .system-line {
149
+ color: #ffa657;
150
+ font-style: italic;
151
+ }
152
+
153
+ .timestamp {
154
+ color: #7d8590;
155
+ font-size: 11px;
156
+ margin-right: 8px;
157
+ }
158
+
159
+ .terminal-input {
160
+ display: flex;
161
+ align-items: center;
162
+ padding: 12px 16px;
163
+ background: #161b22;
164
+ border-top: 1px solid #30363d;
165
+ }
166
+
167
+ .prompt {
168
+ color: #58a6ff;
169
+ margin-right: 8px;
170
+ font-weight: 600;
171
+ }
172
+
173
+ .input-field {
174
+ flex: 1;
175
+ background: transparent;
176
+ border: none;
177
+ color: #c9d1d9;
178
+ font-family: inherit;
179
+ font-size: 13px;
180
+ outline: none;
181
+ }
182
+
183
+ .input-field::placeholder {
184
+ color: #7d8590;
185
+ }
186
+
187
+ .status-indicator {
188
+ display: flex;
189
+ align-items: center;
190
+ gap: 8px;
191
+ margin-left: 12px;
192
+ }
193
+
194
+ .status-dot {
195
+ width: 8px;
196
+ height: 8px;
197
+ border-radius: 50%;
198
+ background: #7d8590;
199
+ transition: background-color 0.3s;
200
+ }
201
+
202
+ .status-dot.connected {
203
+ background: #56d364;
204
+ box-shadow: 0 0 8px rgba(86, 211, 100, 0.5);
205
+ }
206
+
207
+ .status-dot.running {
208
+ background: #ffa657;
209
+ animation: pulse 1.5s infinite;
210
+ }
211
+
212
+ .status-dot.error {
213
+ background: #f85149;
214
+ }
215
+
216
+ @keyframes pulse {
217
+ 0%, 100% { opacity: 1; }
218
+ 50% { opacity: 0.5; }
219
+ }
220
+
221
+ .typing-indicator {
222
+ display: none;
223
+ color: #7d8590;
224
+ font-style: italic;
225
+ animation: blink 1s infinite;
226
+ }
227
+
228
+ @keyframes blink {
229
+ 0%, 50% { opacity: 1; }
230
+ 51%, 100% { opacity: 0; }
231
+ }
232
+
233
+ .command-history {
234
+ position: absolute;
235
+ bottom: 60px;
236
+ left: 16px;
237
+ right: 16px;
238
+ background: #21262d;
239
+ border: 1px solid #30363d;
240
+ border-radius: 6px;
241
+ max-height: 200px;
242
+ overflow-y: auto;
243
+ display: none;
244
+ z-index: 1000;
245
+ }
246
+
247
+ .history-item {
248
+ padding: 8px 12px;
249
+ cursor: pointer;
250
+ border-bottom: 1px solid #30363d;
251
+ transition: background-color 0.2s;
252
+ }
253
+
254
+ .history-item:hover {
255
+ background: #30363d;
256
+ }
257
+
258
+ .history-item:last-child {
259
+ border-bottom: none;
260
+ }
261
+
262
+ /* Responsive design */
263
+ @media (max-width: 768px) {
264
+ .terminal-header {
265
+ padding: 8px 12px;
266
+ }
267
+
268
+ .terminal-output {
269
+ padding: 12px;
270
+ font-size: 12px;
271
+ }
272
+
273
+ .terminal-input {
274
+ padding: 8px 12px;
275
+ }
276
+ }
277
+ </style>
278
+ </head>
279
+ <body>
280
+ <div class="terminal-container">
281
+ <div class="terminal-header">
282
+ <div class="terminal-title">
283
+ <div class="terminal-icon"></div>
284
+ <span>Manus AI Terminal</span>
285
+ </div>
286
+ <div class="terminal-controls">
287
+ <button class="control-btn close" onclick="closeTerminal()"></button>
288
+ <button class="control-btn minimize" onclick="minimizeTerminal()"></button>
289
+ <button class="control-btn maximize" onclick="maximizeTerminal()"></button>
290
+ </div>
291
+ </div>
292
+
293
+ <div class="terminal-body">
294
+ <div class="terminal-output" id="output"></div>
295
+ <div class="command-history" id="history"></div>
296
+
297
+ <div class="terminal-input">
298
+ <span class="prompt">$</span>
299
+ <input type="text" class="input-field" id="commandInput"
300
+ placeholder="Type a command and press Enter..."
301
+ autocomplete="off" spellcheck="false">
302
+ <div class="status-indicator">
303
+ <div class="status-dot" id="statusDot"></div>
304
+ <span id="statusText">Disconnected</span>
305
+ </div>
306
+ </div>
307
+ </div>
308
+ </div>
309
+
310
+ <script>
311
class ManusTerminal {
    // Browser-side terminal UI: connects to the WebSocket server at
    // ws://localhost:8765 (matching the default of start_websocket_server in
    // terminal_stream.py) and renders streamed command output.

    constructor() {
        this.ws = null;
        // Cached DOM nodes used throughout the class.
        this.output = document.getElementById('output');
        this.input = document.getElementById('commandInput');
        this.statusDot = document.getElementById('statusDot');
        this.statusText = document.getElementById('statusText');
        // NOTE(review): this.history (the dropdown element) is cached but never
        // used by any method below — confirm before removing.
        this.history = document.getElementById('history');

        this.commandHistory = [];   // previously executed commands
        this.historyIndex = -1;     // cursor into commandHistory for arrow keys
        this.isConnected = false;   // WebSocket connection state
        this.isRunning = false;     // whether a remote command is executing

        this.init();
    }

    // Wire up events, open the socket, and print the welcome banner.
    init() {
        this.setupEventListeners();
        this.connect();
        this.addWelcomeMessage();
    }

    setupEventListeners() {
        this.input.addEventListener('keydown', (e) => this.handleKeyDown(e));
        this.input.addEventListener('keyup', (e) => this.handleKeyUp(e));

        // Auto-reconnect on window focus
        window.addEventListener('focus', () => {
            if (!this.isConnected) {
                this.connect();
            }
        });
    }

    // Open the WebSocket and install lifecycle handlers.
    // On close, a reconnect attempt is scheduled every 3 seconds.
    connect() {
        try {
            this.ws = new WebSocket('ws://localhost:8765');

            this.ws.onopen = () => {
                this.isConnected = true;
                this.updateStatus('connected', 'Connected');
                this.addSystemMessage('🚀 Connected to terminal server');
            };

            this.ws.onmessage = (event) => {
                const data = JSON.parse(event.data);
                this.handleMessage(data);
            };

            this.ws.onclose = () => {
                this.isConnected = false;
                this.isRunning = false;
                this.updateStatus('error', 'Disconnected');
                this.addSystemMessage('❌ Connection lost. Attempting to reconnect...');

                // Auto-reconnect after 3 seconds
                setTimeout(() => this.connect(), 3000);
            };

            this.ws.onerror = (error) => {
                this.addSystemMessage('⚠️ Connection error. Check if the server is running.');
            };

        } catch (error) {
            this.addSystemMessage('❌ Failed to connect to terminal server');
        }
    }

    // Dispatch one server message (shapes produced by
    // TerminalStreamManager.broadcast in terminal_stream.py).
    handleMessage(data) {
        // NOTE(review): timestamp is computed but never used below — addLine()
        // stamps lines with the local time instead; confirm before removing.
        const timestamp = new Date(data.timestamp).toLocaleTimeString();

        switch (data.type) {
            case 'connected':
                this.addSystemMessage(data.message);
                break;

            case 'command_start':
                this.isRunning = true;
                this.updateStatus('running', 'Running');
                this.addCommandLine(data.message);
                break;

            case 'output':
                this.addOutputLine(data.data, data.stream);
                break;

            case 'command_complete':
                this.isRunning = false;
                this.updateStatus('connected', 'Connected');
                this.addSystemMessage(`Process completed with exit code ${data.exit_code}`);
                break;

            case 'error':
                this.addErrorLine(data.data);
                break;

            case 'interrupted':
                this.isRunning = false;
                this.updateStatus('connected', 'Connected');
                this.addSystemMessage(data.message);
                break;
        }
    }

    // Keyboard handling: Enter submits, arrows browse history, Ctrl+C interrupts.
    handleKeyDown(e) {
        switch (e.key) {
            case 'Enter':
                e.preventDefault();
                this.executeCommand();
                break;

            case 'ArrowUp':
                e.preventDefault();
                this.navigateHistory(-1);
                break;

            case 'ArrowDown':
                e.preventDefault();
                this.navigateHistory(1);
                break;

            case 'Tab':
                e.preventDefault();
                // TODO: Implement command completion
                break;

            case 'c':
                if (e.ctrlKey) {
                    e.preventDefault();
                    this.interruptCommand();
                }
                break;
        }
    }

    handleKeyUp(e) {
        // Show typing indicator
        if (e.target.value.length > 0) {
            // TODO: Implement typing indicator
        }
    }

    // Send the current input line to the server and record it in history.
    executeCommand() {
        const command = this.input.value.trim();
        if (!command || !this.isConnected) return;

        // Add to history (immediate duplicates are skipped)
        if (this.commandHistory[this.commandHistory.length - 1] !== command) {
            this.commandHistory.push(command);
        }
        this.historyIndex = this.commandHistory.length;

        // Send command
        this.ws.send(JSON.stringify({
            'type': 'command',
            'command': command
        }));

        // Clear input
        this.input.value = '';
    }

    // Ask the server to terminate the running process (bound to Ctrl+C).
    interruptCommand() {
        if (this.isRunning && this.isConnected) {
            this.ws.send(JSON.stringify({
                'type': 'interrupt'
            }));
        }
    }

    // Move through command history; direction is -1 (older) or +1 (newer).
    navigateHistory(direction) {
        if (this.commandHistory.length === 0) return;

        this.historyIndex += direction;

        if (this.historyIndex < 0) {
            this.historyIndex = 0;
        } else if (this.historyIndex >= this.commandHistory.length) {
            // Walked past the newest entry: clear the input line.
            this.historyIndex = this.commandHistory.length;
            this.input.value = '';
            return;
        }

        this.input.value = this.commandHistory[this.historyIndex] || '';
    }

    // Update the status dot (via its CSS class) and the label text.
    updateStatus(status, text) {
        this.statusDot.className = `status-dot ${status}`;
        this.statusText.textContent = text;
    }

    addWelcomeMessage() {
        this.addSystemMessage('🎯 Manus AI Terminal - Ready for commands');
        this.addSystemMessage('💡 Use Ctrl+C to interrupt running commands');
        this.addSystemMessage('📚 Use ↑/↓ arrows to navigate command history');
    }

    // Convenience wrappers around addLine() with the matching CSS class.
    addCommandLine(text) {
        this.addLine(text, 'command-line');
    }

    addOutputLine(text, stream = 'stdout') {
        const className = stream === 'stderr' ? 'error-line' : 'output-line';
        this.addLine(text, className);
    }

    addErrorLine(text) {
        this.addLine(text, 'error-line');
    }

    addSystemMessage(text) {
        this.addLine(text, 'system-line');
    }

    // Append one timestamped line to the output pane and keep it scrolled down.
    addLine(text, className = 'output-line') {
        const line = document.createElement('div');
        line.className = `terminal-line ${className}`;

        const timestamp = document.createElement('span');
        timestamp.className = 'timestamp';
        timestamp.textContent = new Date().toLocaleTimeString();

        const content = document.createElement('span');
        content.textContent = text;

        line.appendChild(timestamp);
        line.appendChild(content);

        this.output.appendChild(line);
        this.scrollToBottom();
    }

    scrollToBottom() {
        this.output.scrollTop = this.output.scrollHeight;
    }

    // Wipe the output pane and reprint the welcome banner (bound to Ctrl+L).
    clear() {
        this.output.innerHTML = '';
        this.addWelcomeMessage();
    }
}
553
+
554
// Terminal control functions
function closeTerminal() {
    // Ask before closing, since this discards the current session view.
    const shouldClose = confirm('Are you sure you want to close the terminal?');
    if (shouldClose) {
        window.close();
    }
}
560
+
561
function minimizeTerminal() {
    // TODO: minimize is not implemented; log only for now.
    console.log('Minimize terminal');
}
565
+
566
function maximizeTerminal() {
    // Toggle fullscreen: request it on the page root when inactive, exit otherwise.
    const fullscreenActive = Boolean(document.fullscreenElement);
    if (!fullscreenActive) {
        document.documentElement.requestFullscreen();
    } else {
        document.exitFullscreen();
    }
}
573
+
574
// Initialize terminal when page loads
document.addEventListener('DOMContentLoaded', () => {
    // Exposed globally so the keydown shortcut below (and debugging) can reach it.
    window.terminal = new ManusTerminal();
});

// Add global commands
window.addEventListener('keydown', (e) => {
    // Ctrl+L clears the terminal, mirroring common shell behavior.
    if (e.ctrlKey && e.key === 'l') {
        e.preventDefault();
        window.terminal.clear();
    }
});
586
+ </script>
587
+ </body>
588
+ </html>
terminal_stream.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import websockets
4
+ from typing import Dict, Any, Set
5
+ import subprocess
6
+ import shlex
7
+ from queue import Queue
8
+ import threading
9
+ import time
10
+ import logging
11
+ from datetime import datetime
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
class TerminalStreamManager:
    """Manages real-time terminal streaming with WebSocket connections.

    One command runs at a time; its stdout/stderr are broadcast line-by-line
    as JSON messages to every connected WebSocket client.
    """

    def __init__(self):
        # Connected clients that receive every broadcast message.
        self.clients: Set[websockets.WebSocketServerProtocol] = set()
        # NOTE(review): command_queue is never read or written by this class —
        # it looks like dead state; confirm before relying on it.
        self.command_queue = Queue()
        self.is_running = False
        # subprocess.Popen of the currently executing command, or None when idle.
        self.current_process = None

    async def register_client(self, websocket):
        """Register a new WebSocket client and send it a greeting message."""
        self.clients.add(websocket)
        await websocket.send(json.dumps({
            'type': 'connected',
            'message': '🚀 Terminal connected successfully',
            'timestamp': datetime.now().isoformat()
        }))
        logger.info(f"Terminal client connected. Total clients: {len(self.clients)}")

    async def unregister_client(self, websocket):
        """Unregister a WebSocket client (no-op if it was never registered)."""
        self.clients.discard(websocket)
        logger.info(f"Terminal client disconnected. Total clients: {len(self.clients)}")

    async def broadcast(self, message: Dict[str, Any]):
        """Broadcast *message* (stamped with the current time) to all clients.

        Clients whose connection fails mid-send are dropped from the set.
        """
        if self.clients:
            disconnected = set()
            message['timestamp'] = datetime.now().isoformat()

            for client in self.clients:
                try:
                    await client.send(json.dumps(message))
                except websockets.exceptions.ConnectionClosed:
                    disconnected.add(client)
                except Exception as e:
                    logger.error(f"Error broadcasting to client: {e}")
                    disconnected.add(client)

            # Clean up disconnected clients
            for client in disconnected:
                self.clients.discard(client)

    async def execute_command(self, command: str):
        """Execute *command* and stream its output to all clients in real time.

        Failures (unparseable command, spawn errors, ...) are broadcast as an
        'error' message rather than raised to the caller.
        """
        await self.broadcast({
            'type': 'command_start',
            'command': command,
            'message': f'$ {command}'
        })

        try:
            # Security: shell=False semantics — tokenize with shlex rather than
            # handing the raw string to a shell.
            safe_command = shlex.split(command)

            self.current_process = subprocess.Popen(
                safe_command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                bufsize=1,  # line-buffered (text mode)
                universal_newlines=True
            )

            # Hoisted out of the streaming loop: re-importing on every
            # iteration was pure overhead. Only select.select() on pipes can
            # fail (unsupported on some platforms); that case is handled by
            # the fallback below.
            import select

            # Stream output until the process exits.
            while True:
                if self.current_process.poll() is not None:
                    # Process finished; flush any remaining buffered output.
                    remaining_stdout = self.current_process.stdout.read()
                    remaining_stderr = self.current_process.stderr.read()

                    if remaining_stdout:
                        await self.broadcast({
                            'type': 'output',
                            'data': remaining_stdout,
                            'stream': 'stdout'
                        })

                    if remaining_stderr:
                        await self.broadcast({
                            'type': 'output',
                            'data': remaining_stderr,
                            'stream': 'stderr'
                        })

                    break

                try:
                    # Wait up to 100ms for either pipe to become readable.
                    # NOTE(review): readline() is still a blocking call on the
                    # event-loop thread; fine for line-oriented output but it
                    # can stall on a partial line — confirm acceptable.
                    ready, _, _ = select.select(
                        [self.current_process.stdout, self.current_process.stderr], [], [], 0.1
                    )

                    for stream in ready:
                        if stream == self.current_process.stdout:
                            line = stream.readline()
                            if line:
                                await self.broadcast({
                                    'type': 'output',
                                    'data': line,
                                    'stream': 'stdout'
                                })
                        elif stream == self.current_process.stderr:
                            line = stream.readline()
                            if line:
                                await self.broadcast({
                                    'type': 'output',
                                    'data': line,
                                    'stream': 'stderr'
                                })
                except (OSError, ValueError):
                    # Fix: the original bare `except:` swallowed *everything*,
                    # including asyncio.CancelledError, which could leave this
                    # loop spinning forever on task cancellation. Only the
                    # expected select/pipe errors (e.g. selecting on pipes is
                    # unsupported on some platforms) fall back to polling.
                    await asyncio.sleep(0.1)

            # Report the child's exit status to all clients.
            await self.broadcast({
                'type': 'command_complete',
                'exit_code': self.current_process.returncode,
                'message': f'Process exited with code {self.current_process.returncode}'
            })

        except Exception as e:
            await self.broadcast({
                'type': 'error',
                'data': str(e),
                'stream': 'system'
            })
        finally:
            self.current_process = None

    async def handle_client(self, websocket, path):
        """Handle one WebSocket client connection for its entire lifetime.

        Understands two message types: {'type': 'command', 'command': ...}
        and {'type': 'interrupt'}.
        """
        await self.register_client(websocket)
        try:
            async for message in websocket:
                try:
                    data = json.loads(message)

                    if data.get('type') == 'command':
                        command = data.get('command', '').strip()
                        if command:
                            await self.execute_command(command)

                    elif data.get('type') == 'interrupt':
                        if self.current_process:
                            self.current_process.terminate()
                            await self.broadcast({
                                'type': 'interrupted',
                                'message': 'Process interrupted by user'
                            })

                except json.JSONDecodeError:
                    await websocket.send(json.dumps({
                        'type': 'error',
                        'message': 'Invalid JSON message'
                    }))

        except websockets.exceptions.ConnectionClosed:
            pass
        finally:
            await self.unregister_client(websocket)
177
+
178
# Global terminal manager instance
terminal_manager = TerminalStreamManager()

async def start_websocket_server(host='localhost', port=8765):
    """Start the WebSocket server that streams terminal sessions.

    Returns the server object created by ``websockets.serve``.
    """
    logger.info(f"Starting terminal WebSocket server on {host}:{port}")

    async def _delegate(websocket, path):
        # Every connection is forwarded to the shared manager instance.
        await terminal_manager.handle_client(websocket, path)

    return await websockets.serve(_delegate, host, port)
189
+
190
def run_websocket_server():
    """Launch the terminal WebSocket server on a daemon thread.

    Returns the started ``threading.Thread`` so callers can keep a handle on it.
    """
    def _serve_forever():
        # Each thread needs its own event loop.
        event_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(event_loop)

        try:
            server = event_loop.run_until_complete(start_websocket_server())
            logger.info("Terminal WebSocket server started successfully")
            event_loop.run_forever()
        except Exception as e:
            logger.error(f"Error starting WebSocket server: {e}")

    worker = threading.Thread(target=_serve_forever, daemon=True)
    worker.start()
    return worker
utils/__init__.py ADDED
File without changes
utils/file_handler.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
import shutil
import tempfile
import time
import uuid
from pathlib import Path
from typing import Dict

import PyPDF2
from PIL import Image

from config.settings import settings
8
+
9
class FileHandler:
    """Validate, persist, preview, and clean up user-uploaded files.

    Files are stored under ``settings.TEMP_DIR/<session_id>/input/``.
    """

    def __init__(self):
        # Root temp directory and maximum accepted upload size (MB), from settings.
        self.temp_dir = Path(settings.TEMP_DIR)
        self.max_size_mb = settings.MAX_FILE_SIZE_MB

    def validate_file(self, uploaded_file) -> Dict:
        """Validate an upload for presence, size, and supported extension.

        Returns:
            dict with keys ``valid`` (bool), ``error`` (str or None) and
            ``file_info`` (dict with name/size_mb/type, or None on failure).
        """
        validation = {"valid": False, "error": None, "file_info": None}
        if not uploaded_file:
            validation["error"] = "No file"
            return validation
        file_size_mb = len(uploaded_file.getbuffer()) / (1024 * 1024)
        if file_size_mb > self.max_size_mb:
            validation["error"] = "File too large"
            return validation
        file_extension = uploaded_file.name.split('.')[-1].lower()
        if file_extension not in settings.SUPPORTED_FILE_TYPES:
            validation["error"] = "Unsupported type"
            return validation
        validation["valid"] = True
        # uploaded_file.name may be a full Gradio temp path; show only the basename.
        filename = os.path.basename(uploaded_file.name)
        validation["file_info"] = {"name": filename, "size_mb": file_size_mb, "type": file_extension}
        return validation

    def save_uploaded_file(self, uploaded_file, session_id: str) -> str:
        """Copy an upload into the session's input directory and return its path.

        A random short session id is generated when *session_id* is falsy.
        """
        if not session_id:
            session_id = str(uuid.uuid4())[:8]

        # Create session directory in temp
        session_dir = self.temp_dir / session_id / "input"
        session_dir.mkdir(parents=True, exist_ok=True)

        logger = logging.getLogger(__name__)

        # uploaded_file.name holds the full Gradio temp path; keep just the filename.
        filename = os.path.basename(uploaded_file.name)
        file_path = session_dir / filename

        logger.info(f"Moving file from Gradio temp: {uploaded_file.name}")
        logger.info(f"To session directory: {file_path}")

        with open(file_path, "wb") as f:
            # Support the different upload object flavors Gradio may hand us.
            if hasattr(uploaded_file, 'getbuffer'):
                f.write(uploaded_file.getbuffer())
            elif hasattr(uploaded_file, 'read'):
                f.write(uploaded_file.read())
            else:
                # NamedString and similar objects: re-read from the temp path itself.
                with open(uploaded_file.name, 'rb') as src:
                    f.write(src.read())
        return str(file_path)

    def get_file_preview(self, file_path: str, file_type: str) -> str:
        """Return up to 500 characters of preview text for PDF or TXT files.

        Other types (and any read/parse failure) yield a human-readable
        "not available" string instead of raising.
        """
        if file_type == 'pdf':
            try:
                with open(file_path, 'rb') as file:
                    reader = PyPDF2.PdfReader(file)
                    if len(reader.pages) > 0:
                        text = reader.pages[0].extract_text()
                        return text[:500] + "..." if len(text) > 500 else text
            except Exception:
                return "PDF preview not available"
        elif file_type == 'txt':
            try:
                with open(file_path, 'r', encoding='utf-8') as file:
                    text = file.read()
                    return text[:500] + "..." if len(text) > 500 else text
            except Exception:
                return "Text preview not available"
        # Similar for image types could be added
        return "Preview not available"

    def cleanup_temp_files(self):
        """Best-effort removal of session directories older than 24 hours."""
        try:
            current_time = time.time()
            for session_dir in self.temp_dir.iterdir():
                if session_dir.is_dir():
                    dir_age = current_time - session_dir.stat().st_mtime
                    if dir_age > 24 * 3600:  # 24 hours in seconds
                        shutil.rmtree(session_dir)
        except OSError:
            # Cleanup is opportunistic; missing dirs or permission errors are ignored.
            pass
utils/logger.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from datetime import datetime
3
+ from pathlib import Path
4
+
5
class AgentLogger:
    """Structured logger for agent workflow events, writing to console and a daily file."""

    def __init__(self, log_dir="logs"):
        """Create (or reuse) the shared ``agent_logger`` with console + file handlers.

        Args:
            log_dir: Directory where daily log files are written (created if missing).
        """
        self.log_dir = Path(log_dir)
        self.log_dir.mkdir(exist_ok=True)
        self.logger = logging.getLogger("agent_logger")
        self.logger.setLevel(logging.DEBUG)
        # Fix: logging.getLogger returns a process-wide singleton, so the
        # original code attached two more handlers on every AgentLogger
        # instantiation, duplicating each record. Attach handlers only once.
        if not self.logger.handlers:
            formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
            console_handler = logging.StreamHandler()
            console_handler.setLevel(logging.INFO)
            console_handler.setFormatter(formatter)
            file_handler = logging.FileHandler(
                self.log_dir / f"agents_{datetime.now().strftime('%Y%m%d')}.log"
            )
            file_handler.setLevel(logging.DEBUG)
            file_handler.setFormatter(formatter)
            self.logger.addHandler(console_handler)
            self.logger.addHandler(file_handler)

    def log_workflow_step(self, agent_name, message):
        """Log a high-level workflow step at INFO level."""
        self.logger.info(f"{agent_name}: {message}")

    def log_agent_output(self, agent_name, output, method, duration):
        """Log an agent's raw output at DEBUG level, including call method and duration."""
        self.logger.debug(f"{agent_name} {method} output: {output} ({duration}s)")

    def log_inter_agent_pass(self, from_agent, to_agent, data_size):
        """Log a data hand-off between two agents at INFO level."""
        self.logger.info(f"🔗 PASS: {from_agent} → {to_agent} | Size: {data_size}")

# Shared module-level logger instance.
agent_logger = AgentLogger()
workflow/__init__.py ADDED
File without changes
workflow/financial_workflow.py ADDED
@@ -0,0 +1,505 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Financial Document Analysis Workflow using Agno Workflows
3
+ Clean, pure-python implementation with structured outputs to avoid JSON parsing issues
4
+ """
5
+
6
+ import json
7
+ from pathlib import Path
8
+ from typing import Dict, List, Optional, Iterator
9
+ from pydantic import BaseModel, Field
10
+
11
+ from agno.agent import Agent, RunResponse
12
+ from agno.models.google import Gemini
13
+ from agno.media import File
14
+ from agno.tools.file import FileTools
15
+ from agno.tools.python import PythonTools
16
+ from agno.workflow import Workflow
17
+ from agno.utils.log import logger
18
+ from agno.tools.shell import ShellTools
19
+ from config.settings import settings
20
+
21
+
22
# Structured Output Models to avoid JSON parsing issues
class DataPoint(BaseModel):
    """One financial data point extracted from a document."""
    field_name: str = Field(..., description="Name of the financial data field")
    value: str = Field(..., description="Value of the field")
    category: str = Field(..., description="Financial category (revenue, expenses, assets, etc.)")
    # Optional metadata; empty string means "not stated in the document".
    period: str = Field(default="", description="Time period if applicable")
    unit: str = Field(default="", description="Currency or measurement unit")
    # Extraction confidence in [0, 1]; defaults to 0.9 when the model does not set it.
    confidence: float = Field(default=0.9, description="Confidence score 0-1")
31
+
32
class ExtractedFinancialData(BaseModel):
    """Structured output of the data-extraction phase."""
    company_name: str = Field(default="", description="Company name")
    document_type: str = Field(..., description="Type of financial document")
    reporting_period: str = Field(default="", description="Reporting period")
    # Flat list of every extracted value; organization into categories happens later.
    data_points: List[DataPoint] = Field(..., description="All extracted financial data points")
    summary: str = Field(..., description="Brief summary of extracted data")
39
+
40
class FinancialCategory(BaseModel):
    """A single category of organized financial data."""
    category_name: str = Field(..., description="Name of the financial category")
    description: str = Field(..., description="Description of what this category contains")
    data_items: Dict[str, str] = Field(..., description="Key-value pairs of financial data")
    # Totals are optional; an empty dict means none were computed for this category.
    totals: Dict[str, str] = Field(default_factory=dict, description="Any calculated totals")
46
+
47
class ArrangedFinancialData(BaseModel):
    """Structured output of the data-arrangement phase."""
    categories: List[FinancialCategory] = Field(..., description="Organized financial categories")
    # Metrics and insights default to empty rather than being required fields.
    key_metrics: Dict[str, str] = Field(default_factory=dict, description="Key financial metrics")
    insights: List[str] = Field(default_factory=list, description="Financial insights and analysis")
    summary: str = Field(..., description="Summary of arranged data")
53
+
54
class GeneratedCode(BaseModel):
    """Structured output of the code-generation phase: Python code for Excel creation."""
    code: str = Field(..., description="Generated Python code for Excel creation")
    description: str = Field(..., description="Description of what the code does")
    output_filename: str = Field(..., description="Expected output filename")
    execution_notes: str = Field(default="", description="Notes about code execution")
60
+
61
+
62
+ class FinancialDocumentWorkflow(Workflow):
63
+ """
64
+ Pure Python workflow for financial document analysis
65
+ Uses structured outputs to eliminate JSON parsing issues
66
+ """
67
+
68
+ description: str = "Financial document analysis workflow with data extraction, organization, and Excel generation"
69
+
70
+
71
+
72
+ # Data Extractor Agent - Structured output eliminates JSON parsing issues
73
+ data_extractor: Agent = Agent(
74
+ model=Gemini(id=settings.DATA_EXTRACTOR_MODEL,thinking_budget=settings.DATA_EXTRACTOR_MODEL_THINKING_BUDGET,api_key=settings.GOOGLE_AI_API_KEY),
75
+ description="Expert financial data extraction specialist",
76
+ instructions=[
77
+ "Extract comprehensive financial data from documents with these priorities:",
78
+ "Identify and classify the document type: Income Statement, Balance Sheet, Cash Flow Statement, 10-K, 10-Q, Annual Report, Quarterly/Interim Report, Prospectus, Earnings Release, Proxy Statement, Investor Presentation, Press Release, or other",
79
+ "Extract report version: audited, unaudited, restated, pro forma",
80
+ "Capture language, country/jurisdiction, and file format (PDF, XLSX, HTML, etc.)",
81
+ "Extract company name and unique identifiers: LEI, CIK, ISIN, Ticker",
82
+ "Extract reporting entity: consolidated, subsidiary, segment",
83
+ "Extract fiscal year and period covered (start and end dates)",
84
+ "Extract all reporting, publication, and filing dates",
85
+ "Extract currency and any currency translation notes",
86
+ "Extract auditors name, if present",
87
+ "Identify financial statement presentation style: single-step, multi-step, consolidated, segmental",
88
+ "Capture table and note references for each data point",
89
+ "Extract total revenue/net sales (with by-product/service, segment, and geography breakdowns if disclosed)",
90
+ "Extract COGS or cost of sales",
91
+ "Extract gross profit and gross margin",
92
+ "Extract operating expenses: R&D, SG&A, advertising, depreciation, amortization",
93
+ "Extract operating income (EBIT) and EBIT margin",
94
+ "Extract non-operating items: interest income/expense, other income/expenses",
95
+ "Extract pretax income, income tax expense, and net income (with breakdowns: continuing, discontinued ops)",
96
+ "Extract basic and diluted EPS",
97
+ "Extract comprehensive and other comprehensive income items",
98
+ "Extract YoY and sequential income comparisons (if available)",
99
+ "Extract current assets: cash and equivalents, marketable securities, accounts receivable (gross/net), inventory (raw, WIP, finished), prepaid expenses, other",
100
+ "Extract non-current assets: PP&E (gross/net), intangible assets, goodwill, LT investments, deferred tax assets, right-of-use assets, other",
101
+ "Extract current liabilities: accounts payable, accrued expenses, short-term debt, lease liabilities, taxes payable, other",
102
+ "Extract non-current liabilities: long-term debt, deferred tax liabilities, pensions, lease obligations, other",
103
+ "Extract total shareholders equity: common/ordinary stock, retained earnings, additional paid-in capital, treasury stock, accumulated OCI, minority interest",
104
+ "Extract book value per share",
105
+ "Extract cash flows: net cash from operating, investing, and financing activities",
106
+ "Extract key cash flow line items: net cash from ops, capex, acquisitions/disposals, dividends, share buybacks, debt activities",
107
+ "Extract non-cash adjustments: depreciation, amortization, SBC, deferred taxes, impairments, gain/loss on sale",
108
+ "Extract profitability ratios: gross margin, operating margin, net margin, EBITDA margin",
109
+ "Extract return ratios: ROE, ROA, ROIC",
110
+ "Extract liquidity/solvency: current ratio, quick ratio, debt/equity, interest coverage",
111
+ "Extract efficiency: asset turnover, inventory turnover, receivables turnover",
112
+ "Extract per-share metrics: EPS (basic/diluted), BVPS, FCF per share",
113
+ "Extract segmental/geographical/operational ratios and breakdowns",
114
+ "Extract shares outstanding, share class details, voting rights",
115
+ "Extract dividends declared/paid (amount, dates)",
116
+ "Extract buyback authorization/utilization details",
117
+ "Extract employee count (average, period-end)",
118
+ "Extract store/branch/office count",
119
+ "Extract customer/user/subscriber numbers (active/paying, ARPU, churn, MAU/DAU)",
120
+ "Extract units shipped/sold, production volumes, operational stats",
121
+ "Extract key management guidance/forecasts if present",
122
+ "Extract risk factors, uncertainties, and forward-looking statements",
123
+ "Extract ESG/sustainability data where available (emissions, board diversity, etc.)",
124
+ "Flag any restatements, adjustments, or one-off items",
125
+ "Highlight material non-recurring, extraordinary, or unusual items (gains/losses, litigation, impairments, restructuring)",
126
+ "Identify related-party transactions and accounting policy changes",
127
+ "For each data point, provide a confidence score (0–1) based on clarity and documentation",
128
+ "Include table/note reference numbers where possible",
129
+ "Note any ambiguity or extraction limitations for specific data",
130
+ "List all units, scales (millions, thousands), and any conversion performed",
131
+ "Normalize date and currency formats across extracted data",
132
+ "Validate calculations (e.g., assets = liabilities + equity), and flag inconsistencies",
133
+ "Return data in a structured format (JSON/table), with reference and confidence annotation"
134
+ ],
135
+ response_model=ExtractedFinancialData,
136
+ structured_outputs=True,
137
+ debug_mode=True,
138
+ )
139
+
140
+
141
+
142
+ # Data Arranger Agent - Organizes data into categories for Excel
143
+ data_arranger: Agent = Agent(
144
+ model=Gemini(id=settings.DATA_ARRANGER_MODEL,thinking_budget=settings.DATA_ARRANGER_MODEL_THINKING_BUDGET,api_key=settings.GOOGLE_AI_API_KEY),
145
+ description="Financial data organization and analysis expert",
146
+ instructions=[
147
+ 'Organize the extracted financial data into logical categories based on financial statement types (Income Statement, Balance Sheet, Cash Flow Statement, etc.).',
148
+ 'Group related financial items together (e.g., all revenue items, all expense items, all asset items).',
149
+ 'Ensure each category has a clear, descriptive name that would work as an Excel worksheet tab.',
150
+ 'Always add appropriate headers for Excel templates including: Years (e.g., 2021, 2022, 2023, 2024), Company names or entity identifiers, Financial line item names, and Units of measurement (e.g., "in millions", "in thousands").',
151
+ 'Create column headers that clearly identify what each data column represents.',
152
+ 'Include row headers that clearly identify each financial line item.',
153
+ 'Design categories suitable for comprehensive Excel worksheets, such as: Income Statement Data, Balance Sheet Data, Cash Flow Data, Key Metrics, and Company Information.',
154
+ 'Maintain data integrity - do not modify, calculate, or analyze the original data values.',
155
+ 'Preserve original data formats and units.',
156
+ 'Ensure data is organized in a tabular format suitable for Excel import.',
157
+ 'Include metadata about data sources and reporting periods where available.',
158
+ 'Package everything into a JSON object with the fields: categories (object containing organized data by category), headers (object containing appropriate headers for each category), and metadata (object containing information about data sources, periods, and units).',
159
+ 'Never perform any analysis on the data.',
160
+ 'Do not calculate ratios, growth rates, or trends.',
161
+ 'Do not provide insights or interpretations.',
162
+ 'Do not modify the actual data values.',
163
+ 'Focus solely on organization and proper formatting.',
164
+ 'Save this JSON as \'arranged_financial_data.json\' using the save_file tool.',
165
+ 'Run list_files to verify that the file now exists in the working directory.',
166
+ 'Use read_file to ensure the JSON content was written correctly.',
167
+ 'If the file is missing or the content is incorrect, debug, re-save, and repeat steps',
168
+ 'Only report success after the files presence and validity are fully confirmed.'
169
+ ],
170
+ tools=[FileTools()], # FileTools for saving arranged data
171
+ # NOTE: Cannot use structured_outputs with tools in Gemini - choosing tools over structured outputs
172
+ markdown=True,
173
+ debug_mode=True,
174
+ add_memory_references=True,
175
+ add_session_summary_references=True,
176
+ exponential_backoff=True,
177
+ retries=10,
178
+ )
179
+
180
+ # Code Generator Agent - Creates Excel generation code
181
+ code_generator = Agent(
182
+ model=Gemini(
183
+ id=settings.CODE_GENERATOR_MODEL,
184
+ thinking_budget=settings.CODE_GENERATOR_MODEL_THINKING_BUDGET,
185
+ api_key=settings.GOOGLE_AI_API_KEY
186
+ ),
187
+ description="Excel report generator that analyzes JSON data and creates formatted workbooks using shell execution on any OS",
188
+ goal="Generate a professional Excel report from arranged_financial_data.json with multiple worksheets, formatting, and charts",
189
+ instructions=[
190
+ "EXECUTION RULE: Always use run_shell_command() for Python execution. Never use save_to_file_and_run().",
191
+ "",
192
+ "CRITICAL: Always read the file to understand the struction of the JSON First"
193
+ "FIRST, use read_file tool to load 'arranged_financial_data.json'.",
194
+ "SECOND, analyze its structure deeply. Identify all keys, data types, nested structures, and any inconsistencies.",
195
+ "THIRD, create analysis.py to programmatically examine the JSON. Execute using run_shell_command().",
196
+ "FOURTH, based on the analysis, design your Excel structure. Plan worksheets, formatting, and charts needed.",
197
+ "FIFTH, implement generate_excel_report.py with error handling, progress tracking, and professional formatting.",
198
+ "",
199
+ "CRITICAL: Always start Python scripts with:",
200
+ "import os",
201
+ "os.chdir(os.path.dirname(os.path.abspath(__file__)) or '.')",
202
+ "This ensures the script runs in the correct directory regardless of OS.",
203
+ "",
204
+ "Available Tools:",
205
+ "- FileTools: read_file, save_file, list_files",
206
+ "- PythonTools: pip_install_package (ONLY for package installation)",
207
+ "- ShellTools: run_shell_command (PRIMARY execution tool)",
208
+ "",
209
+ "Cross-Platform Execution:",
210
+ "- Try: run_shell_command('python script.py 2>&1')",
211
+ "- If fails on Windows: run_shell_command('python.exe script.py 2>&1')",
212
+ "- PowerShell alternative: run_shell_command('powershell -Command \"python script.py\" 2>&1')",
213
+ "",
214
+ "Verification Commands (Linux/Mac):",
215
+ "- run_shell_command('ls -la *.xlsx')",
216
+ "- run_shell_command('file Financial_Report*.xlsx')",
217
+ "- run_shell_command('du -h *.xlsx')",
218
+ "",
219
+ "Verification Commands (Windows/PowerShell):",
220
+ "- run_shell_command('dir *.xlsx')",
221
+ "- run_shell_command('powershell -Command \"Get-ChildItem *.xlsx\"')",
222
+ "- run_shell_command('powershell -Command \"(Get-Item *.xlsx).Length\"')",
223
+ "",
224
+ "Debug Commands (Cross-Platform):",
225
+ "- Current directory: run_shell_command('pwd') or run_shell_command('cd')",
226
+ "- Python location: run_shell_command('where python') or run_shell_command('which python')",
227
+ "- List files: run_shell_command('dir') or run_shell_command('ls')",
228
+ "",
229
+ "Package Installation:",
230
+ "- pip_install_package('openpyxl')",
231
+ "- Or via shell: run_shell_command('pip install openpyxl')",
232
+ "- Windows: run_shell_command('python -m pip install openpyxl')",
233
+ "",
234
+ "Success Criteria: Excel file exists, size >5KB, no errors in output."
235
+ ],
236
+ expected_output="A Financial_Report_YYYYMMDD_HHMMSS.xlsx file containing formatted data from the JSON with multiple worksheets, professional styling, and relevant charts",
237
+ additional_context="This agent must work on Windows, Mac, and Linux. Always use os.path for file operations and handle path separators correctly. Include proper error handling for cross-platform compatibility.",
238
+ tools=[
239
+ ShellTools(),
240
+ FileTools(save_files=True, read_files=True, list_files=True),
241
+ PythonTools(pip_install=True, save_and_run=False, run_code=False)
242
+ ],
243
+ markdown=True,
244
+ show_tool_calls=True,
245
+ debug_mode=True,
246
+ retries=10,
247
+ add_datetime_to_instructions=True,
248
+ delay_between_retries=10
249
+ )
250
+
251
+ def __init__(self, session_id: str = None, **kwargs):
252
+ super().__init__(session_id=session_id, **kwargs)
253
+ self.session_id = session_id or f"financial_workflow_{int(__import__('time').time())}"
254
+ self.session_output_dir = Path(settings.TEMP_DIR) / self.session_id / "output"
255
+ self.session_output_dir.mkdir(parents=True, exist_ok=True)
256
+
257
+ # Configure tools with correct base directories after initialization
258
+ self._configure_agent_tools()
259
+
260
+ logger.info(f"FinancialDocumentWorkflow initialized with session: {self.session_id}")
261
+
262
+ def _configure_agent_tools(self):
263
+ """Configure agent tools with the correct base directories"""
264
+ # Configure data arranger's FileTools with session output directory
265
+ if hasattr(self.data_arranger, 'tools') and self.data_arranger.tools:
266
+ for tool in self.data_arranger.tools:
267
+ if isinstance(tool, FileTools):
268
+ tool.base_dir = self.session_output_dir
269
+
270
+ # Configure code generator's tools with session output directory
271
+ if hasattr(self.code_generator, 'tools') and self.code_generator.tools:
272
+ for tool in self.code_generator.tools:
273
+ if isinstance(tool, FileTools):
274
+ tool.base_dir = self.session_output_dir
275
+ elif isinstance(tool, PythonTools):
276
+ tool.base_dir = self.session_output_dir
277
+
278
+ def run(self, file_path: str, use_cache: bool = True) -> RunResponse:
279
+ """
280
+ Pure Python workflow execution - no streaming, no JSON parsing issues
281
+ """
282
+ logger.info(f"Processing financial document: {file_path}")
283
+
284
+ # Check cache first if enabled
285
+ if use_cache and "final_results" in self.session_state:
286
+ logger.info("Returning cached results")
287
+ return RunResponse(
288
+ run_id=self.run_id,
289
+ content=self.session_state["final_results"]
290
+ )
291
+
292
+ try:
293
+ # Step 1: Extract Financial Data
294
+ logger.info("Step 1: Extracting financial data...")
295
+
296
+ # Check for cached extraction
297
+ if use_cache and "extracted_data" in self.session_state:
298
+ extracted_data = ExtractedFinancialData.model_validate(
299
+ self.session_state["extracted_data"]
300
+ )
301
+ logger.info("Using cached extraction data")
302
+ else:
303
+ document = File(filepath=file_path)
304
+ extraction_prompt = f"""
305
+ Analyze this financial document and extract all relevant financial data points.
306
+
307
+ Focus on:
308
+ - Company identification, including company name, entity identifiers (e.g., Ticker, CIK, ISIN, LEI), and reporting entity type (consolidated/subsidiary/segment).
309
+ - All reporting period information: fiscal year, period start and end dates, reporting date, publication date, and currency used.
310
+ - Revenue data: total revenue/net sales, breakdown by product/service, segment, and geography if available, and year-over-year growth rates.
311
+ - Expense data: COGS, operating expenses (R&D, SG&A, advertising, depreciation/amortization), interest expenses, taxes, and any non-operating items.
312
+ - Profit data: gross profit, operating income (EBIT/EBITDA), pretax profit, net income, basic and diluted earnings per share (EPS), comprehensive income.
313
+ - Balance sheet items: current assets (cash, securities, receivables, inventories), non-current assets (PP&E, intangibles, goodwill), current liabilities, non-current liabilities, and all categories of shareholders’ equity.
314
+ - Cash flow details: cash from operations, investing, and financing; capex, dividends, buybacks; non-cash adjustments (depreciation, SBC, etc.).
315
+ - Financial ratios: profitability (gross margin, operating margin, net margin), return (ROE, ROA, ROIC), liquidity (current/quick ratio), leverage (debt/equity, interest coverage), efficiency (asset/inventory/receivables turnover), per-share metrics.
316
+ - Capital and shareholder information: shares outstanding, share class details, dividends, and buyback information.
317
+ - Non-financial and operational metrics: employee, store, customer/user counts, production volumes, and operational breakdowns.
318
+ - Extract any additional material metrics, key management guidance, risks, uncertainties, ESG indicators, or forward-looking statements.
319
+ - Flag/annotate any unusual or non-recurring items, restatements, or related-party transactions.
320
+ - For each data point, provide a confidence score (0–1) and, where possible, include reference identifiers (table/note numbers).
321
+ - If units or currencies differ throughout, normalize and annotate the data accordingly.
322
+ Return your extraction in a structured, machine-readable format with references and confidence levels for each field.
323
+ Document path: {file_path}
324
+ """
325
+
326
+ extraction_response: RunResponse = self.data_extractor.run(
327
+ extraction_prompt,
328
+ files=[document]
329
+ )
330
+ extracted_data: ExtractedFinancialData = extraction_response.content
331
+
332
+ # Cache the result
333
+ self.session_state["extracted_data"] = extracted_data.model_dump()
334
+ logger.info(f"Extracted {len(extracted_data.data_points)} data points")
335
+
336
+ # Step 2: Arrange and Organize Data
337
+ logger.info("Step 2: Organizing financial data...")
338
+
339
+ if use_cache and "arrangement_response" in self.session_state:
340
+ arrangement_content = self.session_state["arrangement_response"]
341
+ logger.info("Using cached arrangement data")
342
+ else:
343
+ arrangement_prompt = f"""
344
+ You are given raw, extracted financial data. Your task is to reorganize it and prepare it for Excel-based reporting.
345
+
346
+ ========== WHAT TO DELIVER ==========
347
+ • A single JSON object saved as arranged_financial_data.json
348
+ • Fields required: categories, headers, metadata
349
+
350
+ ========== HOW TO ORGANIZE ==========
351
+ Create distinct, Excel-ready categories (one worksheet each) for logical grouping of financial data. Examples include:
352
+ 1. Income Statement Data
353
+ 2. Balance Sheet Data
354
+ 3. Cash Flow Data
355
+ 4. Company Information / General Data
356
+
357
+ ========== STEP-BY-STEP ==========
358
+ 1. Map every data point into the most appropriate category above.
359
+ 2. For each category, identify and include all necessary headers for an Excel template, such as years, company names, financial line item names, and units of measurement (e.g., "in millions").
360
+ 3. Ensure data integrity by not modifying, calculating, or analyzing the original data values.
361
+ 4. Preserve original data formats and units.
362
+ 5. Organize data in a tabular format suitable for direct Excel import.
363
+ 6. Include metadata about data sources and reporting periods where available.
364
+ 7. Assemble everything into the JSON schema described under “WHAT TO DELIVER.”
365
+ 8. Save the JSON as arranged_financial_data.json via save_file.
366
+ 9. Use list_files to confirm the file exists, then read_file to validate its content.
367
+ 10. If the file is missing or malformed, fix the issue and repeat steps 8 – 9.
368
+ 11. Only report success after the file passes both existence and content checks.
369
+
370
+ ========== IMPORTANT RESTRICTIONS ==========
371
+ - Never perform any analysis on the data.
372
+ - Do not calculate ratios, growth rates, or trends.
373
+ - Do not provide insights or interpretations.
374
+ - Do not modify the actual data values.
375
+ - Focus solely on organization and proper formatting.
376
+
377
+ Extracted Data: {extracted_data.model_dump_json(indent=2)}
378
+ """
379
+
380
+ arrangement_response: RunResponse = self.data_arranger.run(arrangement_prompt)
381
+ arrangement_content = arrangement_response.content
382
+
383
+ # Cache the result
384
+ self.session_state["arrangement_response"] = arrangement_content
385
+ logger.info("Data organization completed - check output directory for arranged_financial_data.json")
386
+
387
+ # Step 3: Generate and Execute Excel Code
388
+ logger.info("Step 3: Generating and executing Excel code...")
389
+
390
+ if use_cache and "code_generation_response" in self.session_state:
391
+ code_generation_content = self.session_state["code_generation_response"]
392
+ execution_success = self.session_state.get("execution_success", False)
393
+ logger.info("Using cached code generation results")
394
+ else:
395
+ code_prompt = f"""
396
+ Your objective: Turn the organized JSON data into a polished, multi-sheet Excel report—and prove that it works.
397
+
398
+ ========== INPUT ==========
399
+ File: arranged_financial_data.json
400
+ Tool to read it: read_file
401
+
402
+ ========== WHAT THE PYTHON SCRIPT MUST DO ==========
403
+ 1. Load arranged_financial_data.json and parse its contents.
404
+ 2. For each category in the JSON, create a dedicated worksheet using openpyxl.
405
+ 3. Apply professional touches:
406
+ • Bold, centered headers
407
+ • Appropriate number formats
408
+ • Column-width auto-sizing
409
+ • Borders, cell styles, and freeze panes
410
+ 4. Insert charts (bar, line, or pie) wherever the data lends itself to visualisation.
411
+ 5. Embed key metrics and summary notes prominently in the Executive Summary sheet.
412
+ 6. Name the workbook: Financial_Report_<YYYYMMDD_HHMMSS>.xlsx.
413
+ 7. Wrap every file and workbook operation in robust try/except blocks.
414
+ 8. Log all major steps and any exceptions for easy debugging.
415
+ 9. Save the script via save_to_file_and_run and execute it immediately.
416
+ 10. After execution, use list_files to ensure the Excel file was created.
417
+ 11. Optionally inspect the file (e.g., size or first bytes via read_file) to confirm it is not empty.
418
+ 12. If the workbook is missing or corrupted, refine the code, re-save, and re-run until success.
419
+
420
+ ========== OUTPUT ==========
421
+ • A fully formatted Excel workbook in the working directory.
422
+ • A concise summary of what ran, any issues encountered, and confirmation that the file exists and opens without error.
423
+ """
424
+
425
+ code_response: RunResponse = self.code_generator.run(code_prompt)
426
+ code_generation_content = code_response.content
427
+
428
+ # Simple check for execution success based on response content
429
+ execution_success = (
430
+ "error" not in code_generation_content.lower() or
431
+ "success" in code_generation_content.lower() or
432
+ "completed" in code_generation_content.lower()
433
+ )
434
+
435
+ # Cache the results
436
+ self.session_state["code_generation_response"] = code_generation_content
437
+ self.session_state["execution_success"] = execution_success
438
+
439
+ logger.info(f"Code generation and execution completed: {'✅ Success' if execution_success else '❌ Failed'}")
440
+
441
+ # Prepare final results
442
+ # List actual output files
443
+ output_files = []
444
+ if self.session_output_dir.exists():
445
+ output_files = [f.name for f in self.session_output_dir.iterdir() if f.is_file()]
446
+
447
+ results_summary = f"""
448
+ # Financial Document Analysis Complete
449
+
450
+ ## Document Information
451
+ - **Company**: {extracted_data.company_name or 'Not specified'}
452
+ - **Document Type**: {extracted_data.document_type}
453
+ - **Reporting Period**: {extracted_data.reporting_period or 'Not specified'}
454
+
455
+ ## Processing Summary
456
+ - **Data Points Extracted**: {len(extracted_data.data_points)}
457
+ - **Data Organization**: {'✅ Completed' if arrangement_content else '❌ Failed'}
458
+ - **Excel Creation**: {'✅ Success' if execution_success else '❌ Failed'}
459
+
460
+ ## Data Organization Results
461
+ {arrangement_content[:500] + '...' if arrangement_content and len(arrangement_content) > 500 else arrangement_content or 'No arrangement data available'}
462
+
463
+ ## Tool Execution Summary
464
+ **Data Arranger**: Used FileTools to save organized data to JSON
465
+ **Code Generator**: Used PythonTools and FileTools for Excel generation
466
+
467
+ ## Code Generation Results
468
+ {code_generation_content[:500] + '...' if code_generation_content and len(code_generation_content) > 500 else code_generation_content or 'No code generation results available'}
469
+
470
+ ## Generated Files ({len(output_files)} files)
471
+ {chr(10).join(f"- **{file}**" for file in output_files) if output_files else "- No files generated"}
472
+
473
+ ## Output Directory
474
+ 📁 `{self.session_output_dir}`
475
+
476
+ ---
477
+ *Generated using Agno Workflows with FileTools and PythonTools integration*
478
+ *Note: Due to Gemini limitations, structured outputs were used for data extraction only*
479
+ """
480
+
481
+ # Cache final results
482
+ self.session_state["final_results"] = results_summary
483
+
484
+ return RunResponse(
485
+ run_id=self.run_id,
486
+ content=results_summary
487
+ )
488
+
489
+ except Exception as e:
490
+ error_message = f"❌ Workflow failed: {str(e)}"
491
+ logger.error(f"Financial workflow error: {e}", exc_info=True)
492
+ return RunResponse(
493
+ run_id=self.run_id,
494
+ content=error_message
495
+ )
496
+
497
+ def get_processing_status(self) -> Dict[str, str]:
498
+ """Get the current processing status"""
499
+ status = {
500
+ "extraction": "completed" if "extracted_data" in self.session_state else "pending",
501
+ "arrangement": "completed" if "arranged_data" in self.session_state else "pending",
502
+ "code_generation": "completed" if "generated_code" in self.session_state else "pending",
503
+ "final_results": "completed" if "final_results" in self.session_state else "pending"
504
+ }
505
+ return status