Spaces:
Sleeping
Sleeping
Commit
·
5ff5453
1
Parent(s):
115be73
added download buttons
Browse files- app.py +144 -19
- requirements.txt +2 -1
- templates/index.html +32 -1
app.py
CHANGED
@@ -1,29 +1,32 @@
|
|
1 |
-
from flask import Flask, render_template, request, jsonify, Response
|
2 |
-
from
|
3 |
-
import
|
4 |
-
from datetime import datetime
|
5 |
-
import os
|
6 |
import json
|
7 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
import numpy as np
|
9 |
-
import pickle
|
10 |
-
from pathlib import Path
|
11 |
-
import umap
|
12 |
-
import plotly.express as px
|
13 |
-
import plotly.graph_objects as go
|
14 |
import pandas as pd
|
15 |
-
|
|
|
|
|
16 |
from sklearn.preprocessing import StandardScaler
|
17 |
-
import time
|
18 |
-
import queue
|
19 |
-
import threading
|
20 |
import hdbscan
|
21 |
-
|
22 |
-
import
|
23 |
-
|
24 |
-
|
|
|
|
|
25 |
|
26 |
app = Flask(__name__)
|
|
|
|
|
27 |
|
28 |
# Get API keys from environment variables
|
29 |
SERPAPI_API_KEY = os.getenv('SERPAPI_API_KEY')
|
@@ -32,6 +35,9 @@ MAX_PATENTS = 3000 # Increased from 2000 to 5000 for better coverage
|
|
32 |
MIN_PATENTS_FOR_GAPS = 3000 # Minimum patents needed for reliable gap detection
|
33 |
CACHE_FILE = 'patent_embeddings_cache.pkl'
|
34 |
|
|
|
|
|
|
|
35 |
# Global progress queue for SSE updates
|
36 |
progress_queue = queue.Queue()
|
37 |
|
@@ -1304,6 +1310,11 @@ def search():
|
|
1304 |
|
1305 |
# Generate innovation analysis from insights
|
1306 |
innovation_analysis = analyze_innovation_opportunities(viz_data['insights'])
|
|
|
|
|
|
|
|
|
|
|
1307 |
|
1308 |
return jsonify({
|
1309 |
'visualization': viz_data['plot'],
|
@@ -1317,5 +1328,119 @@ def search():
|
|
1317 |
progress_queue.put('DONE')
|
1318 |
return jsonify({'error': str(e)})
|
1319 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1320 |
if __name__ == '__main__':
|
1321 |
app.run(host='0.0.0.0', port=7860)
|
|
|
1 |
+
from flask import Flask, render_template, request, jsonify, Response, session, send_file
|
2 |
+
from queue import Queue
|
3 |
+
import queue
|
|
|
|
|
4 |
import json
|
5 |
+
import traceback
|
6 |
+
import tempfile
|
7 |
+
from reportlab.lib import colors
|
8 |
+
from reportlab.lib.pagesizes import letter
|
9 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
|
10 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
11 |
+
import io
|
12 |
+
import os
|
13 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
14 |
import pandas as pd
|
15 |
+
import umap
|
16 |
+
import openai
|
17 |
+
from sklearn.neighbors import NearestNeighbors
|
18 |
from sklearn.preprocessing import StandardScaler
|
|
|
|
|
|
|
19 |
import hdbscan
|
20 |
+
import plotly.graph_objects as go
|
21 |
+
import pickle
|
22 |
+
import requests
|
23 |
+
from serpapi import GoogleSearch
|
24 |
+
from datetime import datetime
|
25 |
+
import re
|
26 |
|
27 |
app = Flask(__name__)
# Flask signs the session cookie with this key. A key generated with
# os.urandom() changes on every process start, which invalidates existing
# sessions after a restart and makes them unreadable by sibling worker
# processes. Prefer a stable key supplied through the environment and only
# fall back to a random per-process key when none is configured.
app.secret_key = os.environ.get('FLASK_SECRET_KEY') or os.urandom(24)  # Required for session
progress_queue = Queue()
|
30 |
|
31 |
# Get API keys from environment variables
|
32 |
SERPAPI_API_KEY = os.getenv('SERPAPI_API_KEY')
|
|
|
35 |
MIN_PATENTS_FOR_GAPS = 3000 # Minimum patents needed for reliable gap detection
|
36 |
CACHE_FILE = 'patent_embeddings_cache.pkl'
|
37 |
|
38 |
+
# Set up OpenAI API
|
39 |
+
openai.api_key = OPENAI_API_KEY
|
40 |
+
|
41 |
# Global progress queue for SSE updates
|
42 |
progress_queue = queue.Queue()
|
43 |
|
|
|
1310 |
|
1311 |
# Generate innovation analysis from insights
|
1312 |
innovation_analysis = analyze_innovation_opportunities(viz_data['insights'])
|
1313 |
+
|
1314 |
+
# Store data in session for downloads
|
1315 |
+
session['last_plot_data'] = viz_data['plot']
|
1316 |
+
session['last_insights'] = viz_data['insights']
|
1317 |
+
session['last_analysis'] = innovation_analysis
|
1318 |
|
1319 |
return jsonify({
|
1320 |
'visualization': viz_data['plot'],
|
|
|
1328 |
progress_queue.put('DONE')
|
1329 |
return jsonify({'error': str(e)})
|
1330 |
|
1331 |
+
def _render_plot_page(plot_data):
    """Return a standalone HTML page (str) that renders ``plot_data`` with Plotly.

    ``plot_data`` is embedded verbatim as a JavaScript object literal, so it
    has to be a JSON document. A value that is not already a string is
    serialized with ``json.dumps`` first.
    """
    plot_json = plot_data if isinstance(plot_data, str) else json.dumps(plot_data)
    # In JSON, "</" can only occur inside string literals, where "<\/" is an
    # equivalent escape. Rewriting it keeps a "</script>" inside the data
    # from terminating the inline script element early.
    plot_json = plot_json.replace('</', '<\\/')
    # NOTE(review): the "plotly-latest" CDN alias is reportedly frozen at an
    # old Plotly.js release -- consider pinning an explicit version; confirm.
    return """<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Patent Technology Landscape</title>
    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
</head>
<body>
    <div id="plot"></div>
    <script>
        var plotData = %s;
        Plotly.newPlot('plot', plotData.data, plotData.layout);
    </script>
</body>
</html>
""" % plot_json


@app.route('/download_plot')
def download_plot():
    """Download the latest plot as a standalone HTML file.

    Reads the plot stored under ``session['last_plot_data']`` and wraps it in
    a minimal HTML page that draws it with Plotly.

    NOTE(review): this relies on the plot fitting into the session; with
    Flask's default cookie-based session a large plot may not survive the
    round trip -- confirm which session backend is in use.

    Returns:
        The page as an attachment named ``patent_landscape.html``, or a JSON
        error payload with HTTP status 404 when the session holds no plot.
    """
    plot_data = session.get('last_plot_data')
    if not plot_data:
        return jsonify({'error': 'No plot data available'}), 404

    # Build the page in memory instead of a NamedTemporaryFile(delete=False):
    # that file was never removed, so every download leaked one temp file.
    page = io.BytesIO(_render_plot_page(plot_data).encode('utf-8'))

    return send_file(
        page,
        as_attachment=True,
        download_name='patent_landscape.html',
        mimetype='text/html'
    )
|
1366 |
+
|
1367 |
+
@app.route('/download_insights')
def download_insights():
    """Download the latest insights as a PDF file.

    Reads ``session['last_insights']`` (an iterable of dicts with ``type``,
    ``id`` and ``description`` keys) and the optional
    ``session['last_analysis']`` text, and lays them out as a ReportLab
    document with one section per insight type.

    Returns:
        The PDF as an attachment named ``patent_insights.pdf``, or a JSON
        error payload with HTTP status 404 when the session holds no insights.
    """
    # Local import keeps this block self-contained; Paragraph text is parsed
    # as XML-like markup, so free text must have &, < and > escaped.
    from xml.sax.saxutils import escape

    insights = session.get('last_insights')
    analysis = session.get('last_analysis')
    if not insights:
        return jsonify({'error': 'No insights available'}), 404

    # Create a PDF in memory
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=letter)
    styles = getSampleStyleSheet()

    # Create custom styles
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Title'],
        fontSize=24,
        spaceAfter=30
    )
    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading1'],
        fontSize=16,
        spaceAfter=20
    )
    normal_style = ParagraphStyle(
        'CustomNormal',
        parent=styles['Normal'],
        fontSize=12,
        spaceAfter=12
    )

    # (section heading, insight 'type' value, label used for each entry)
    sections = (
        ("Technology Clusters", 'cluster', "Cluster"),
        ("Transitional Areas", 'transitional', "Transitional Area"),
        ("Underexplored Areas", 'innovation_subcluster', "Underexplored Area"),
    )

    # Build the document
    story = [Paragraph("Patent Technology Landscape Analysis", title_style)]

    for heading, insight_type, label in sections:
        story.append(Paragraph(heading, heading_style))
        for insight in insights:
            if insight.get('type') != insight_type:
                continue
            insight_id = escape(str(insight.get('id', '')))
            description = escape(str(insight.get('description', '')))
            # Only the <b> tag is markup; the interpolated values are escaped
            # so a stray '<' or '&' in a description cannot break the build.
            text = f"<b>{label} {insight_id}:</b> {description}"
            story.append(Paragraph(text, normal_style))
            story.append(Spacer(1, 12))

    # Add innovation analysis if available
    if analysis:
        story.append(Paragraph("Innovation Opportunities Analysis", heading_style))
        # Paragraph collapses raw newlines, so turn them into explicit breaks
        # after escaping the free text.
        analysis_markup = escape(str(analysis)).replace('\n', '<br/>')
        story.append(Paragraph(analysis_markup, normal_style))

    # Build PDF and rewind so send_file streams it from the start
    doc.build(story)
    buffer.seek(0)

    return send_file(
        buffer,
        as_attachment=True,
        download_name='patent_insights.pdf',
        mimetype='application/pdf'
    )
|
1444 |
+
|
1445 |
if __name__ == '__main__':
|
1446 |
app.run(host='0.0.0.0', port=7860)
|
requirements.txt
CHANGED
@@ -12,4 +12,5 @@ llvmlite==0.39.1
|
|
12 |
numba==0.56.4
|
13 |
setuptools>=65.5.1
|
14 |
wheel>=0.38.0
|
15 |
-
hdbscan
|
|
|
|
12 |
numba==0.56.4
|
13 |
setuptools>=65.5.1
|
14 |
wheel>=0.38.0
|
15 |
+
hdbscan
|
16 |
+
reportlab==4.0.4
|
templates/index.html
CHANGED
@@ -215,7 +215,23 @@
|
|
215 |
</div>
|
216 |
|
217 |
<!-- Visualization Container -->
|
218 |
-
<div id="visualization" class="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
<!-- Insights Panel -->
|
221 |
<div id="insights" class="insights-panel p-4"></div>
|
@@ -319,9 +335,22 @@
|
|
319 |
}
|
320 |
});
|
321 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
$(document).ready(function() {
|
|
|
|
|
323 |
$('#searchForm').on('submit', function(e) {
|
324 |
e.preventDefault();
|
|
|
325 |
const keywords = $('#keywords').val();
|
326 |
|
327 |
if (!keywords) {
|
@@ -486,6 +515,8 @@
|
|
486 |
console.log('Search completed successfully');
|
487 |
stopProgressMonitoring();
|
488 |
$('#loading').addClass('hidden');
|
|
|
|
|
489 |
},
|
490 |
error: function(jqXHR, textStatus, errorThrown) {
|
491 |
console.error('Ajax error:', textStatus, errorThrown);
|
|
|
215 |
</div>
|
216 |
|
217 |
<!-- Visualization Container -->
|
218 |
+
<div id="visualization-section" class="relative">
|
219 |
+
<div id="visualization" class="visualization-container"></div>
|
220 |
+
<div class="absolute top-4 right-4 flex gap-2">
|
221 |
+
<a id="download-plot" href="/download_plot" class="hidden px-4 py-2 bg-blue-600 text-white rounded hover:bg-blue-700 transition-colors duration-200 flex items-center">
|
222 |
+
<svg class="w-5 h-5 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
|
223 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"></path>
|
224 |
+
</svg>
|
225 |
+
Download Plot
|
226 |
+
</a>
|
227 |
+
<a id="download-insights" href="/download_insights" class="hidden px-4 py-2 bg-green-600 text-white rounded hover:bg-green-700 transition-colors duration-200 flex items-center">
|
228 |
+
<svg class="w-5 h-5 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
|
229 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M7 16a4 4 0 01-.88-7.903A5 5 0 1115.9 6L16 6a5 5 0 011 9.9M9 19l3 3m0 0l3-3m-3 3V10"></path>
|
230 |
+
</svg>
|
231 |
+
Download Insights
|
232 |
+
</a>
|
233 |
+
</div>
|
234 |
+
</div>
|
235 |
|
236 |
<!-- Insights Panel -->
|
237 |
<div id="insights" class="insights-panel p-4"></div>
|
|
|
335 |
}
|
336 |
});
|
337 |
}
|
338 |
+
// Reveal both download links by dropping their 'hidden' class.
function showDownloadButtons() {
    ['#download-plot', '#download-insights'].forEach(function (selector) {
        $(selector).removeClass('hidden');
    });
}
|
342 |
+
|
343 |
+
// Conceal both download links by adding the 'hidden' class back.
function hideDownloadButtons() {
    ['#download-plot', '#download-insights'].forEach(function (selector) {
        $(selector).addClass('hidden');
    });
}
|
347 |
+
|
348 |
$(document).ready(function() {
|
349 |
+
hideDownloadButtons(); // Initially hide download buttons
|
350 |
+
|
351 |
$('#searchForm').on('submit', function(e) {
|
352 |
e.preventDefault();
|
353 |
+
hideDownloadButtons(); // Hide buttons when starting new search
|
354 |
const keywords = $('#keywords').val();
|
355 |
|
356 |
if (!keywords) {
|
|
|
515 |
console.log('Search completed successfully');
|
516 |
stopProgressMonitoring();
|
517 |
$('#loading').addClass('hidden');
|
518 |
+
// Show download buttons
|
519 |
+
showDownloadButtons();
|
520 |
},
|
521 |
error: function(jqXHR, textStatus, errorThrown) {
|
522 |
console.error('Ajax error:', textStatus, errorThrown);
|