Spaces:
Sleeping
Sleeping
Commit
·
65801e7
1
Parent(s):
16858c5
added 3D interactive plot
Browse files- app.py +412 -120
- requirements.txt +7 -8
- templates/index.html +322 -45
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from flask import Flask, render_template, request, jsonify
|
2 |
from dotenv import load_dotenv
|
3 |
import requests
|
4 |
from datetime import datetime
|
@@ -8,45 +8,56 @@ import openai
|
|
8 |
import numpy as np
|
9 |
import pickle
|
10 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
load_dotenv()
|
13 |
|
14 |
app = Flask(__name__)
|
15 |
|
16 |
# Get API keys from environment variables
|
17 |
-
|
18 |
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
|
19 |
MAX_PATENTS = 300 # Limit number of patents to process
|
20 |
CACHE_FILE = 'patent_embeddings_cache.pkl'
|
21 |
-
QUERY_CACHE_FILE = 'query_embeddings_cache.pkl'
|
22 |
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
25 |
if not OPENAI_API_KEY:
|
26 |
raise ValueError("OPENAI_API_KEY environment variable is not set")
|
27 |
|
28 |
# Initialize OpenAI API key
|
29 |
openai.api_key = OPENAI_API_KEY
|
30 |
|
31 |
-
def load_cache(
|
32 |
"""Load cached embeddings from file"""
|
33 |
try:
|
34 |
-
if os.path.exists(
|
35 |
-
with open(
|
36 |
return pickle.load(f)
|
37 |
except Exception as e:
|
38 |
print(f"Error loading cache: {e}")
|
39 |
return {}
|
40 |
|
41 |
-
def save_cache(cache
|
42 |
"""Save embeddings cache to file"""
|
43 |
try:
|
44 |
-
with open(
|
45 |
pickle.dump(cache, f)
|
46 |
except Exception as e:
|
47 |
print(f"Error saving cache: {e}")
|
48 |
|
49 |
-
def get_embedding(text, cache
|
50 |
"""Get embedding for text, using cache if available"""
|
51 |
if not text or text.strip() == "":
|
52 |
return None
|
@@ -62,7 +73,7 @@ def get_embedding(text, cache, cache_file):
|
|
62 |
embedding = response['data'][0]['embedding']
|
63 |
if embedding: # Only cache if we got a valid embedding
|
64 |
cache[text] = embedding
|
65 |
-
save_cache(cache
|
66 |
return embedding
|
67 |
except Exception as e:
|
68 |
print(f"Error getting embedding: {e}")
|
@@ -70,119 +81,97 @@ def get_embedding(text, cache, cache_file):
|
|
70 |
|
71 |
def search_patents(keywords, page_size=100):
|
72 |
"""
|
73 |
-
Search patents using
|
74 |
"""
|
75 |
-
# Load
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
# Get embedding for search query
|
80 |
-
query_embedding = get_embedding(keywords, query_cache, QUERY_CACHE_FILE)
|
81 |
-
if not query_embedding:
|
82 |
-
return []
|
83 |
-
|
84 |
-
# PatentsView API endpoint
|
85 |
-
api_url = "https://search.patentsview.org/api/v1/patent/"
|
86 |
-
|
87 |
-
# Create a broader search query using related terms
|
88 |
-
# We'll search in both title and abstract with more flexible matching
|
89 |
-
query = {
|
90 |
-
"q": {
|
91 |
-
"_or": [
|
92 |
-
{"_text_any": {"patent_title": keywords.split()}},
|
93 |
-
{"_text_any": {"patent_abstract": keywords.split()}}
|
94 |
-
]
|
95 |
-
},
|
96 |
-
"f": [
|
97 |
-
"patent_title",
|
98 |
-
"patent_abstract",
|
99 |
-
"patent_date",
|
100 |
-
"patent_id",
|
101 |
-
"assignees"
|
102 |
-
],
|
103 |
-
"o": {
|
104 |
-
"page": 1,
|
105 |
-
"size": MAX_PATENTS # Get maximum allowed patents for better semantic matching
|
106 |
-
}
|
107 |
-
}
|
108 |
-
|
109 |
all_patents = []
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
}
|
115 |
|
116 |
-
|
117 |
-
|
118 |
|
119 |
-
|
120 |
-
|
121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
date_str = patent.get('patent_date', '')
|
129 |
-
filing_year = 'N/A'
|
130 |
-
if date_str:
|
131 |
-
try:
|
132 |
-
filing_year = datetime.strptime(date_str, '%Y-%m-%d').year
|
133 |
-
except ValueError:
|
134 |
-
pass
|
135 |
-
|
136 |
-
# Get first assignee organization if available
|
137 |
-
assignee_org = 'N/A'
|
138 |
-
assignees = patent.get('assignees', [])
|
139 |
-
if assignees and len(assignees) > 0:
|
140 |
-
assignee_org = assignees[0].get('assignee_organization', 'N/A')
|
141 |
-
|
142 |
-
# Format patent ID for Google Patents URL
|
143 |
-
patent_id = patent.get('patent_id', '')
|
144 |
-
if patent_id and not patent_id.startswith('US'):
|
145 |
-
patent_id = f"US{patent_id}"
|
146 |
-
|
147 |
-
# Combine title and abstract for embedding
|
148 |
-
title = patent.get('patent_title', '').strip()
|
149 |
-
abstract = patent.get('patent_abstract', '').strip()
|
150 |
-
combined_text = f"{title}\n{abstract}".strip()
|
151 |
-
|
152 |
-
# Get embedding for combined text using patent cache
|
153 |
-
patent_embedding = get_embedding(combined_text, patent_cache, CACHE_FILE)
|
154 |
|
155 |
-
if
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
|
|
|
|
|
161 |
formatted_patent = {
|
162 |
'title': title,
|
163 |
-
'assignee':
|
164 |
'filing_year': filing_year,
|
165 |
'abstract': abstract,
|
166 |
-
'link':
|
167 |
-
'embedding':
|
168 |
-
'similarity': float(similarity) # Convert to float for JSON serialization
|
169 |
}
|
170 |
all_patents.append(formatted_patent)
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
|
|
182 |
|
183 |
-
# Save final cache
|
184 |
-
save_cache(
|
185 |
-
save_cache(query_cache, QUERY_CACHE_FILE)
|
186 |
|
187 |
print(f"Total patents retrieved and embedded: {len(all_patents)}")
|
188 |
return all_patents
|
@@ -218,10 +207,284 @@ def generate_summary(patents):
|
|
218 |
print(f"Error generating summary: {str(e)}")
|
219 |
return "Error generating summary."
|
220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
@app.route('/')
|
222 |
def home():
|
223 |
return render_template('index.html')
|
224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
@app.route('/search', methods=['POST'])
|
226 |
def search():
|
227 |
keywords = request.form.get('keywords', '')
|
@@ -229,16 +492,45 @@ def search():
|
|
229 |
return jsonify({'error': 'Please enter search keywords'})
|
230 |
|
231 |
print(f"\nProcessing search request for keywords: {keywords}")
|
232 |
-
patents = search_patents(keywords)
|
233 |
-
if not patents:
|
234 |
-
return jsonify({'error': 'No patents found or an error occurred'})
|
235 |
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
|
243 |
if __name__ == '__main__':
|
244 |
app.run(host='0.0.0.0', port=7860)
|
|
|
1 |
+
from flask import Flask, render_template, request, jsonify, Response
|
2 |
from dotenv import load_dotenv
|
3 |
import requests
|
4 |
from datetime import datetime
|
|
|
8 |
import numpy as np
|
9 |
import pickle
|
10 |
from pathlib import Path
|
11 |
+
import umap
|
12 |
+
import plotly.express as px
|
13 |
+
import plotly.graph_objects as go
|
14 |
+
import pandas as pd
|
15 |
+
from sklearn.cluster import DBSCAN
|
16 |
+
from sklearn.preprocessing import StandardScaler
|
17 |
+
import time
|
18 |
+
import queue
|
19 |
+
import threading
|
20 |
|
21 |
load_dotenv()
|
22 |
|
23 |
app = Flask(__name__)
|
24 |
|
25 |
# Get API keys from environment variables
# (load_dotenv() above has already merged any .env file into os.environ)
SERPAPI_API_KEY = os.getenv('SERPAPI_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
MAX_PATENTS = 300  # Limit number of patents to process
# Pickle file holding the text -> embedding cache used by load_cache/save_cache
CACHE_FILE = 'patent_embeddings_cache.pkl'

# Global progress queue for SSE updates
# NOTE(review): a single module-level queue is shared by every request and
# every /progress listener in this process - confirm that only one search
# is expected to run at a time.
progress_queue = queue.Queue()

# Fail fast at import time: the app cannot do anything useful without both keys.
if not SERPAPI_API_KEY:
    raise ValueError("SERPAPI_API_KEY environment variable is not set")
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY environment variable is not set")

# Initialize OpenAI API key
openai.api_key = OPENAI_API_KEY
|
41 |
|
42 |
+
def load_cache():
    """Return the embedding cache stored in ``CACHE_FILE``.

    Returns:
        The unpickled cache object, or an empty dict when the file does not
        exist or cannot be read/unpickled (the error is printed, not raised).
    """
    try:
        if not os.path.exists(CACHE_FILE):
            return {}
        # NOTE: pickle executes arbitrary code on load; this is only safe
        # because the file is written by this application itself.
        with open(CACHE_FILE, 'rb') as cache_fh:
            return pickle.load(cache_fh)
    except Exception as exc:
        print(f"Error loading cache: {exc}")
        return {}
|
51 |
|
52 |
+
def save_cache(cache):
    """Persist the embeddings cache to ``CACHE_FILE`` atomically.

    The cache is pickled into a temporary file in the same directory and then
    moved over ``CACHE_FILE`` with ``os.replace``. This function is called
    after every new embedding, so writing in place would leave a truncated
    pickle behind if the process died mid-write - and ``load_cache`` would
    then silently fall back to an empty cache.

    Args:
        cache: The picklable cache object (text -> embedding mapping).

    Returns:
        None. Failures are printed rather than raised (best-effort save).
    """
    # Local imports keep this block self-contained within the file.
    import os
    import tempfile

    try:
        # The temp file must live on the same filesystem for os.replace
        # to be an atomic rename.
        target_dir = os.path.dirname(os.path.abspath(CACHE_FILE))
        fd, tmp_path = tempfile.mkstemp(dir=target_dir, suffix='.tmp')
        try:
            with os.fdopen(fd, 'wb') as f:
                pickle.dump(cache, f)
            os.replace(tmp_path, CACHE_FILE)
        except BaseException:
            # Do not leave stray temp files behind on failure.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise
    except Exception as e:
        print(f"Error saving cache: {e}")
|
59 |
|
60 |
+
def get_embedding(text, cache):
|
61 |
"""Get embedding for text, using cache if available"""
|
62 |
if not text or text.strip() == "":
|
63 |
return None
|
|
|
73 |
embedding = response['data'][0]['embedding']
|
74 |
if embedding: # Only cache if we got a valid embedding
|
75 |
cache[text] = embedding
|
76 |
+
save_cache(cache) # Save cache after each new embedding
|
77 |
return embedding
|
78 |
except Exception as e:
|
79 |
print(f"Error getting embedding: {e}")
|
|
|
81 |
|
82 |
def _format_patent(patent):
    """Normalize one SerpApi result entry into the app's patent dict (no embedding yet)."""
    filing_date = patent.get('filing_date') or ''
    filing_year = 'N/A'
    if filing_date:
        try:
            filing_year = datetime.strptime(filing_date, '%Y-%m-%d').year
        except (ValueError, TypeError):
            # Unparseable or non-string date: keep the 'N/A' placeholder.
            pass

    # The assignee may be missing, a string, or a list; `or` also maps an
    # empty list / None to the placeholder.
    assignee = patent.get('assignee') or 'N/A'
    if isinstance(assignee, list):
        assignee = assignee[0]

    return {
        'title': (patent.get('title') or '').strip(),
        'assignee': str(assignee),
        'filing_year': filing_year,
        'abstract': (patent.get('snippet') or '').strip(),
        'link': patent.get('patent_link', '') or patent.get('link', ''),
    }


def search_patents(keywords, page_size=100, timeout=30):
    """
    Search patents using SerpApi's Google Patents API with pagination and generate embeddings

    Args:
        keywords: Query string sent as ``q``.
        page_size: Number of results requested per page (``num``).
        timeout: Seconds to wait for each SerpApi HTTP request before giving
            up; without it a stalled connection would block the request
            forever.

    Returns:
        A list of at most ``MAX_PATENTS`` dicts with keys ``title``,
        ``assignee``, ``filing_year`` (int or ``'N/A'``), ``abstract``,
        ``link`` and ``embedding`` (whatever ``get_embedding`` returned).
        Any request/parsing error is printed and ends pagination; patents
        collected so far are still returned.
    """
    # Load existing cache
    embedding_cache = load_cache()

    all_patents = []
    seen = set()  # (link, title) pairs already collected
    page = 1
    total_processed = 0

    # SerpApi Google Patents API endpoint
    api_url = "https://serpapi.com/search"

    while len(all_patents) < MAX_PATENTS:
        update_progress('search', f'Fetching page {page} of patents...')

        # NOTE(review): confirm that the google_patents engine honours
        # `start`; if it paginates via `page` instead, every request returns
        # the first page. The de-duplication below stops the loop in that case.
        params = {
            "engine": "google_patents",
            "q": keywords,
            "api_key": SERPAPI_API_KEY,
            "num": page_size,
            "start": (page - 1) * page_size
        }

        try:
            response = requests.get(api_url, params=params, timeout=timeout)
            response_data = response.json()

            if "error" in response_data:
                print(f"API returned error: {response_data['error']}")
                break

            patents_data = response_data.get('organic_results', [])

            if not patents_data:
                print(f"No more patents found on page {page}")
                break

            new_on_page = 0
            for patent in patents_data:
                if len(all_patents) >= MAX_PATENTS:
                    break

                formatted_patent = _format_patent(patent)

                # Skip exact repeats so the landscape has one point per patent.
                key = (formatted_patent['link'], formatted_patent['title'])
                if key in seen:
                    continue
                seen.add(key)

                # Title and abstract are embedded together
                combined_text = (
                    f"{formatted_patent['title']}\n{formatted_patent['abstract']}".strip()
                )

                total_processed += 1
                if total_processed % 10 == 0:  # Update progress every 10 patents
                    update_progress('embedding', f'Processing patent {total_processed} of {MAX_PATENTS}...')

                formatted_patent['embedding'] = get_embedding(combined_text, embedding_cache)
                all_patents.append(formatted_patent)
                new_on_page += 1

            print(f"Retrieved {len(patents_data)} patents from page {page}")

            # A page that contributed nothing new means further requests
            # would only repeat results.
            if new_on_page == 0:
                print(f"No new patents on page {page}; stopping pagination")
                break

            # Check if there are more pages
            if not response_data.get('serpapi_pagination', {}).get('next'):
                break

            page += 1

        except Exception as e:
            # Top-level boundary for network/JSON errors: report and stop
            # paging, keeping what was collected so far.
            print(f"Error searching patents: {e}")
            break

    # Save final cache state
    save_cache(embedding_cache)

    print(f"Total patents retrieved and embedded: {len(all_patents)}")
    return all_patents
|
|
|
207 |
print(f"Error generating summary: {str(e)}")
|
208 |
return "Error generating summary."
|
209 |
|
210 |
+
def _cluster_year_range(years):
    """Return ``"min - max"`` over the numeric entries of *years*, or ``'N/A'`` if none."""
    numeric_years = pd.to_numeric(years, errors='coerce').dropna()
    if numeric_years.empty:
        return 'N/A'
    return f"{int(numeric_years.min())} - {int(numeric_years.max())}"


def analyze_clusters(df, labels, embeddings_3d):
    """
    Generate descriptions for patent clusters and identify opportunity zones

    Args:
        df: DataFrame with at least ``title``, ``assignee`` and ``year``
            columns, one row per patent. ``year`` may mix ints with the
            ``'N/A'`` placeholder; non-numeric entries are ignored when the
            year range is computed (previously a mixed column raised
            ``TypeError`` and aborted the whole analysis).
        labels: Cluster label per row of ``df``; ``-1`` marks noise points,
            which are reported as an "opportunity zone".
        embeddings_3d: Unused; kept so existing callers keep working.

    Returns:
        A list of insight dicts. Regular clusters yield
        ``{'type': 'cluster', 'id', 'size', 'description'}``; the noise group
        yields ``{'type': 'opportunity_zone', 'size', 'description'}``.
        A group whose OpenAI call fails is skipped (the error is printed).
    """
    labels = np.asarray(labels)
    cluster_insights = []

    # Analyze each cluster (including noise points labeled as -1)
    for label in np.unique(labels):
        cluster_patents = df[labels == label]
        if len(cluster_patents) == 0:
            continue

        # str() guards against non-string titles/assignees; dict.fromkeys
        # de-duplicates assignees while keeping first-seen order.
        titles = "\n".join(str(t) for t in cluster_patents['title'])
        assignees = ", ".join(dict.fromkeys(str(a) for a in cluster_patents['assignee']))
        years = _cluster_year_range(cluster_patents['year'])

        if label == -1:
            # Sparse regions (potential opportunity zones)
            prompt = (
                f"Analyze these {len(cluster_patents)} patents that are in sparse regions of the technology landscape:\n"
                "\n"
                "Patents:\n"
                f"{titles}\n"
                "\n"
                f"Key assignees: {assignees}\n"
                f"Years: {years}\n"
                "\n"
                "Please provide:\n"
                "1. A brief description of these isolated technologies\n"
                "2. Potential innovation opportunities in this space\n"
                "3. Why these areas might be underexplored\n"
                "Keep the response concise (max 3 sentences per point)."
            )
            system_message = "You are a patent and technology expert analyzing innovation opportunities."
            max_tokens = 300
            insight = {
                'type': 'opportunity_zone',
                'size': len(cluster_patents),
            }
            error_prefix = "Error generating opportunity zone analysis"
        else:
            # Regular clusters
            prompt = (
                f"Analyze this cluster of {len(cluster_patents)} related patents:\n"
                "\n"
                "Patents:\n"
                f"{titles}\n"
                "\n"
                f"Key assignees: {assignees}\n"
                f"Years: {years}\n"
                "\n"
                "Please provide a concise (2-3 sentences) summary of:\n"
                "1. The main technology focus of this cluster\n"
                "2. Current development status and trends"
            )
            system_message = "You are a patent and technology expert analyzing innovation clusters."
            max_tokens = 200
            insight = {
                'type': 'cluster',
                'id': int(label),
                'size': len(cluster_patents),
            }
            error_prefix = "Error generating cluster analysis"

        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=max_tokens,
                temperature=0.7
            )
            insight['description'] = response['choices'][0]['message']['content']
            cluster_insights.append(insight)
        except Exception as e:
            # Best effort: one failed summary must not lose the others.
            print(f"{error_prefix}: {e}")

    return cluster_insights
|
300 |
+
|
301 |
+
def create_3d_visualization(patents):
    """
    Build the interactive 3D patent landscape.

    Embeddings are reduced to three dimensions with UMAP, standardized and
    clustered with DBSCAN (points labeled -1 are treated as sparse
    "opportunity" regions), summarized via ``analyze_clusters`` and rendered
    as a Plotly ``Scatter3d``. Progress is reported through
    ``update_progress`` along the way.

    Args:
        patents: List of patent dicts with ``embedding``, ``abstract``,
            ``title``, ``assignee``, ``filing_year`` and ``link`` keys.

    Returns:
        ``{'plot': <figure JSON string>, 'insights': <list>}``, or ``None``
        when ``patents`` is empty or no patent has an embedding.
    """
    if not patents:
        return None

    update_progress('clustering', 'Extracting embeddings...')

    # Collect vectors and display metadata for patents that were embedded
    vectors = []
    records = []
    for entry in patents:
        if entry['embedding'] is None:
            continue
        vectors.append(entry['embedding'])

        summary = entry['abstract']
        if len(summary) > 200:
            summary = summary[:200] + "..."

        records.append({
            'title': entry['title'],
            'assignee': entry['assignee'],
            'year': entry['filing_year'],
            'abstract': summary,
            'link': entry['link']
        })

    if not vectors:
        return None

    embeddings_array = np.array(vectors)

    update_progress('clustering', 'Applying UMAP dimensionality reduction...')

    # Reduce to three dimensions (fixed seed)
    embedding_3d = umap.UMAP(n_components=3, random_state=42).fit_transform(embeddings_array)

    update_progress('clustering', 'Performing DBSCAN clustering...')

    # Plotting frame: metadata plus the three UMAP coordinates
    df = pd.DataFrame(records)
    for axis_index, axis_name in enumerate(('x', 'y', 'z')):
        df[axis_name] = embedding_3d[:, axis_index]

    def _cluster_stats(assigned):
        # (number of real clusters, number of noise points)
        cluster_count = len(set(assigned)) - (1 if -1 in assigned else 0)
        return cluster_count, list(assigned).count(-1)

    # DBSCAN on standardized coordinates
    scaled_embeddings = StandardScaler().fit_transform(embedding_3d)
    clusters = DBSCAN(eps=0.75, min_samples=5).fit_predict(scaled_embeddings)

    update_progress('analysis', 'Analyzing clusters and opportunities...')

    n_clusters, n_noise = _cluster_stats(clusters)
    print("\nClustering Statistics:")
    print(f"Number of clusters: {n_clusters}")
    print(f"Number of patents in sparse regions: {n_noise}")
    print(f"Total number of patents: {len(clusters)}")

    if n_noise == 0:
        # Nothing was flagged as sparse: retry once with a smaller radius
        # and a larger minimum cluster size.
        print("\nWarning: No sparse regions detected. Consider adjusting DBSCAN parameters.")
        clusters = DBSCAN(eps=0.5, min_samples=7).fit_predict(scaled_embeddings)
        n_clusters, n_noise = _cluster_stats(clusters)
        print("\nRetrying with stricter parameters:")
        print(f"Number of clusters: {n_clusters}")
        print(f"Number of patents in sparse regions: {n_noise}")

    df['cluster'] = clusters

    update_progress('analysis', 'Generating cluster insights...')

    cluster_insights = analyze_clusters(df, clusters, embedding_3d)

    update_progress('visualization', 'Creating interactive plot...')

    # One HTML hover label per point, including its cluster / sparse-region tag
    hover_text = []
    for title, assignee, year, abstract, cluster_id in zip(
            df['title'], df['assignee'], df['year'], df['abstract'], df['cluster']):
        if cluster_id == -1:
            cluster_info = "<br><b>Region:</b> Sparse Area (Potential Innovation Zone)"
        else:
            cluster_info = f"<br><b>Cluster:</b> {cluster_id}"
        hover_text.append(
            f"<b>{title}</b><br><br>"
            f"<b>By:</b> {assignee} ({year})<br>"
            f"{cluster_info}<br><br>"
            f"<b>Abstract:</b><br>{abstract}"
        )

    marker_style = dict(
        size=10,
        color=clusters,
        colorscale='Viridis',
        opacity=0.8,
        showscale=True,
        colorbar=dict(
            title="Clusters<br>(-1: Opportunity Zones)",
            tickfont=dict(size=10),
            titlefont=dict(size=10)
        )
    )
    hover_style = dict(
        bgcolor="white",
        font_size=12,
        font_family="Arial",
        align="left"
    )
    scatter = go.Scatter3d(
        x=df['x'],
        y=df['y'],
        z=df['z'],
        mode='markers',
        marker=marker_style,
        text=hover_text,
        hoverinfo='text',
        hoverlabel=hover_style,
        customdata=df['link'].tolist()
    )
    fig = go.Figure(data=[scatter])

    # Scene, camera and theme
    camera = dict(
        up=dict(x=0, y=0, z=1),
        center=dict(x=0, y=0, z=0),
        eye=dict(x=1.5, y=1.5, z=1.5)
    )
    fig.update_layout(
        title="Patent Technology Landscape with Innovation Clusters",
        scene=dict(
            xaxis_title="UMAP 1",
            yaxis_title="UMAP 2",
            zaxis_title="UMAP 3",
            camera=camera
        ),
        margin=dict(l=0, r=0, b=0, t=30),
        showlegend=False,
        template="plotly_dark",
        hoverlabel_align='left',
        hoverdistance=100,
        hovermode='closest'
    )

    # Show only the prepared hover text (suppresses the trace-name box)
    fig.update_traces(
        hovertemplate='%{text}<extra></extra>'
    )

    update_progress('visualization', 'Finalizing visualization...')

    return {
        'plot': fig.to_json(),
        'insights': cluster_insights
    }
|
461 |
+
|
462 |
@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    return render_template('index.html')
|
465 |
|
466 |
+
@app.route('/progress')
def get_progress():
    """Stream progress updates to the browser as server-sent events.

    Each item taken from ``progress_queue`` is sent as one JSON ``data:``
    event. The stream ends when the ``'DONE'`` sentinel is received or when
    no update arrives within 30 seconds.
    """
    def generate():
        while True:
            try:
                update = progress_queue.get(timeout=30)  # 30 second timeout
            except queue.Empty:
                return
            if update == 'DONE':
                return
            yield f"data: {json.dumps(update)}\n\n"

    return Response(generate(), mimetype='text/event-stream')
|
479 |
+
|
480 |
+
def update_progress(step, status='processing'):
    """Queue a progress event for the ``/progress`` stream.

    Args:
        step: Name of the pipeline stage being reported.
        status: Status text for that stage (defaults to ``'processing'``).
    """
    stamp = datetime.now().strftime('%H:%M:%S')
    event = {
        'step': step,
        'status': status,
        'timestamp': stamp
    }
    progress_queue.put(event)
|
487 |
+
|
488 |
@app.route('/search', methods=['POST'])
|
489 |
def search():
|
490 |
keywords = request.form.get('keywords', '')
|
|
|
492 |
return jsonify({'error': 'Please enter search keywords'})
|
493 |
|
494 |
print(f"\nProcessing search request for keywords: {keywords}")
|
|
|
|
|
|
|
495 |
|
496 |
+
try:
|
497 |
+
# Clear any existing progress updates
|
498 |
+
while not progress_queue.empty():
|
499 |
+
progress_queue.get_nowait()
|
500 |
+
|
501 |
+
# Search for patents
|
502 |
+
update_progress('search')
|
503 |
+
patents = search_patents(keywords)
|
504 |
+
if not patents:
|
505 |
+
return jsonify({'error': 'No patents found or an error occurred'})
|
506 |
+
|
507 |
+
# Generate embeddings
|
508 |
+
update_progress('embedding')
|
509 |
+
|
510 |
+
# Cluster analysis
|
511 |
+
update_progress('clustering')
|
512 |
+
|
513 |
+
# Innovation analysis
|
514 |
+
update_progress('analysis')
|
515 |
+
|
516 |
+
# Create visualization
|
517 |
+
update_progress('visualization')
|
518 |
+
viz_data = create_3d_visualization(patents)
|
519 |
+
if not viz_data:
|
520 |
+
return jsonify({'error': 'Error creating visualization'})
|
521 |
+
|
522 |
+
# Signal completion
|
523 |
+
progress_queue.put('DONE')
|
524 |
+
|
525 |
+
return jsonify({
|
526 |
+
'visualization': viz_data['plot'],
|
527 |
+
'insights': viz_data['insights']
|
528 |
+
})
|
529 |
+
|
530 |
+
except Exception as e:
|
531 |
+
print(f"Error processing request: {e}")
|
532 |
+
progress_queue.put('DONE')
|
533 |
+
return jsonify({'error': str(e)})
|
534 |
|
535 |
if __name__ == '__main__':
|
536 |
app.run(host='0.0.0.0', port=7860)
|
requirements.txt
CHANGED
@@ -1,10 +1,9 @@
|
|
1 |
flask==2.0.1
|
2 |
-
|
3 |
-
requests==2.
|
4 |
-
gunicorn==20.1.0
|
5 |
-
itsdangerous==2.0.1
|
6 |
-
Jinja2==3.0.1
|
7 |
-
MarkupSafe==2.0.1
|
8 |
openai==0.28.1
|
9 |
-
|
10 |
-
|
|
|
|
|
|
|
|
1 |
flask==2.0.1
|
2 |
+
python-dotenv==0.19.0
|
3 |
+
requests==2.26.0
|
|
|
|
|
|
|
|
|
4 |
openai==0.28.1
|
5 |
+
numpy==1.24.3
|
6 |
+
pandas==2.0.3
|
7 |
+
umap-learn==0.5.3
|
8 |
+
plotly==5.3.1
|
9 |
+
scikit-learn==1.3.0
|
templates/index.html
CHANGED
@@ -3,43 +3,272 @@
|
|
3 |
<head>
|
4 |
<meta charset="UTF-8">
|
5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
-
<title>Patent
|
7 |
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/tailwind.min.css" rel="stylesheet">
|
8 |
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
</head>
|
10 |
-
<body class="bg-gray-100 min-h-screen">
|
11 |
<div class="container mx-auto px-4 py-8">
|
12 |
-
<h1 class="text-4xl font-bold text-center text-blue-
|
13 |
|
14 |
<!-- Search Form -->
|
15 |
<div class="max-w-2xl mx-auto mb-8">
|
16 |
-
<form id="searchForm" class="bg-
|
17 |
<div class="mb-4">
|
18 |
<input type="text" id="keywords" name="keywords"
|
19 |
-
class="
|
20 |
-
placeholder="Enter keywords to
|
21 |
</div>
|
22 |
<div class="flex items-center justify-center">
|
23 |
<button type="submit"
|
24 |
-
class="bg-blue-
|
25 |
-
|
26 |
</button>
|
27 |
</div>
|
28 |
</form>
|
29 |
</div>
|
30 |
|
31 |
-
<!-- Loading
|
32 |
-
<div id="loading" class="hidden">
|
33 |
-
<div class="
|
34 |
-
<div class="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
</div>
|
36 |
</div>
|
37 |
|
38 |
-
<!--
|
39 |
-
<div id="
|
|
|
|
|
|
|
40 |
</div>
|
41 |
|
42 |
<script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
$(document).ready(function() {
|
44 |
$('#searchForm').on('submit', function(e) {
|
45 |
e.preventDefault();
|
@@ -50,54 +279,102 @@
|
|
50 |
return;
|
51 |
}
|
52 |
|
53 |
-
//
|
54 |
$('#loading').removeClass('hidden');
|
55 |
-
$('#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
$.ajax({
|
58 |
url: '/search',
|
59 |
method: 'POST',
|
60 |
data: { keywords: keywords },
|
61 |
success: function(response) {
|
62 |
-
$('#loading').addClass('hidden');
|
63 |
-
|
64 |
if (response.error) {
|
65 |
-
|
|
|
|
|
66 |
return;
|
67 |
}
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
}
|
73 |
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
},
|
98 |
error: function() {
|
|
|
99 |
$('#loading').addClass('hidden');
|
100 |
-
|
101 |
}
|
102 |
});
|
103 |
});
|
|
|
3 |
<head>
|
4 |
<meta charset="UTF-8">
|
5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>Patent Technology Landscape</title>
|
7 |
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/tailwind.min.css" rel="stylesheet">
|
8 |
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
|
9 |
+
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
|
10 |
+
<style>
|
11 |
+
.visualization-container {
|
12 |
+
width: 100%;
|
13 |
+
height: 50vh;
|
14 |
+
margin-bottom: 20px;
|
15 |
+
background-color: #1a1a1a;
|
16 |
+
border-radius: 8px;
|
17 |
+
overflow: hidden;
|
18 |
+
}
|
19 |
+
.insights-panel {
|
20 |
+
background-color: #2d2d2d;
|
21 |
+
border-radius: 8px;
|
22 |
+
height: calc(120vh - 40px);
|
23 |
+
overflow-y: auto;
|
24 |
+
transition: all 0.3s ease;
|
25 |
+
}
|
26 |
+
.cluster-card {
|
27 |
+
background-color: #3d3d3d;
|
28 |
+
border-radius: 6px;
|
29 |
+
margin-bottom: 10px;
|
30 |
+
transition: all 0.2s ease;
|
31 |
+
}
|
32 |
+
.cluster-card:hover {
|
33 |
+
transform: translateY(-2px);
|
34 |
+
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
35 |
+
}
|
36 |
+
.opportunity-card {
|
37 |
+
background-color: #2d4a3e;
|
38 |
+
border-radius: 6px;
|
39 |
+
margin-bottom: 10px;
|
40 |
+
transition: all 0.2s ease;
|
41 |
+
}
|
42 |
+
.opportunity-card:hover {
|
43 |
+
transform: translateY(-2px);
|
44 |
+
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
45 |
+
}
|
46 |
+
.loading-container {
|
47 |
+
background-color: #2d2d2d;
|
48 |
+
border-radius: 8px;
|
49 |
+
padding: 1.5rem;
|
50 |
+
margin-bottom: 1rem;
|
51 |
+
}
|
52 |
+
.progress-bar {
|
53 |
+
width: 100%;
|
54 |
+
height: 8px;
|
55 |
+
background-color: #4a5568;
|
56 |
+
border-radius: 4px;
|
57 |
+
margin-bottom: 1rem;
|
58 |
+
overflow: hidden;
|
59 |
+
}
|
60 |
+
.progress-fill {
|
61 |
+
height: 100%;
|
62 |
+
width: 0;
|
63 |
+
background-color: #4299e1;
|
64 |
+
transition: width 0.3s ease;
|
65 |
+
}
|
66 |
+
.status-list {
|
67 |
+
max-height: 150px;
|
68 |
+
overflow-y: auto;
|
69 |
+
}
|
70 |
+
.status-item {
|
71 |
+
display: flex;
|
72 |
+
align-items: center;
|
73 |
+
margin-bottom: 0.75rem;
|
74 |
+
padding: 0.5rem;
|
75 |
+
border-radius: 4px;
|
76 |
+
background-color: #374151;
|
77 |
+
opacity: 0.6;
|
78 |
+
transition: all 0.3s ease;
|
79 |
+
}
|
80 |
+
.status-item.active {
|
81 |
+
opacity: 1;
|
82 |
+
background-color: #3b4f7d;
|
83 |
+
}
|
84 |
+
.status-item.complete {
|
85 |
+
opacity: 0.8;
|
86 |
+
background-color: #2d4a3e;
|
87 |
+
}
|
88 |
+
.status-icon {
|
89 |
+
width: 24px;
|
90 |
+
height: 24px;
|
91 |
+
margin-right: 12px;
|
92 |
+
border-radius: 50%;
|
93 |
+
display: inline-flex;
|
94 |
+
align-items: center;
|
95 |
+
justify-content: center;
|
96 |
+
font-size: 12px;
|
97 |
+
font-weight: bold;
|
98 |
+
}
|
99 |
+
.status-text {
|
100 |
+
flex-grow: 1;
|
101 |
+
}
|
102 |
+
.status-time {
|
103 |
+
font-size: 0.8rem;
|
104 |
+
color: #9ca3af;
|
105 |
+
margin-left: 8px;
|
106 |
+
}
|
107 |
+
.status-pending {
|
108 |
+
background-color: #4a5568;
|
109 |
+
}
|
110 |
+
.status-processing {
|
111 |
+
background-color: #4299e1;
|
112 |
+
animation: pulse 1.5s infinite;
|
113 |
+
}
|
114 |
+
.status-complete {
|
115 |
+
background-color: #48bb78;
|
116 |
+
}
|
117 |
+
@keyframes pulse {
|
118 |
+
0% { opacity: 0.6; }
|
119 |
+
50% { opacity: 1; }
|
120 |
+
100% { opacity: 0.6; }
|
121 |
+
}
|
122 |
+
/* Custom scrollbar for insights panel */
|
123 |
+
.insights-panel::-webkit-scrollbar {
|
124 |
+
width: 8px;
|
125 |
+
}
|
126 |
+
.insights-panel::-webkit-scrollbar-track {
|
127 |
+
background: #1a1a1a;
|
128 |
+
border-radius: 4px;
|
129 |
+
}
|
130 |
+
.insights-panel::-webkit-scrollbar-thumb {
|
131 |
+
background: #4a4a4a;
|
132 |
+
border-radius: 4px;
|
133 |
+
}
|
134 |
+
.insights-panel::-webkit-scrollbar-thumb:hover {
|
135 |
+
background: #555;
|
136 |
+
}
|
137 |
+
</style>
|
138 |
</head>
|
139 |
+
<body class="bg-gray-900 text-gray-100 min-h-screen">
|
140 |
<div class="container mx-auto px-4 py-8">
|
141 |
+
<h1 class="text-4xl font-bold text-center text-blue-400 mb-8">Patent Technology Landscape</h1>
|
142 |
|
143 |
<!-- Search Form -->
|
144 |
<div class="max-w-2xl mx-auto mb-8">
|
145 |
+
<form id="searchForm" class="bg-gray-800 shadow-lg rounded px-8 pt-6 pb-8 mb-4">
|
146 |
<div class="mb-4">
|
147 |
<input type="text" id="keywords" name="keywords"
|
148 |
+
class="w-full bg-gray-700 text-white rounded border border-gray-600 focus:border-blue-500 focus:ring-2 focus:ring-blue-900 py-2 px-4"
|
149 |
+
placeholder="Enter keywords to explore patent landscape...">
|
150 |
</div>
|
151 |
<div class="flex items-center justify-center">
|
152 |
<button type="submit"
|
153 |
+
class="bg-blue-600 hover:bg-blue-700 text-white font-bold py-2 px-6 rounded focus:outline-none focus:shadow-outline transform hover:scale-105 transition-transform duration-200">
|
154 |
+
Explore
|
155 |
</button>
|
156 |
</div>
|
157 |
</form>
|
158 |
</div>
|
159 |
|
160 |
+
<!-- Loading Status -->
|
161 |
+
<div id="loading" class="loading-container hidden">
|
162 |
+
<div class="mb-4">
|
163 |
+
<div class="flex justify-between items-center mb-2">
|
164 |
+
<span class="text-sm font-medium" id="progress-text">Initializing...</span>
|
165 |
+
<span class="text-sm font-medium" id="progress-percentage">0%</span>
|
166 |
+
</div>
|
167 |
+
<div class="progress-bar">
|
168 |
+
<div class="progress-fill" id="progress-fill"></div>
|
169 |
+
</div>
|
170 |
+
</div>
|
171 |
+
<div class="status-list">
|
172 |
+
<div class="status-item" data-step="search">
|
173 |
+
<div class="status-icon status-pending">1</div>
|
174 |
+
<div class="status-text">Searching for patents</div>
|
175 |
+
<div class="status-time"></div>
|
176 |
+
</div>
|
177 |
+
<div class="status-item" data-step="embedding">
|
178 |
+
<div class="status-icon status-pending">2</div>
|
179 |
+
<div class="status-text">Generating patent embeddings</div>
|
180 |
+
<div class="status-time"></div>
|
181 |
+
</div>
|
182 |
+
<div class="status-item" data-step="clustering">
|
183 |
+
<div class="status-icon status-pending">3</div>
|
184 |
+
<div class="status-text">Identifying technology clusters</div>
|
185 |
+
<div class="status-time"></div>
|
186 |
+
</div>
|
187 |
+
<div class="status-item" data-step="analysis">
|
188 |
+
<div class="status-icon status-pending">4</div>
|
189 |
+
<div class="status-text">Analyzing innovation opportunities</div>
|
190 |
+
<div class="status-time"></div>
|
191 |
+
</div>
|
192 |
+
<div class="status-item" data-step="visualization">
|
193 |
+
<div class="status-icon status-pending">5</div>
|
194 |
+
<div class="status-text">Creating interactive visualization</div>
|
195 |
+
<div class="status-time"></div>
|
196 |
+
</div>
|
197 |
</div>
|
198 |
</div>
|
199 |
|
200 |
+
<!-- Visualization Container -->
|
201 |
+
<div id="visualization" class="visualization-container"></div>
|
202 |
+
|
203 |
+
<!-- Insights Panel -->
|
204 |
+
<div id="insights" class="insights-panel p-4"></div>
|
205 |
</div>
|
206 |
|
207 |
<script>
|
208 |
+
// Currently open EventSource on /progress, or null when no stream is open.
let progressEventSource = null;
|
209 |
+
|
210 |
+
/**
 * Open a server-sent-events stream on /progress, closing any stream that is
 * still open first, and forward every message to updateProgress().
 */
function startProgressMonitoring() {
    // Only one stream is kept at a time.
    const previous = progressEventSource;
    if (previous) {
        previous.close();
    }

    const source = new EventSource('/progress');
    progressEventSource = source;

    // Each message body is JSON carrying `step` and `status` fields.
    source.onmessage = (event) => {
        const payload = JSON.parse(event.data);
        updateProgress(payload.step, payload.status);
    };

    // On a stream error, close the stream so the browser stops reconnecting.
    source.onerror = () => {
        progressEventSource.close();
    };
}
|
225 |
+
|
226 |
+
/**
 * Close the progress stream, if one is open, and clear the shared handle.
 */
function stopProgressMonitoring() {
    if (!progressEventSource) {
        return; // nothing to close
    }
    progressEventSource.close();
    progressEventSource = null;
}
|
232 |
+
|
233 |
+
/**
 * Reflect a progress event in the loading panel.
 *
 * Sets the progress bar width and percentage from the position of `step`
 * in the pipeline, updates the headline text, and marks every status row
 * as complete, active, or pending relative to `step`.
 *
 * @param {string} step - One of 'search', 'embedding', 'clustering',
 *     'analysis', 'visualization'. Any other value is ignored.
 * @param {*} status - Forwarded from the progress event; not used here.
 */
function updateProgress(step, status) {
    const steps = ['search', 'embedding', 'clustering', 'analysis', 'visualization'];
    const stepIndex = steps.indexOf(step);

    // An unrecognised step would otherwise yield index -1, i.e. 0% progress
    // and every row reset to pending, so leave the display untouched.
    if (stepIndex === -1) {
        return;
    }

    const progress = ((stepIndex + 1) / steps.length) * 100;

    // Update progress bar
    $('#progress-fill').css('width', `${progress}%`);
    $('#progress-percentage').text(`${Math.round(progress)}%`);

    // Update status text
    const statusTexts = {
        'search': 'Searching patent database...',
        'embedding': 'Generating patent embeddings...',
        'clustering': 'Identifying technology clusters...',
        'analysis': 'Analyzing innovation opportunities...',
        'visualization': 'Creating interactive visualization...'
    };
    $('#progress-text').text(statusTexts[step]);

    // Update status items
    steps.forEach((s, i) => {
        const item = $(`.status-item[data-step="${s}"]`);
        const time = item.find('.status-time');

        if (i < stepIndex) {
            // Earlier steps are done; stamp a completion time only once.
            item.removeClass('active').addClass('complete');
            if (!time.text()) {
                time.text(new Date().toLocaleTimeString());
            }
        } else if (i === stepIndex) {
            item.addClass('active').removeClass('complete');
            time.text('In progress...');
        } else {
            // Later steps are still pending.
            item.removeClass('active complete');
            time.text('');
        }
    });
}
|
271 |
+
|
272 |
$(document).ready(function() {
|
273 |
$('#searchForm').on('submit', function(e) {
|
274 |
e.preventDefault();
|
|
|
279 |
return;
|
280 |
}
|
281 |
|
282 |
+
// Reset and show loading status
|
283 |
$('#loading').removeClass('hidden');
|
284 |
+
$('#visualization').empty();
|
285 |
+
$('#insights').empty();
|
286 |
+
$('.progress-fill').css('width', '0%');
|
287 |
+
$('#progress-percentage').text('0%');
|
288 |
+
$('#progress-text').text('Initializing...');
|
289 |
+
$('.status-item').removeClass('active complete');
|
290 |
+
$('.status-time').text('');
|
291 |
+
|
292 |
+
// Start progress monitoring
|
293 |
+
startProgressMonitoring();
|
294 |
|
295 |
$.ajax({
|
296 |
url: '/search',
|
297 |
method: 'POST',
|
298 |
data: { keywords: keywords },
|
299 |
success: function(response) {
|
|
|
|
|
300 |
if (response.error) {
|
301 |
+
stopProgressMonitoring();
|
302 |
+
$('#loading').addClass('hidden');
|
303 |
+
alert(response.error);
|
304 |
return;
|
305 |
}
|
306 |
|
307 |
+
// Display visualization
|
308 |
+
if (response.visualization) {
|
309 |
+
const vizData = JSON.parse(response.visualization);
|
310 |
+
Plotly.newPlot('visualization', vizData.data, vizData.layout);
|
311 |
+
|
312 |
+
document.getElementById('visualization').on('plotly_click', function(data) {
|
313 |
+
const link = data.points[0].customdata;
|
314 |
+
if (link) {
|
315 |
+
window.open(link, '_blank');
|
316 |
+
}
|
317 |
+
});
|
318 |
}
|
319 |
|
320 |
+
// Display insights with two-column layout
|
321 |
+
if (response.insights) {
|
322 |
+
let insightsHtml = '<div class="grid grid-cols-1 lg:grid-cols-2 gap-6 p-6">';
|
323 |
+
|
324 |
+
// Left column: Innovation Opportunities
|
325 |
+
insightsHtml += '<div class="col-span-1">';
|
326 |
+
insightsHtml += '<h3 class="text-2xl font-bold mb-4 text-green-400">Innovation Opportunities</h3>';
|
327 |
+
|
328 |
+
const opportunityZones = response.insights.filter(i => i.type === 'opportunity_zone');
|
329 |
+
if (opportunityZones.length > 0) {
|
330 |
+
insightsHtml += '<div class="space-y-4">';
|
331 |
+
opportunityZones.forEach(zone => {
|
332 |
+
insightsHtml += `
|
333 |
+
<div class="opportunity-card p-6 text-base">
|
334 |
+
<div class="text-green-300 text-lg font-bold mb-3">Sparse Region (${zone.size} patents)</div>
|
335 |
+
<div class="text-gray-300 whitespace-pre-line leading-relaxed">${zone.description}</div>
|
336 |
+
</div>
|
337 |
+
`;
|
338 |
+
});
|
339 |
+
insightsHtml += '</div>';
|
340 |
+
} else {
|
341 |
+
insightsHtml += '<p class="text-gray-400">No innovation opportunities identified.</p>';
|
342 |
+
}
|
343 |
+
insightsHtml += '</div>';
|
344 |
+
|
345 |
+
// Right column: Technology Clusters
|
346 |
+
insightsHtml += '<div class="col-span-1">';
|
347 |
+
insightsHtml += '<h3 class="text-2xl font-bold mb-4 text-blue-400">Technology Clusters</h3>';
|
348 |
+
|
349 |
+
const clusters = response.insights.filter(i => i.type === 'cluster');
|
350 |
+
if (clusters.length > 0) {
|
351 |
+
insightsHtml += '<div class="space-y-4">';
|
352 |
+
clusters.forEach(cluster => {
|
353 |
+
insightsHtml += `
|
354 |
+
<div class="cluster-card p-6 text-base">
|
355 |
+
<div class="text-blue-300 text-lg font-bold mb-3">Cluster ${cluster.id} (${cluster.size} patents)</div>
|
356 |
+
<div class="text-gray-300 whitespace-pre-line leading-relaxed">${cluster.description}</div>
|
357 |
+
</div>
|
358 |
+
`;
|
359 |
+
});
|
360 |
+
insightsHtml += '</div>';
|
361 |
+
} else {
|
362 |
+
insightsHtml += '<p class="text-gray-400">No technology clusters identified.</p>';
|
363 |
+
}
|
364 |
+
insightsHtml += '</div>';
|
365 |
|
366 |
+
insightsHtml += '</div>';
|
367 |
+
$('#insights').html(insightsHtml);
|
368 |
+
}
|
369 |
+
|
370 |
+
// Stop progress monitoring and hide loading status
|
371 |
+
stopProgressMonitoring();
|
372 |
+
$('#loading').addClass('hidden');
|
373 |
},
|
374 |
error: () => {
    // Request failed: tear down the progress stream and loading panel
    // before telling the user.
    stopProgressMonitoring();
    $('#loading').addClass('hidden');
    alert('An error occurred while analyzing patents.');
}
|
379 |
});
|
380 |
});
|