jzou19950715 committed on
Commit 7f04463 · verified · 1 Parent(s): bc642fc

Update app.py

Files changed (1)
  1. app.py +40 -146
app.py CHANGED
@@ -3,13 +3,12 @@ import logging
from datetime import datetime
from typing import Dict, List, Optional, Any
import gradio as gr
- from openai import AsyncOpenAI # Changed to AsyncOpenAI
+ from openai import AsyncOpenAI

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

- # System prompts remain the same as before
CONVERSATION_PROMPT = """You are LOSS DOG, a professional profile builder. Your goal is to have natural conversations
with users to gather information about their professional background across 9 categories:

@@ -28,16 +27,9 @@ but respect their boundaries. Once you believe you have gathered sufficient info
have nothing more to share), let them know they can click 'Generate Profile' to proceed.
"""

- EXTRACTION_PROMPT = """You are a professional information extraction system. Your task is to methodically analyze conversations and organize information into 9 specific categories. Process each category thoroughly and output in structured JSON format with no additional text.
+ EXTRACTION_PROMPT = """You are a professional information extraction system. Your task is to extract information from the potentially unstructured conversation and return ONLY a valid JSON object. Do not include any explanatory text before or after the JSON.

- ANALYTICAL PROCESS:
- 1. Read entire conversation history
- 2. Extract explicit and implicit information
- 3. Make reasonable inferences when appropriate
- 4. Structure data according to defined schema
- 5. Include confidence scores for all extracted information
-
- OUTPUT SCHEMA:
+ Return the data in this exact structure:
{
"work_history_experience": {
"positions": [
@@ -48,78 +40,50 @@ OUTPUT SCHEMA:
"location": string,
"employment_type": string,
"adaptability": {
- "career_shifts": string[],
- "upskilling": string[]
+ "career_shifts": [],
+ "upskilling": []
},
- "promotions": string[],
- "confidence": float
+ "promotions": [],
+ "confidence": number
}
]
},
"salary_compensation": {
"history": [
{
- "base_salary": number | null,
- "bonus_structure": string | null,
+ "base_salary": number,
+ "bonus_structure": string,
"stock_options": {
"type": string,
"details": string
},
- "commission": string | null,
+ "commission": null,
"benefits": {
"health": string,
"pto": string,
"retirement": string,
- "other": string[]
+ "other": []
},
- "confidence": float
+ "confidence": number
}
]
},
"skills_certifications": {
- "hard_skills": string[],
- "soft_skills": string[],
- "programming_languages": string[],
- "spoken_languages": string[],
- "certifications": [
- {
- "name": string,
- "issuer": string,
- "date": string,
- "confidence": float
- }
- ],
- "licenses": [
- {
- "type": string,
- "issuer": string,
- "valid_until": string,
- "confidence": float
- }
- ]
+ "hard_skills": [],
+ "soft_skills": [],
+ "certifications": [],
+ "licenses": []
},
"education_learning": {
- "formal_education": [
- {
- "degree": string,
- "institution": string,
- "gpa": number | null,
- "research": string[],
- "period": {
- "start": string,
- "end": string | null
- },
- "confidence": float
- }
- ],
+ "formal_education": [],
"online_courses": [],
"executive_education": []
},
"personal_branding": {
"portfolio": {
- "github": string | null,
- "behance": string | null,
- "other": string[]
+ "github": null,
+ "behance": null,
+ "other": []
},
"blog_posts": [],
"blockchain_projects": {
@@ -127,7 +91,6 @@ OUTPUT SCHEMA:
"defi": [],
"dapps": []
},
- "public_speaking": [],
"social_media": {
"platforms": [],
"influence_metrics": {}
@@ -145,14 +108,7 @@ OUTPUT SCHEMA:
"social_proof_networking": {
"mentors": [],
"references": [],
- "memberships": [
- {
- "organization": string,
- "type": string,
- "period": string,
- "confidence": float
- }
- ],
+ "memberships": [],
"conference_engagement": []
},
"project_contributions": {
@@ -162,8 +118,8 @@
"patents": [],
"impact": {
"description": string,
- "metrics": string[],
- "confidence": float
+ "metrics": [],
+ "confidence": number
}
},
"work_performance_metrics": {
@@ -175,40 +131,7 @@
}
}

- EXTRACTION GUIDELINES:
-
- 1. Process systematically:
- - Analyze conversation thoroughly
- - Look for both direct statements and implied information
- - Cross-reference information across different parts of conversation
- - Make reasonable inferences when appropriate
-
- 2. For each piece of information:
- - Clean and standardize the data
- - Assign confidence scores (0.0-1.0)
- - Mark inferred information
- - Include source context where relevant
-
- 3. Quality requirements:
- - Use consistent date formats (YYYY-MM-DD)
- - Standardize company names and titles
- - Use empty arrays [] for missing information
- - Never use null for array fields
- - Include confidence scores for all extracted data
-
- 4. Handle missing information:
- - Use empty arrays [] rather than null
- - Mark inferred information clearly
- - Include partial information when complete data isn't available
- - Note uncertainty in confidence scores
-
- Remember to:
- - Process each category thoroughly
- - Cross-reference information for consistency
- - Make reasonable inferences when appropriate
- - Maintain consistent formatting
- - Include all required fields even if empty
- """
+ IMPORTANT: Return ONLY the JSON. Do not add any explanation text."""

class ProfileBuilder:
def __init__(self):
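The rewritten EXTRACTION_PROMPT now relies entirely on the model honouring the "Return ONLY the JSON" instruction. A minimal sketch of how the same extraction call could additionally request JSON mode, assuming an openai client version that supports response_format; the extract_profile helper is hypothetical and not part of this commit, and it assumes the module-level EXTRACTION_PROMPT defined above:

```python
import json
from openai import AsyncOpenAI

async def extract_profile(client: AsyncOpenAI, conversation_text: str) -> dict:
    # Sketch only: same model and temperature as the commit, plus JSON mode,
    # which constrains the response to a syntactically valid JSON object.
    completion = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": EXTRACTION_PROMPT},
            {"role": "user", "content": conversation_text},
        ],
        temperature=0.3,
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)
```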
@@ -216,21 +139,17 @@ class ProfileBuilder:
self.client = None

def _initialize_client(self, api_key: str) -> None:
- """Initialize AsyncOpenAI client with API key."""
if not api_key.startswith("sk-"):
raise ValueError("Invalid API key format")
self.client = AsyncOpenAI(api_key=api_key)

async def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
- """Process a user message through conversation phase."""
try:
if not self.client:
self._initialize_client(api_key)

- # Add message to history
self.conversation_history.append({"role": "user", "content": message})

- # Get AI response - properly awaited
completion = await self.client.chat.completions.create(
model="gpt-4o-mini",
messages=[
@@ -250,18 +169,15 @@
return {"error": str(e)}

async def generate_profile(self) -> Dict[str, Any]:
- """Process conversation history into structured profile."""
try:
if not self.client:
raise ValueError("OpenAI client not initialized")

- # Convert conversation history to text
conversation_text = "\n".join(
f"{msg['role']}: {msg['content']}"
for msg in self.conversation_history
)

- # Extract structured information - properly awaited
completion = await self.client.chat.completions.create(
model="gpt-4o-mini",
messages=[
@@ -271,21 +187,12 @@
temperature=0.3
)

- # Get the raw output text from the model
- raw_output = completion.choices[0].message.content
- logger.info(f"Raw extraction output: {raw_output}")
-
- # Attempt to parse the JSON
- try:
- profile_data = json.loads(raw_output)
- except json.JSONDecodeError as decode_error:
- logger.error("Failed to decode JSON. The output may not be valid JSON.")
- profile_data = None # Indicate failure to parse
+ # Clean and parse the JSON response
+ response_text = completion.choices[0].message.content.strip()
+ profile_data = json.loads(response_text)

- # Build the profile output including metadata and raw output
profile = {
"profile_data": profile_data,
- "raw_output": raw_output,
"metadata": {
"generated_at": datetime.now().isoformat(),
"conversation_length": len(self.conversation_history)
@@ -298,17 +205,16 @@
with open(filename, 'w', encoding='utf-8') as f:
json.dump(profile, f, indent=2)

- return {
- "profile": profile,
- "filename": filename
- }
+ return profile, filename

+ except json.JSONDecodeError as e:
+ logger.error(f"JSON parsing error: {str(e)}\nRaw output: {response_text}")
+ return {"error": "Failed to parse profile data"}, None
except Exception as e:
logger.error(f"Error generating profile: {str(e)}")
- return {"error": str(e)}
+ return {"error": str(e)}, None

def create_gradio_interface():
- """Create the Gradio interface."""
builder = ProfileBuilder()

with gr.Blocks(theme=gr.themes.Soft()) as demo:
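With this hunk, generate_profile returns a two-element tuple: (profile, filename) on success and ({"error": ...}, None) when parsing or the API call fails (its Dict[str, Any] annotation no longer reflects that tuple). A short sketch of consuming that contract outside Gradio; save_profile is a hypothetical wrapper assumed to live in the same module as ProfileBuilder and logger:

```python
async def save_profile(builder: ProfileBuilder) -> None:
    # Unpack the (profile, filename) tuple and branch on the error key.
    profile, filename = await builder.generate_profile()
    if "error" in profile:
        logger.error("Extraction failed: %s", profile["error"])
        return
    logger.info("Profile saved to %s (%d conversation turns)",
                filename, profile["metadata"]["conversation_length"])
```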
@@ -333,13 +239,9 @@ def create_gradio_interface():

with gr.Column(scale=1):
generate_btn = gr.Button("Generate Profile")
- # JSON output for structured profile
- profile_output = gr.JSON(label="Generated Profile (Parsed JSON)")
- # Markdown output to always show the raw AI output
- raw_output_markdown = gr.Markdown(label="Raw Output from AI")
+ profile_output = gr.JSON(label="Generated Profile")
download_btn = gr.File(label="Download Profile")

- # Event handlers
async def on_message(message: str, history: List[List[str]], key: str):
if not message.strip():
return history, None
@@ -353,16 +255,11 @@
return history, None

async def on_generate():
- result = await builder.generate_profile()
- if "error" in result:
- error_text = f"Error generating profile: {result['error']}"
- return {"error": error_text}, None, error_text
- profile = result["profile"]
- # Prepare the raw output as markdown. Wrapping in triple backticks for code formatting.
- raw_markdown = f"```json\n{profile.get('raw_output', '')}\n```"
- return profile, result["filename"], raw_markdown
+ profile, filename = await builder.generate_profile()
+ if "error" in profile:
+ return profile, None
+ return profile["profile_data"], filename

- # Bind events
msg.submit(
on_message,
inputs=[msg, chatbot, api_key],
@@ -377,15 +274,12 @@

generate_btn.click(
on_generate,
- outputs=[profile_output, download_btn, raw_output_markdown]
+ outputs=[profile_output, download_btn]
)

return demo

if __name__ == "__main__":
demo = create_gradio_interface()
- demo.queue() # Add queue for async support
- demo.launch(
- server_name="0.0.0.0",
- server_port=7860
- )
+ demo.queue()
+ demo.launch(server_name="0.0.0.0", server_port=7860)
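A minimal way to exercise the updated flow end to end without the Gradio UI, assuming the file above is saved as app.py and OPENAI_API_KEY holds a key beginning with "sk-" (both assumptions; this driver is not part of the commit and makes real API calls):

```python
import asyncio
import os

from app import ProfileBuilder  # assumes the code above is importable as app

async def main() -> None:
    builder = ProfileBuilder()
    # One conversational turn, then extraction into the structured profile.
    reply = await builder.process_message(
        "I spent four years as a data engineer at a fintech startup.",
        os.environ["OPENAI_API_KEY"],
    )
    print(reply)
    profile, filename = await builder.generate_profile()
    print(filename or profile.get("error"))

asyncio.run(main())
```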
 