jzou19950715 committed on
Commit
210eb9d
·
verified ·
1 Parent(s): d3c9017

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +514 -535
app.py CHANGED
@@ -5,429 +5,531 @@ from datetime import datetime
5
  from typing import Dict, List, Optional, Any, Tuple
6
  from dataclasses import dataclass, field
7
 
8
- import openai # We'll use the official openai package
9
  import gradio as gr
10
 
11
-
12
  logging.basicConfig(
13
  level=logging.INFO,
14
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
15
  handlers=[
16
  logging.StreamHandler(),
17
- logging.FileHandler('app.log')
18
  ]
19
  )
20
  logger = logging.getLogger(__name__)
21
 
22
# System prompt steering the chat model toward resume-focused questioning.
# Sent as the "system" message on every ChatCompletion call.
SYSTEM_PROMPT = """
You are LOSS DOG, a Career and Education Information Extraction Assistant, designed to help users craft a compelling
and well-structured resume by extracting and organizing key details from conversations.

Core Capabilities:
- Proactively ask users about their career history, education, skills, certifications, projects, and achievements.
- Guide users through a step-by-step process to ensure all essential resume details are collected.
- Offer suggestions to improve clarity and impact based on best resume-building practices.
- Maintain a friendly, engaging, and professional tone to encourage users to share relevant information.
- Structure extracted data into well-organized resume sections.

Your goal is to make resume-building effortless by asking the right questions, extracting key information,
and presenting it in a clear, professional format.
"""
36
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
@dataclass
class ExtractedInfo:
    """Structure for storing extracted information relevant to building a resume."""
    # Raw text snippet captured from the conversation.
    text: str
    # Category label, e.g. "education", "skills" — one of
    # InformationExtractor.extraction_categories, or "misc" as fallback.
    category: str
    # Model-reported confidence; the extraction prompt requests a float
    # (presumably 0.0-1.0 — confirm against model output).
    confidence: float
    # Capture time; defaults to object-creation time.
    timestamp: datetime = field(default_factory=datetime.now)
    # Free-form extra data returned by the extraction call.
    metadata: Dict[str, Any] = field(default_factory=dict)
46
 
47
-
48
@dataclass
class ConversationState:
    """Tracks the conversation state and progress regarding extracted resume info."""
    extracted_items: List["ExtractedInfo"] = field(default_factory=list)
    categories_covered: List[str] = field(default_factory=list)
    current_focus: Optional[str] = None
    completion_percentage: float = 0.0
    last_error: Optional[str] = None
    last_update: datetime = field(default_factory=datetime.now)

    def add_extracted_info(self, info: "ExtractedInfo") -> None:
        """Record a newly extracted item and refresh the bookkeeping fields."""
        self.extracted_items.append(info)
        # First time we see a category, remember it (insertion order kept).
        category = info.category
        if category not in self.categories_covered:
            self.categories_covered.append(category)
        self.last_update = datetime.now()
64
class InformationExtractor:
    """
    Core class for handling information extraction from user messages to build a structured resume.

    Attributes:
        conversation_history: A list of dictionaries storing each message and its role (user/assistant).
        state: An instance of ConversationState, which tracks the extraction progress and items.
        extraction_categories: A list of main categories we want to extract for building the resume.
    """

    def __init__(self) -> None:
        """Initialize the InformationExtractor with default settings."""
        self.conversation_history: List[Dict[str, str]] = []
        self.state = ConversationState()
        self.extraction_categories = [
            "personal_info",
            "education",
            "work_experience",
            "skills",
            "achievements"
        ]
        # The API key is kept on the instance so later calls can re-use it.
        self._api_key: Optional[str] = None

    def _validate_api_key(self, api_key: str) -> bool:
        """
        Validate the OpenAI API key format.

        Args:
            api_key: The user's OpenAI API key.

        Returns:
            True if the API key is valid; raises ValueError otherwise.
        """
        if not api_key.strip():
            raise ValueError("API key cannot be empty.")
        if not api_key.startswith("sk-"):
            raise ValueError("Invalid API key format. It must start with 'sk-'.")
        return True

    def _initialize_client(self, api_key: str) -> None:
        """
        Initialize openai with the given API key.

        Args:
            api_key: The user's OpenAI API key.

        Raises:
            ValueError: If the key fails format validation (re-raised after logging).
        """
        try:
            if self._validate_api_key(api_key):
                openai.api_key = api_key
                self._api_key = api_key
        except Exception as e:
            logger.error(f"Error initializing OpenAI client: {str(e)}")
            raise

    def _add_to_history(self, role: str, content: str) -> None:
        """
        Add a message to the conversation history with a timestamp.

        Args:
            role: Either 'user' or 'assistant' to denote who sent the message.
            content: The message content.
        """
        self.conversation_history.append({
            "role": role,
            "content": content,
            "timestamp": datetime.now().isoformat()
        })

    def _get_ai_response(self, retries: int = 3) -> str:
        """
        Get an AI response from OpenAI's ChatCompletion endpoint.

        Args:
            retries: Number of times to retry upon failure.

        Returns:
            The text content of the AI's reply.

        Raises:
            ValueError: If no API key has been set.
            Exception: If all retries fail.
        """
        if not self._api_key:
            raise ValueError("OpenAI client not initialized (API key missing).")

        for attempt in range(retries):
            try:
                # FIX: ChatCompletion.create() returns a plain response object,
                # not a context manager; the original `with ... as response:`
                # form raised at runtime. Use a normal assignment instead.
                response = openai.ChatCompletion.create(
                    model="gpt-4o-mini",  # or "gpt-4" or any other available model
                    messages=[
                        {"role": "system", "content": SYSTEM_PROMPT},
                        *[{"role": msg["role"], "content": msg["content"]} for msg in self.conversation_history]
                    ],
                    temperature=0.7,
                    max_tokens=2000
                )
                return response["choices"][0]["message"]["content"]
            except Exception as e:
                logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
                if attempt == retries - 1:
                    raise Exception(f"Failed after {retries} attempts: {str(e)}")
        return ""

    def _extract_resume_information(self, text: str) -> "List[ExtractedInfo]":
        """
        Extract structured career and education-related information from the given text.

        Args:
            text: The combined user and AI text from which to extract relevant info.

        Returns:
            A list of ExtractedInfo objects; empty on any parsing or API error.
        """
        if not self._api_key:
            raise ValueError("OpenAI client not initialized (API key missing).")

        # Ask the model to produce JSON with extracted items.
        extraction_prompt = f"""
        Analyze the following text and extract relevant information for resume building.
        Focus on these key categories: {', '.join(self.extraction_categories)}.

        For each piece of extracted data, output a JSON structure with:
        {{
            "extracted_items": [
                {{
                    "text": "...",
                    "category": "...",
                    "confidence": 0.0,
                    "metadata": {{ ... }}
                }},
                ...
            ]
        }}

        Text to analyze: {text}
        """

        try:
            # FIX: same context-manager misuse as in _get_ai_response —
            # the response is a plain object, so assign it directly.
            response = openai.ChatCompletion.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": extraction_prompt}
                ],
                temperature=0.3,
                max_tokens=1000
            )
            raw_content = response["choices"][0]["message"]["content"]

            # NOTE(review): this assumes the model returns bare JSON with no
            # markdown fences — confirm against actual model output.
            analysis = json.loads(raw_content)
            extracted_items = []

            for item in analysis.get("extracted_items", []):
                new_info = ExtractedInfo(
                    text=item.get("text", ""),
                    category=item.get("category", "misc"),
                    confidence=float(item.get("confidence", 0.0)),
                    metadata=item.get("metadata", {})
                )
                extracted_items.append(new_info)

            return extracted_items

        except json.JSONDecodeError as e:
            logger.error(f"Error parsing extraction response: {str(e)}")
            return []
        except Exception as e:
            logger.error(f"Error during information extraction: {str(e)}")
            return []

    def _update_completion_status(self) -> None:
        """
        Update completion status based on categories covered and confidence levels.

        The percentage is the fraction of categories touched, scaled by the
        average confidence of all extracted items, capped at 100%.
        """
        total_categories = len(self.extraction_categories)
        covered_categories = len(self.state.categories_covered)
        base_completion = (covered_categories / total_categories) * 100 if total_categories > 0 else 0

        if self.state.extracted_items:
            avg_confidence = sum(item.confidence for item in self.state.extracted_items) / len(self.state.extracted_items)
            adjusted_completion = base_completion * avg_confidence
        else:
            adjusted_completion = 0.0

        # Cap at 100%
        self.state.completion_percentage = min(adjusted_completion, 100.0)

    def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
        """
        Process a user message:
          1. Initialize OpenAI if needed,
          2. Add user message to history,
          3. Get AI response,
          4. Extract resume information,
          5. Update the conversation state,
          6. Return structured data.

        Args:
            message: The user's chat input.
            api_key: The user's OpenAI API key.

        Returns:
            A dictionary with AI response, extracted info, and updated completion
            status. Always returns a dictionary so the UI can parse it, even on error.
        """
        try:
            if not message.strip():
                # Return placeholders if message is empty
                return {
                    "response": "Please enter a message.",
                    "extracted_info": [],
                    "completion_status": {
                        "percentage": self.state.completion_percentage,
                        "categories_covered": self.state.categories_covered,
                        "current_focus": self.state.current_focus
                    },
                    "analysis_text": "No new information extracted.",
                    "history_message": "(No change in history)"
                }

            # Initialize the client if not done yet
            if not self._api_key:
                self._initialize_client(api_key)

            # Add user message to conversation history
            self._add_to_history("user", message)
            ai_response = self._get_ai_response()
            self._add_to_history("assistant", ai_response)

            # Extract new info from the latest user/assistant exchange
            new_info = self._extract_resume_information(text=message + "\n" + ai_response)

            # Update the conversation state
            for info_item in new_info:
                self.state.add_extracted_info(info_item)

            self._update_completion_status()

            return {
                "response": ai_response,
                "extracted_info": [
                    {
                        "text": info.text,
                        "category": info.category,
                        "confidence": info.confidence
                    }
                    for info in new_info
                ],
                "completion_status": {
                    "percentage": self.state.completion_percentage,
                    "categories_covered": self.state.categories_covered,
                    "current_focus": self.state.current_focus
                },
                "analysis_text": (
                    "Successfully extracted new information."
                    if new_info else
                    "No new information extracted."
                ),
                "history_message": f"Added user message '{message}' and assistant response to history."
            }

        except Exception as e:
            error_msg = f"Error processing message: {str(e)}"
            logger.error(error_msg)
            self.state.last_error = error_msg
            return {
                "response": "",
                "extracted_info": [],
                "completion_status": {
                    "percentage": self.state.completion_percentage,
                    "categories_covered": self.state.categories_covered,
                    "current_focus": self.state.current_focus
                },
                "analysis_text": error_msg,
                "history_message": "(Processing failed)"
            }

    def generate_output(self) -> Dict[str, Any]:
        """
        Generate structured JSON output containing all extracted information,
        store it in a timestamped file, and return the file name and content.

        Returns:
            A dict with fields: filename, content, and status ("success"),
            or error and status ("error") on failure.
        """
        try:
            categorized_info = {}
            for category in self.extraction_categories:
                items_in_cat = [
                    {
                        "text": item.text,
                        "confidence": item.confidence,
                        "timestamp": item.timestamp.isoformat(),
                        "metadata": item.metadata
                    }
                    for item in self.state.extracted_items
                    if item.category == category
                ]
                if items_in_cat:
                    categorized_info[category] = items_in_cat

            output = {
                "extracted_information": categorized_info,
                "analysis_summary": {
                    "total_items": len(self.state.extracted_items),
                    "categories_covered": self.state.categories_covered,
                    "completion_percentage": self.state.completion_percentage
                },
                "metadata": {
                    "generated_at": datetime.now().isoformat(),
                    "conversation_length": len(self.conversation_history),
                    "version": "2.0"
                }
            }

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"extracted_info_{timestamp}.json"

            # Use a context manager for safe file operations
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(output, f, indent=2, ensure_ascii=False)

            return {
                "filename": filename,
                "content": output,
                "status": "success"
            }

        except Exception as e:
            error_msg = f"Error generating output: {str(e)}"
            logger.error(error_msg)
            return {
                "error": error_msg,
                "status": "error"
            }
398
 
399
-
400
  def create_gradio_interface() -> gr.Blocks:
401
- """
402
- Create the Gradio interface for the InformationExtractor.
403
-
404
- Returns:
405
- The gradio Blocks application interface object.
406
- """
407
- extractor = InformationExtractor()
408
-
409
- css = """
410
- .container { max-width: 900px; margin: auto; }
411
- .message { padding: 1rem; margin: 0.5rem 0; border-radius: 0.5rem; }
412
- .info-panel { background: #f5f5f5; padding: 1rem; border-radius: 0.5rem; }
413
- .status-badge {
414
- display: inline-block;
415
- padding: 0.25rem 0.5rem;
416
- border-radius: 0.25rem;
417
- margin: 0.25rem;
418
- background: #e0e0e0;
419
- }
420
- .extraction-highlight {
421
- background: #e8f4f8;
422
- border-left: 4px solid #4a90e2;
423
- padding: 0.5rem;
424
- margin: 0.5rem 0;
425
- }
426
- """
427
-
428
- with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
429
- gr.Markdown("# 🔍 Information Extraction Assistant\n")
430
 
 
 
 
431
  with gr.Row():
432
  with gr.Column(scale=2):
433
  api_key = gr.Textbox(
@@ -435,212 +537,89 @@ def create_gradio_interface() -> gr.Blocks:
435
  type="password",
436
  placeholder="Enter your OpenAI API key (sk-...)"
437
  )
438
-
439
  chatbot = gr.Chatbot(
440
  label="Conversation",
441
- value=[],
442
  height=400
443
  )
444
-
445
  with gr.Row():
446
  msg = gr.Textbox(
447
  label="Message",
448
- placeholder="Type your message here..."
449
  )
450
- submit = gr.Button("Send", variant="primary")
451
-
452
- with gr.Row():
453
- clear = gr.Button("Clear Chat")
454
- generate = gr.Button("Generate Report", variant="secondary")
455
 
456
  with gr.Column(scale=1):
457
- with gr.Group():
458
- gr.Markdown("### Extraction Progress")
459
- progress = gr.Slider(
460
- label="Completion",
461
- minimum=0,
462
- maximum=100,
463
- value=0,
464
- interactive=False
465
- )
466
- categories_covered = gr.JSON(label="Categories Covered", value={})
467
- current_focus = gr.Textbox(
468
- label="Current Focus",
469
- value="Not started",
470
- interactive=False
471
- )
472
-
473
- with gr.Tabs() as result_tabs:
474
- with gr.Tab("Extracted Information"):
475
- extracted_info = gr.JSON(label="Extracted Details", value={})
476
- with gr.Tab("Download"):
477
- file_output = gr.File(label="Download Report")
478
- with gr.Tab("Analysis"):
479
- analysis_text = gr.Markdown("Analysis will appear here after processing.")
480
-
481
def format_extraction_summary(extracted_items_list: List[Dict[str, Any]]) -> str:
    """
    Render extracted items as a user-friendly markdown summary.

    Args:
        extracted_items_list: List of dictionaries with 'category', 'confidence', and 'text'.

    Returns:
        A markdown string summarizing the extracted items.
    """
    if not extracted_items_list:
        return "No information extracted yet."

    summary_lines = ["### Recently Extracted Information"]
    summary_lines.extend(
        f"- **{entry['category']}** ({entry['confidence']*100:.1f}% confidence)\n"
        f" {entry['text']}"
        for entry in extracted_items_list
    )
    return "\n".join(summary_lines)
500
-
501
def process_message(user_input: str, history: List[Dict[str, str]], key: str) -> Tuple[Any, float, Dict[str, Any], str, str]:
    """
    Handle one chat turn and produce the five values bound to the UI outputs:
    (new_chat_history, progress_value, categories_json, focus_text, analysis_message).

    Args:
        user_input: The current user message.
        history: The existing chat history.
        key: The user's OpenAI API key.

    Returns:
        A tuple with updated chatbot messages, progress, categories_covered,
        current_focus, and analysis text.
    """
    # NOTE: `extractor` and `format_extraction_summary` come from the
    # enclosing create_gradio_interface scope.
    outcome = extractor.process_message(user_input, key)

    # Append the user message and the assistant reply to the chat history.
    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": outcome["response"]})

    status = outcome["completion_status"]
    progress_value = status["percentage"]
    categories_json = {"categories": status["categories_covered"]}
    focus_text = status["current_focus"] or "Not specified"

    # Prefer a summary of newly extracted items; otherwise show the
    # extractor's own analysis message.
    extracted = outcome.get("extracted_info", [])
    analysis_message = format_extraction_summary(extracted) if extracted else outcome["analysis_text"]

    return history, progress_value, categories_json, focus_text, analysis_message
533
-
534
def generate_report() -> Tuple[Optional[str], Dict[str, Any], str, str]:
    """
    Generate a JSON report of extracted resume info.

    Returns:
        A tuple of: (filename, extracted_json, focus_message, analysis_text).
        On failure the filename is None and the dict carries the error.
    """
    # NOTE: `extractor` and `format_extraction_summary` come from the
    # enclosing create_gradio_interface scope.
    gen_result = extractor.generate_output()
    if gen_result["status"] != "success":
        return None, {"error": gen_result["error"]}, "Error generating report.", "No analysis."

    filename = gen_result["filename"]
    content = gen_result["content"]
    extracted = content["extracted_information"]

    # Compact preview shown in the UI instead of the full payload.
    content_preview = {
        "summary": content["analysis_summary"],
        "categories": list(extracted.keys()),
        "total_items": len(extracted)
    }

    # Flatten category -> items into one list for the analysis panel.
    flat_items = [
        {
            "category": cat_key,
            "confidence": item_data["confidence"],
            "text": item_data["text"]
        }
        for cat_key, cat_items in extracted.items()
        for item_data in cat_items
    ]

    final_analysis = format_extraction_summary(flat_items)
    return filename, content_preview, "Report generated successfully!", final_analysis
567
-
568
- def clear_interface() -> Tuple[List[Dict[str, str]], float, Dict[str, Any], str, Dict[str, Any], None, str, str]:
569
- """
570
- Reset all UI components to their initial state.
571
-
572
- Returns:
573
- A tuple specifying the reset states of:
574
- - Chatbot
575
- - Progress
576
- - Categories
577
- - Current Focus
578
- - Extracted Info
579
- - File Output
580
- - Analysis
581
- - Message Box
582
- """
583
- # Re-instantiate the extractor to clear its internal state
584
- global extractor
585
- extractor = InformationExtractor()
586
-
587
- return [], 0.0, {"categories": []}, "Not started", {}, None, "Ready to start new extraction.", ""
588
 
589
  # Bind events
590
  msg.submit(
591
- fn=process_message,
592
  inputs=[msg, chatbot, api_key],
593
- outputs=[chatbot, progress, categories_covered, current_focus, analysis_text]
594
- ).then(lambda: "", None, msg)
595
-
596
- submit.click(
597
- fn=process_message,
598
  inputs=[msg, chatbot, api_key],
599
- outputs=[chatbot, progress, categories_covered, current_focus, analysis_text]
600
  ).then(lambda: "", None, msg)
601
-
602
- generate.click(
603
- fn=generate_report,
604
- outputs=[file_output, extracted_info, current_focus, analysis_text]
605
- )
606
-
607
- clear.click(
608
- fn=clear_interface,
609
- outputs=[
610
- chatbot,
611
- progress,
612
- categories_covered,
613
- current_focus,
614
- extracted_info,
615
- file_output,
616
- analysis_text,
617
- msg
618
- ]
619
  )
620
 
621
  return demo
622
 
623
-
624
def main() -> None:
    """
    Launch the Gradio application on port 7860, with share=True.

    Startup failures are logged rather than raised, matching the original
    best-effort behavior.
    """
    # FIX: removed the duplicate logging.basicConfig(...) call — the module
    # already configures logging at import time, and basicConfig is a no-op
    # once the root logger has handlers, so the call here was dead code.
    demo_app = create_gradio_interface()
    try:
        demo_app.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=True,
            show_api=False
        )
    except Exception as e:
        logger.error(f"Application failed to start: {str(e)}")


if __name__ == "__main__":
    main()
 
 
 
 
 
 
5
  from typing import Dict, List, Optional, Any, Tuple
6
  from dataclasses import dataclass, field
7
 
8
+ import openai
9
  import gradio as gr
10
 
11
+ # Set up logging
12
  logging.basicConfig(
13
  level=logging.INFO,
14
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
15
  handlers=[
16
  logging.StreamHandler(),
17
+ logging.FileHandler('loss_dog.log')
18
  ]
19
  )
20
  logger = logging.getLogger(__name__)
21
 
22
+ # System Prompts
23
+ CONVERSATION_PROMPT = '''
24
+ <?xml version="1.0" encoding="UTF-8"?>
25
+ <system_prompt>
26
+ <assistant_identity>
27
+ <name>LOSS DOG</name>
28
+ <role>Digital Profile Assistant</role>
29
+ <purpose>To help users build comprehensive professional and digital presence profiles through natural, comfortable conversation</purpose>
30
+ </assistant_identity>
31
+
32
+ <core_personality>
33
+ <traits>
34
+ <trait>Friendly and approachable</trait>
35
+ <trait>Attentive listener</trait>
36
+ <trait>Professionally insightful</trait>
37
+ <trait>Respectful of boundaries</trait>
38
+ <trait>Naturally curious</trait>
39
+ </traits>
40
+ <voice_characteristics>
41
+ <characteristic>Warm and encouraging</characteristic>
42
+ <characteristic>Clear and professional</characteristic>
43
+ <characteristic>Adaptable to user's style</characteristic>
44
+ </voice_characteristics>
45
+ </core_personality>
46
+
47
+ <information_categories>
48
+ <category name="professional_background">
49
+ <fields>
50
+ <field>Current role and responsibilities</field>
51
+ <field>Career history and progression</field>
52
+ <field>Notable achievements</field>
53
+ <field>Professional goals</field>
54
+ </fields>
55
+ <approach>
56
+ <step>Begin with current role</step>
57
+ <step>Explore career journey naturally</step>
58
+ <step>Discuss achievements organically</step>
59
+ <step>Note specific metrics when shared</step>
60
+ </approach>
61
+ </category>
62
+
63
+ <category name="education_training">
64
+ <fields>
65
+ <field>Formal education</field>
66
+ <field>Certifications</field>
67
+ <field>Specialized training</field>
68
+ <field>Continuous learning</field>
69
+ </fields>
70
+ <data_points>
71
+ <point>Institution names</point>
72
+ <point>Degree details</point>
73
+ <point>Time periods</point>
74
+ <point>Special achievements</point>
75
+ </data_points>
76
+ </category>
77
+
78
+ <category name="skills_expertise">
79
+ <fields>
80
+ <field>Technical skills</field>
81
+ <field>Soft skills</field>
82
+ <field>Tools and technologies</field>
83
+ <field>Domain expertise</field>
84
+ </fields>
85
+ <metrics>
86
+ <metric>Proficiency levels</metric>
87
+ <metric>Years of experience</metric>
88
+ <metric>Project applications</metric>
89
+ </metrics>
90
+ </category>
91
+
92
+ <category name="digital_presence">
93
+ <fields>
94
+ <field>Social media impact</field>
95
+ <field>Content creation</field>
96
+ <field>Community engagement</field>
97
+ <field>Digital assets</field>
98
+ </fields>
99
+ <metrics>
100
+ <metric>Follower counts</metric>
101
+ <metric>Engagement rates</metric>
102
+ <metric>Content reach</metric>
103
+ <metric>Portfolio value</metric>
104
+ </metrics>
105
+ </category>
106
+
107
+ <category name="projects_contributions">
108
+ <fields>
109
+ <field>Major projects</field>
110
+ <field>Open source contributions</field>
111
+ <field>Creative works</field>
112
+ <field>Impact metrics</field>
113
+ </fields>
114
+ <data_collection>
115
+ <point>Project descriptions</point>
116
+ <point>Role and responsibilities</point>
117
+ <point>Technologies used</point>
118
+ <point>Measurable outcomes</point>
119
+ </data_collection>
120
+ </category>
121
+ </information_categories>
122
+
123
+ <conversation_strategies>
124
+ <engagement_patterns>
125
+ <pattern type="initial_contact">
126
+ <approach>Open with friendly, professional greeting</approach>
127
+ <focus>Establish comfortable rapport</focus>
128
+ <goal>Begin natural information gathering</goal>
129
+ </pattern>
130
+
131
+ <pattern type="information_gathering">
132
+ <approach>Use natural conversation flow</approach>
133
+ <focus>Follow user's narrative</focus>
134
+ <goal>Collect relevant details organically</goal>
135
+ </pattern>
136
+
137
+ <pattern type="follow_up">
138
+ <approach>Ask relevant, contextual questions</approach>
139
+ <focus>Deepen understanding of shared information</focus>
140
+ <goal>Gather additional context and details</goal>
141
+ </pattern>
142
+ </engagement_patterns>
143
+
144
+ <response_handling>
145
+ <scenario type="shared_information">
146
+ <action>Acknowledge and validate</action>
147
+ <action>Note key points</action>
148
+ <action>Ask natural follow-up if appropriate</action>
149
+ </scenario>
150
+
151
+ <scenario type="hesitation">
152
+ <action>Respect boundaries</action>
153
+ <action>Shift to comfortable topics</action>
154
+ <action>Leave door open for later sharing</action>
155
+ </scenario>
156
+
157
+ <scenario type="completion">
158
+ <action>Summarize collected information</action>
159
+ <action>Verify accuracy</action>
160
+ <action>Transition smoothly to next topic</action>
161
+ </scenario>
162
+ </response_handling>
163
+ </conversation_strategies>
164
+
165
+ <output_guidelines>
166
+ <quality_standards>
167
+ <standard>Professional language</standard>
168
+ <standard>Accurate representation</standard>
169
+ <standard>Structured organization</standard>
170
+ <standard>Clear categorization</standard>
171
+ </quality_standards>
172
+ </output_guidelines>
173
+
174
+ <ethics_guidelines>
175
+ <principle>Respect user privacy</principle>
176
+ <principle>Never pressure for information</principle>
177
+ <principle>Maintain professional boundaries</principle>
178
+ <principle>Ensure data accuracy</principle>
179
+ </ethics_guidelines>
180
+ </system_prompt>
181
+ '''
182
+
183
+ EXTRACTION_PROMPT = '''
184
+ <?xml version="1.0" encoding="UTF-8"?>
185
+ <system_prompt>
186
+ <assistant_identity>
187
+ <name>LOSS DOG - Information Processor</name>
188
+ <role>Conversation Analyzer and Information Extractor</role>
189
+ <purpose>Process conversation history to extract and structure professional profile information</purpose>
190
+ </assistant_identity>
191
+
192
+ <task_description>
193
+ Your task is to analyze the provided conversation history and extract structured profile information:
194
+ 1. Process natural conversation into structured data
195
+ 2. Identify and categorize relevant information
196
+ 3. Make intelligent inferences when appropriate
197
+ 4. Maintain high accuracy and data quality
198
+ 5. Handle messy or non-linear conversation flows
199
+ </task_description>
200
+
201
+ <extraction_guidelines>
202
+ <primary_objective>
203
+ Convert conversation data into clean, structured JSON that matches these categories:
204
+ - personal_info (name, contact, location)
205
+ - education (degree, institution, field, dates)
206
+ - work_experience (title, company, duration, responsibilities)
207
+ - skills (technical, soft_skills, tools)
208
+ - achievements (awards, publications, projects)
209
+ - digital_presence (social_media, content_creation, community_impact)
210
+ </primary_objective>
211
+
212
+ <processing_rules>
213
+ <rule>Focus on factual information over casual conversation</rule>
214
+ <rule>Handle partial or incomplete information gracefully</rule>
215
+ <rule>Use context to resolve ambiguities</rule>
216
+ <rule>Track confidence levels for all extracted data</rule>
217
+ <rule>Mark any inferred information clearly</rule>
218
+ <rule>Maintain source context for future reference</rule>
219
+ </processing_rules>
220
+
221
+ <data_handling>
222
+ <instruction>For each piece of extracted information, provide:
223
+ - Category classification
224
+ - Confidence score (0.0-1.0)
225
+ - Source context (relevant conversation snippet)
226
+ - List of any inferred fields
227
+ - Structured data in appropriate format
228
+ </instruction>
229
+ </data_handling>
230
+ </extraction_guidelines>
231
+
232
+ <output_format>
233
+ <format_rules>
234
+ <rule>Return JSON object with categorized sections</rule>
235
+ <rule>Include confidence scores (0.0-1.0) for each section</rule>
236
+ <rule>Mark inferred information with "inferred": true</rule>
237
+ <rule>Include source context for traceability</rule>
238
+ <rule>Use consistent date formats (YYYY-MM-DD where possible)</rule>
239
+ </format_rules>
240
+
241
+ <structure>
242
+ {
243
+ "category_name": {
244
+ "data": {
245
+ // Structured data specific to category
246
+ },
247
+ "confidence": float,
248
+ "source_context": string,
249
+ "inferred_fields": [string],
250
+ "metadata": {
251
+ // Additional category-specific metadata
252
+ }
253
+ }
254
+ }
255
+ </structure>
256
+ </output_format>
257
+
258
+ <quality_controls>
259
+ <validations>
260
+ <validation>Check date consistency and sequences</validation>
261
+ <validation>Verify logical relationships between entries</validation>
262
+ <validation>Ensure required fields are present or marked missing</validation>
263
+ <validation>Confirm confidence scores are justified</validation>
264
+ </validations>
265
+
266
+ <error_handling>
267
+ <case>Handle conflicting information by preferring most recent/confident</case>
268
+ <case>Mark ambiguous information with multiple possible interpretations</case>
269
+ <case>Skip unverifiable information rather than making weak inferences</case>
270
+ </error_handling>
271
+ </quality_controls>
272
+ </system_prompt>
273
+ '''
274
 
275
@dataclass
class ProfileSection:
    """Represents a section of the professional profile with structured data."""
    # Category label, e.g. "work_experience" or "digital_presence".
    category: str
    # Structured payload for the category (schema varies per category).
    data: Dict[str, Any]
    # Extraction confidence score — the extraction prompt requests 0.0-1.0.
    confidence: float
    # Conversation snippet the data was derived from (traceability).
    source_context: str
    # Names of fields that were inferred rather than stated explicitly.
    inferred_fields: List[str] = field(default_factory=list)
    # When this section was last written; defaults to creation time.
    last_updated: datetime = field(default_factory=datetime.now)
    # Additional category-specific metadata.
    metadata: Dict[str, Any] = field(default_factory=dict)
285
 
 
286
@dataclass
class ConversationState:
    """Tracks the state of the information gathering conversation."""

    # Sections extracted so far, keyed by category name.
    collected_sections: Dict[str, ProfileSection] = field(default_factory=dict)
    # Dotted "section.field" identifiers still missing from the profile.
    missing_information: List[str] = field(default_factory=list)
    # Full chat transcript as {"role": ..., "content": ...} messages.
    conversation_history: List[Dict[str, str]] = field(default_factory=list)
    # Per-section completion ratio (0.0-1.0), weighted by extraction confidence.
    completion_status: Dict[str, float] = field(default_factory=dict)
    # Section currently being discussed, if any.
    current_focus: Optional[str] = None
    # Log of past extraction runs (timestamps + sections touched).
    extraction_history: List[Dict[str, Any]] = field(default_factory=list)
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
class ProfileBuilder:
    """
    Core class for building professional profiles through conversation and extraction.

    Implements a two-phase approach:
    1. Interactive conversation for information gathering
    2. Structured information extraction and processing
    """

    def __init__(self):
        """Initialize the ProfileBuilder with default settings."""
        self.state = ConversationState()
        # Required fields per profile section; drives completion tracking and
        # the "missing information" report shown to the user.
        self.required_sections = {
            "personal_info": ["name", "contact", "location"],
            "education": ["degree", "institution", "field", "dates"],
            "work_experience": ["title", "company", "duration", "responsibilities"],
            "skills": ["technical", "soft_skills", "tools"],
            "achievements": ["awards", "publications", "projects"],
            "digital_presence": ["platforms", "metrics", "content"]
        }
        self._api_key: Optional[str] = None
        self._setup_logging()

    def _setup_logging(self) -> None:
        """Configure a dedicated file logger for the profile builder."""
        self.logger = logging.getLogger(__name__)
        handler = logging.FileHandler('profile_builder.log')
        handler.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        ))
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

    def _initialize_client(self, api_key: str) -> None:
        """Initialize the OpenAI client with the supplied API key.

        Args:
            api_key: User-provided OpenAI key; must start with "sk-".

        Raises:
            ValueError: If the key does not have the expected "sk-" prefix.
        """
        try:
            if not api_key.startswith("sk-"):
                raise ValueError("Invalid API key format")
            self._api_key = api_key
            openai.api_key = api_key
            self.logger.info("OpenAI client initialized successfully")
        except Exception as e:
            self.logger.error(f"Failed to initialize OpenAI client: {str(e)}")
            raise

    async def _get_conversation_response(self, message: str) -> str:
        """Phase 1: obtain the assistant's conversational reply.

        NOTE(review): process_message called this method but it was missing from
        the class, which made every message fail with AttributeError. Added here
        using the same legacy openai API style as _extract_information. The
        latest user message is already appended to conversation_history by the
        caller, so only the system prompt plus history is sent.

        Args:
            message: The raw user message (kept for signature compatibility
                with the call site; history already contains it).

        Returns:
            The assistant's reply text.
        """
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        messages.extend(self.state.conversation_history)

        response = await openai.ChatCompletion.acreate(
            model="gpt-4",
            messages=messages,
            temperature=0.7,
            max_tokens=1000
        )
        return response.choices[0].message.content

    async def _extract_information(self) -> Dict[str, ProfileSection]:
        """Phase 2: extract structured information from the conversation history.

        Returns:
            Mapping of category name to ProfileSection built from the model's
            JSON response. Categories that fail validation are dropped.

        Raises:
            Exception: Re-raises any API/processing failure after logging.
        """
        try:
            # Flatten the transcript into "role: content" lines for the prompt.
            conversation_text = "\n".join(
                f"{msg['role']}: {msg['content']}"
                for msg in self.state.conversation_history
            )

            messages = [
                {"role": "system", "content": EXTRACTION_PROMPT},
                {"role": "user", "content": f"Extract professional profile information from this conversation:\n\n{conversation_text}"}
            ]

            # Low temperature: extraction should be deterministic, not creative.
            response = await openai.ChatCompletion.acreate(
                model="gpt-4",
                messages=messages,
                temperature=0.3,
                max_tokens=2000
            )

            # Parse and validate the model's JSON payload.
            extracted_data = self._parse_extraction_response(response.choices[0].message.content)

            # Convert raw dicts into typed ProfileSection objects.
            sections = {}
            for category, data in extracted_data.items():
                sections[category] = ProfileSection(
                    category=category,
                    data=data.get("data", {}),
                    confidence=data.get("confidence", 0.0),
                    source_context=data.get("source_context", ""),
                    inferred_fields=data.get("inferred_fields", []),
                    metadata=data.get("metadata", {})
                )

            self.logger.info(f"Successfully extracted information for {len(sections)} sections")
            return sections

        except Exception as e:
            self.logger.error(f"Error in extraction phase: {str(e)}")
            raise

    def _parse_extraction_response(self, response_text: str) -> Dict[str, Any]:
        """Parse and validate the extraction response.

        Args:
            response_text: Raw model output expected to be a JSON object.

        Returns:
            The parsed dict on success; an empty dict on any parse or
            validation failure (best-effort: extraction errors must not
            crash the conversation loop).
        """
        try:
            extracted_data = json.loads(response_text)
            self._validate_extracted_data(extracted_data)
            return extracted_data
        except json.JSONDecodeError as e:
            self.logger.error(f"Failed to parse extraction response: {str(e)}")
            return {}
        except Exception as e:
            self.logger.error(f"Error processing extraction response: {str(e)}")
            return {}

    def _validate_extracted_data(self, data: Dict[str, Any]) -> None:
        """Validate the structure of extracted data.

        Every category must carry "data", "confidence" and "source_context".

        Raises:
            ValueError: If any category is missing a required key.
        """
        required_keys = ["data", "confidence", "source_context"]
        for category, section in data.items():
            missing_keys = [key for key in required_keys if key not in section]
            if missing_keys:
                self.logger.warning(f"Missing required keys {missing_keys} in category {category}")
                raise ValueError(f"Invalid data structure for category {category}")

    def _update_completion_status(self) -> None:
        """Recompute per-section completion scores into state.completion_status.

        Score = (present required fields / total required fields) scaled by the
        section's extraction confidence; 0.0 for sections not yet collected.
        """
        status = {}
        # NOTE: loop variable renamed from `field` to avoid shadowing
        # dataclasses.field imported at module level.
        for section, required_fields in self.required_sections.items():
            if section in self.state.collected_sections:
                profile_section = self.state.collected_sections[section]
                fields_present = sum(
                    1 for field_name in required_fields
                    if field_name in profile_section.data
                )
                confidence_factor = profile_section.confidence
                status[section] = (fields_present / len(required_fields)) * confidence_factor
            else:
                status[section] = 0.0

        self.state.completion_status = status
        self.logger.info(f"Updated completion status: {status}")

    def _get_missing_information(self) -> List[str]:
        """Return dotted "section.field" names still missing from the profile."""
        missing = []
        for section, required_fields in self.required_sections.items():
            if section not in self.state.collected_sections:
                # Whole section absent: every required field is missing.
                missing.extend([f"{section}.{field_name}" for field_name in required_fields])
            else:
                profile_section = self.state.collected_sections[section]
                missing.extend([
                    f"{section}.{field_name}"
                    for field_name in required_fields
                    if field_name not in profile_section.data
                ])
        return missing

    async def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
        """Process a user message through both conversation and extraction phases.

        Args:
            message: The user's chat message.
            api_key: OpenAI API key (used to lazily initialize the client).

        Returns:
            On success: response text, extracted sections, completion status
            and missing information. On failure: {"error": ..., "completion_status": ...}.
        """
        if not self._api_key:
            self._initialize_client(api_key)

        try:
            # Phase 1: Conversation
            self.state.conversation_history.append({"role": "user", "content": message})
            ai_response = await self._get_conversation_response(message)
            self.state.conversation_history.append({"role": "assistant", "content": ai_response})

            # Phase 2: Information Extraction
            extracted_sections = await self._extract_information()

            # Merge newly extracted sections over any previous versions.
            self.state.collected_sections.update(extracted_sections)
            self._update_completion_status()

            # Track extraction history for the profile metadata.
            self.state.extraction_history.append({
                "timestamp": datetime.now().isoformat(),
                "sections_extracted": list(extracted_sections.keys())
            })

            return {
                "response": ai_response,
                "extracted_sections": {
                    category: {
                        "data": section.data,
                        "confidence": section.confidence,
                        "inferred_fields": section.inferred_fields
                    }
                    for category, section in extracted_sections.items()
                },
                "completion_status": self.state.completion_status,
                "missing_information": self._get_missing_information()
            }

        except Exception as e:
            self.logger.error(f"Error processing message: {str(e)}")
            return {
                "error": str(e),
                "completion_status": self.state.completion_status
            }

    def generate_profile(self) -> Dict[str, Any]:
        """Generate the final structured profile with all collected information.

        Serializes every collected section plus run metadata, writes it to a
        timestamped JSON file, and returns both the profile and the filename.

        Returns:
            On success: {"profile": ..., "filename": ..., "status": "success"}.
            On failure: {"error": ..., "status": "error"}.
        """
        try:
            profile = {
                "profile_data": {
                    category: {
                        "data": section.data,
                        "confidence": section.confidence,
                        "inferred_fields": section.inferred_fields,
                        "metadata": section.metadata
                    }
                    for category, section in self.state.collected_sections.items()
                },
                "metadata": {
                    "generated_at": datetime.now().isoformat(),
                    "completion_status": self.state.completion_status,
                    "missing_information": self._get_missing_information(),
                    "conversation_length": len(self.state.conversation_history),
                    "extraction_history": self.state.extraction_history
                }
            }

            # Save to a timestamped file so repeated runs never overwrite each other.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"profile_{timestamp}.json"
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(profile, f, indent=2, ensure_ascii=False)

            # Fixed: previously logged the literal "(unknown)" instead of the filename.
            self.logger.info(f"Generated profile saved to {filename}")
            return {
                "profile": profile,
                "filename": filename,
                "status": "success"
            }

        except Exception as e:
            self.logger.error(f"Error generating profile: {str(e)}")
            return {
                "error": str(e),
                "status": "error"
            }
525
 
 
526
def create_gradio_interface() -> gr.Blocks:
    """Create the Gradio interface for the profile builder.

    Builds a two-column layout: chat on the left, extracted data / progress
    panels plus profile generation on the right.

    Returns:
        The assembled (not yet launched) Gradio Blocks app.
    """
    builder = ProfileBuilder()

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🐕 LOSS DOG - Professional Profile Builder")

        with gr.Row():
            with gr.Column(scale=2):
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    type="password",
                    placeholder="Enter your OpenAI API key (sk-...)"
                )

                chatbot = gr.Chatbot(
                    label="Conversation",
                    height=400
                )

                with gr.Row():
                    msg = gr.Textbox(
                        label="Message",
                        placeholder="Chat with LOSS DOG..."
                    )
                    send = gr.Button("Send", variant="primary")

            with gr.Column(scale=1):
                with gr.Tabs():
                    with gr.Tab("Extracted Info"):
                        extracted_info = gr.JSON(
                            label="Extracted Information",
                            show_label=True
                        )
                    with gr.Tab("Progress"):
                        completion = gr.JSON(
                            label="Completion Status",
                            show_label=True
                        )
                        missing = gr.JSON(
                            label="Missing Information",
                            show_label=True
                        )

                generate_btn = gr.Button("Generate Profile", variant="secondary")
                profile_output = gr.JSON(label="Generated Profile")
                download_btn = gr.File(label="Download Profile")

        # Event handlers
        async def on_message(message: str, history: List[List[str]], key: str) -> Tuple[Any, Any, Any, Any]:
            """Process one chat turn and refresh the side panels."""
            if not message.strip():
                return history, None, None, None

            result = await builder.process_message(message, key)

            if "error" in result:
                # Fixed: the error was previously returned in the "missing"
                # slot, rendering under the Missing Information panel; show it
                # in the extracted-info panel instead.
                return history, {"error": result["error"]}, None, None

            history = history + [[message, result["response"]]]

            return (
                history,
                result["extracted_sections"],
                result["completion_status"],
                result["missing_information"]
            )

        def on_generate() -> Tuple[Dict[str, Any], Optional[str]]:
            """Build the final profile JSON and expose the saved file for download."""
            result = builder.generate_profile()
            if result["status"] == "success":
                return result["profile"], result["filename"]
            return {"error": result["error"]}, None

        # Bind events
        msg.submit(
            on_message,
            inputs=[msg, chatbot, api_key],
            outputs=[chatbot, extracted_info, completion, missing]
        ).then(lambda: "", None, msg)  # Clear message box after sending

        send.click(
            on_message,
            inputs=[msg, chatbot, api_key],
            outputs=[chatbot, extracted_info, completion, missing]
        ).then(lambda: "", None, msg)

        generate_btn.click(
            on_generate,
            outputs=[profile_output, download_btn]
        )

    return demo
618
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
619
if __name__ == "__main__":
    # Entry point: build the Gradio app and serve it.
    demo = create_gradio_interface()
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces (needed for containerized hosting)
        server_port=7860,
        share=True  # also expose a temporary public share URL
    )
+ )