jzou19950715 committed
Commit 50e3198 · verified · 1 Parent(s): d965c95

Update app.py

Files changed (1):
  1. app.py +333 -379
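The main change in this revision is the OpenAI integration: the `from openai import OpenAI` client object and its `self.client.chat.completions.create(...)` calls are dropped in favour of the module-level `import openai` interface with `openai.api_key = ...` and `openai.ChatCompletion.create(...)`. For orientation only, here is a minimal sketch of the two call styles; the prompt content and environment-variable key handling are illustrative assumptions, not code from app.py (the module-level form assumes the pre-1.0 `openai` SDK, the client form assumes `openai>=1.0`):

```python
# Illustrative only: the two OpenAI call styles this commit moves between.
# Prompt content and key handling are assumptions, not taken from app.py.
import os

MESSAGES = [{"role": "user", "content": "Say hello."}]


def call_with_client() -> str:
    """Style removed by this commit (openai>=1.0 client object)."""
    from openai import OpenAI

    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    resp = client.chat.completions.create(model="gpt-4o-mini", messages=MESSAGES)
    return resp.choices[0].message.content


def call_with_module() -> str:
    """Style added by this commit (pre-1.0 module-level API)."""
    import openai

    openai.api_key = os.environ["OPENAI_API_KEY"]
    resp = openai.ChatCompletion.create(model="gpt-4o-mini", messages=MESSAGES)
    return resp["choices"][0]["message"]["content"]
```

The return shapes differ accordingly: the client returns typed objects (`resp.choices[0].message.content`), while the legacy module-level call returns a dict-like object indexed as `resp["choices"][0]["message"]["content"]`.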
app.py CHANGED
@@ -4,13 +4,11 @@ import os
4
  from datetime import datetime
5
  from typing import Dict, List, Optional, Any, Tuple
6
  from dataclasses import dataclass, field
7
- from pathlib import Path
8
 
9
- # Third-party imports
10
  import gradio as gr
11
- from openai import OpenAI
12
 
13
- # Configure logging
14
  logging.basicConfig(
15
  level=logging.INFO,
16
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
@@ -21,9 +19,9 @@ logging.basicConfig(
21
  )
22
  logger = logging.getLogger(__name__)
23
 
24
- # System prompt for the AI assistant
25
  SYSTEM_PROMPT = """
26
- You are LOSS DOG, a Career and Education Information Extraction Assistant, designed to help users craft a compelling and well-structured resume by extracting and organizing key details from conversations.
 
27
 
28
  Core Capabilities:
29
  - Proactively ask users about their career history, education, skills, certifications, projects, and achievements.
@@ -32,21 +30,24 @@ Core Capabilities:
32
  - Maintain a friendly, engaging, and professional tone to encourage users to share relevant information.
33
  - Structure extracted data into well-organized resume sections.
34
 
35
- Your goal is to make resume-building effortless by asking the right questions, extracting key information, and presenting it in a clear, professional format.
 
36
  """
37
 
 
38
  @dataclass
39
  class ExtractedInfo:
40
- """Structure for storing extracted information."""
41
  text: str
42
  category: str
43
  confidence: float
44
  timestamp: datetime = field(default_factory=datetime.now)
45
  metadata: Dict[str, Any] = field(default_factory=dict)
46
 
 
47
  @dataclass
48
  class ConversationState:
49
- """Tracks the state and progress of the conversation."""
50
  extracted_items: List[ExtractedInfo] = field(default_factory=list)
51
  categories_covered: List[str] = field(default_factory=list)
52
  current_focus: Optional[str] = None
@@ -55,19 +56,25 @@ class ConversationState:
55
  last_update: datetime = field(default_factory=datetime.now)
56
 
57
  def add_extracted_info(self, info: ExtractedInfo) -> None:
58
- """Add new extracted information and update state."""
59
  self.extracted_items.append(info)
60
  if info.category not in self.categories_covered:
61
  self.categories_covered.append(info.category)
62
  self.last_update = datetime.now()
63
-
64
  class InformationExtractor:
65
- """Core class for handling information extraction from conversations."""
66
-
67
- def __init__(self):
 
 
 
 
 
 
 
 
68
  self.conversation_history: List[Dict[str, str]] = []
69
  self.state = ConversationState()
70
- self.client: Optional[OpenAI] = None
71
  self.extraction_categories = [
72
  "personal_info",
73
  "education",
@@ -75,26 +82,48 @@ class InformationExtractor:
75
  "skills",
76
  "achievements"
77
  ]
78
-
 
 
79
  def _validate_api_key(self, api_key: str) -> bool:
80
- """Validate OpenAI API key format."""
 
 
 
 
 
 
 
 
81
  if not api_key.strip():
82
- raise ValueError("API key cannot be empty")
83
- if not api_key.startswith('sk-'):
84
- raise ValueError("Invalid API key format")
85
  return True
86
-
87
  def _initialize_client(self, api_key: str) -> None:
88
- """Initialize OpenAI client with error handling."""
 
 
 
 
 
89
  try:
90
  if self._validate_api_key(api_key):
91
- self.client = OpenAI(api_key=api_key)
 
92
  except Exception as e:
93
  logger.error(f"Error initializing OpenAI client: {str(e)}")
94
  raise
95
 
96
  def _add_to_history(self, role: str, content: str) -> None:
97
- """Add a message to conversation history with timestamp."""
 
 
 
 
 
 
98
  self.conversation_history.append({
99
  "role": role,
100
  "content": content,
@@ -102,140 +131,138 @@ class InformationExtractor:
102
  })
103
 
104
  def _get_ai_response(self, retries: int = 3) -> str:
105
- """Get response from OpenAI with retry mechanism."""
106
- if not self.client:
107
- raise ValueError("OpenAI client not initialized")
108
-
 
 
 
 
 
 
 
 
109
  for attempt in range(retries):
110
  try:
111
- response = self.client.chat.completions.create(
112
- model="gpt-4o-mini", # Changed from "gpt-4" to "gpt-4o-mini"
113
  messages=[
114
  {"role": "system", "content": SYSTEM_PROMPT},
115
- *[{
116
- "role": msg["role"],
117
- "content": msg["content"]
118
- } for msg in self.conversation_history]
119
  ],
120
  temperature=0.7,
121
  max_tokens=2000
122
- )
123
-
124
- return response.choices[0].message.content
125
-
126
  except Exception as e:
127
  logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
128
  if attempt == retries - 1:
129
  raise Exception(f"Failed after {retries} attempts: {str(e)}")
130
- continue
131
 
132
- def _extract_resume_information(self, text: str) -> List[ExtractedInfo]:
133
- """Extract structured career and education-related information from text for resume building."""
134
- try:
 
 
 
 
 
 
 
 
 
 
 
135
  extraction_prompt = f"""
136
  Analyze the following text and extract relevant information for resume building.
137
- Focus on key resume elements based on the specified extraction categories:
138
- {', '.join(self.extraction_categories)}
139
-
140
- For each extracted item, determine its category dynamically and format it accordingly.
141
 
142
- The JSON output structure should align with the category of extracted information.
143
- Example format:
144
-
145
  {{
146
- "extracted_items": {{
147
- "category_name_1": [
148
- {{
149
- "text": "extracted detail",
150
- "confidence": 0.95,
151
- "metadata": {{}}
152
- }}
153
- ],
154
- "category_name_2": [
155
- {{
156
- "text": "extracted detail",
157
- "confidence": 0.92,
158
- "metadata": {{}}
159
- }}
160
- ]
161
- }}
162
  }}
163
-
164
- Ensure extracted details are relevant for resume-building purposes.
165
  Text to analyze: {text}
166
  """
167
 
168
- response = self.client.chat.completions.create(
169
- model="gpt-4o-mini", # Changed from "gpt-4" to "gpt-4o-mini"
170
- messages=[
171
- {"role": "system", "content": SYSTEM_PROMPT},
172
- {"role": "user", "content": extraction_prompt}
173
- ],
174
- temperature=0.3
175
- )
176
-
177
- # Parse response and create ExtractedInfo objects
178
- analysis = json.loads(response.choices[0].message.content)
179
- extracted_items = []
180
-
181
- for item in analysis.get("extracted_items", []):
182
- extracted_info = ExtractedInfo(
183
- text=item["text"],
184
- category=item["category"],
185
- confidence=item["confidence"],
186
- metadata=item.get("metadata", {})
187
- )
188
- extracted_items.append(extracted_info)
189
-
190
- return extracted_items
191
-
192
- except json.JSONDecodeError as e:
193
- logger.error(f"Error parsing extraction response: {str(e)}")
194
- return []
195
- except Exception as e:
196
- logger.error(f"Error during information extraction: {str(e)}")
197
- return []
198
- def _update_completion_status(self) -> None:
199
- """Update completion status based on extracted information."""
200
- total_categories = len(self.extraction_categories)
201
- covered_categories = len(self.state.categories_covered)
202
-
203
- # Calculate base completion percentage
204
- base_completion = (covered_categories / total_categories) * 100
205
-
206
- # Adjust based on confidence levels
207
- if self.state.extracted_items:
208
- avg_confidence = sum(item.confidence for item in self.state.extracted_items) / len(self.state.extracted_items)
209
- adjusted_completion = base_completion * avg_confidence
210
- else:
211
- adjusted_completion = 0.0
212
-
213
- self.state.completion_percentage = min(adjusted_completion, 100.0)
214
 
 
215
  def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
216
- """Process a user message and extract information."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  try:
218
- # Initialize client if needed
219
- if not self.client:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  self._initialize_client(api_key)
221
 
222
- # Add user message to history
223
  self._add_to_history("user", message)
224
-
225
- # Get AI response
226
  ai_response = self._get_ai_response()
227
  self._add_to_history("assistant", ai_response)
228
-
229
- # Extract information from the entire conversation
230
- new_information = self._extract_information(message + "\n" + ai_response)
231
-
232
- # Update state with new information
233
- for info in new_information:
234
- self.state.add_extracted_info(info)
235
-
236
- # Update completion status
237
  self._update_completion_status()
238
-
239
  return {
240
  "response": ai_response,
241
  "extracted_info": [
@@ -243,35 +270,46 @@ def _extract_resume_information(self, text: str) -> List[ExtractedInfo]:
243
  "text": info.text,
244
  "category": info.category,
245
  "confidence": info.confidence
246
- } for info in new_information
 
247
  ],
248
  "completion_status": {
249
  "percentage": self.state.completion_percentage,
250
  "categories_covered": self.state.categories_covered,
251
  "current_focus": self.state.current_focus
252
- }
 
 
253
  }
254
-
255
  except Exception as e:
256
  error_msg = f"Error processing message: {str(e)}"
257
  logger.error(error_msg)
258
  self.state.last_error = error_msg
259
  return {
260
- "error": error_msg,
 
261
  "completion_status": {
262
  "percentage": self.state.completion_percentage,
263
  "categories_covered": self.state.categories_covered,
264
  "current_focus": self.state.current_focus
265
- }
 
 
266
  }
267
 
268
  def generate_output(self) -> Dict[str, Any]:
269
- """Generate structured output from all extracted information."""
 
 
 
 
 
 
270
  try:
271
- # Organize extracted information by category
272
  categorized_info = {}
273
  for category in self.extraction_categories:
274
- category_items = [
275
  {
276
  "text": item.text,
277
  "confidence": item.confidence,
@@ -281,10 +319,9 @@ def _extract_resume_information(self, text: str) -> List[ExtractedInfo]:
281
  for item in self.state.extracted_items
282
  if item.category == category
283
  ]
284
- if category_items:
285
- categorized_info[category] = category_items
286
 
287
- # Create output structure
288
  output = {
289
  "extracted_information": categorized_info,
290
  "analysis_summary": {
@@ -299,10 +336,10 @@ def _extract_resume_information(self, text: str) -> List[ExtractedInfo]:
299
  }
300
  }
301
 
302
- # Save to file
303
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
304
  filename = f"extracted_info_{timestamp}.json"
305
-
 
306
  with open(filename, 'w', encoding='utf-8') as f:
307
  json.dump(output, f, indent=2, ensure_ascii=False)
308
 
@@ -320,11 +357,16 @@ def _extract_resume_information(self, text: str) -> List[ExtractedInfo]:
320
  "status": "error"
321
  }
322
 
323
- def create_gradio_interface():
324
- """Create the Gradio interface for information extraction."""
 
 
 
 
 
 
325
  extractor = InformationExtractor()
326
-
327
- # Custom CSS for better styling
328
  css = """
329
  .container { max-width: 900px; margin: auto; }
330
  .message { padding: 1rem; margin: 0.5rem 0; border-radius: 0.5rem; }
@@ -345,59 +387,36 @@ def create_gradio_interface():
345
  """
346
 
347
  with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
348
- gr.Markdown("""
349
- # 🔍 Information Extraction Assistant
350
-
351
- Have a natural conversation while we extract and organize important information.
352
- The system will automatically identify and categorize relevant details.
353
- """)
354
 
355
  with gr.Row():
356
  with gr.Column(scale=2):
357
- # API Key input
358
  api_key = gr.Textbox(
359
  label="OpenAI API Key",
360
  type="password",
361
- placeholder="Enter your OpenAI API key (sk-...)",
362
- show_label=True
363
  )
364
 
365
- # Chat interface
366
  chatbot = gr.Chatbot(
 
367
  value=[],
368
- height=400,
369
- type="messages",
370
- show_label=False
371
  )
372
 
373
- # Message input
374
  with gr.Row():
375
  msg = gr.Textbox(
376
  label="Message",
377
- placeholder="Type your message here...",
378
- scale=4
379
- )
380
- submit = gr.Button(
381
- "Send",
382
- variant="primary",
383
- scale=1
384
  )
 
385
 
386
- # Action buttons
387
  with gr.Row():
388
- clear = gr.Button("Clear Chat", scale=1)
389
- generate = gr.Button(
390
- "Generate Report",
391
- variant="secondary",
392
- scale=2
393
- )
394
 
395
  with gr.Column(scale=1):
396
- # Extraction Status Panel
397
- with gr.Group(visible=True) as status_panel:
398
  gr.Markdown("### Extraction Progress")
399
-
400
- # Progress indicator
401
  progress = gr.Slider(
402
  label="Completion",
403
  minimum=0,
@@ -405,244 +424,176 @@ def create_gradio_interface():
405
  value=0,
406
  interactive=False
407
  )
408
-
409
- # Categories covered
410
- categories_covered = gr.JSON(
411
- label="Categories Covered",
412
- value={"categories": []}
413
- )
414
-
415
- # Current focus
416
  current_focus = gr.Textbox(
417
  label="Current Focus",
418
  value="Not started",
419
  interactive=False
420
  )
421
 
422
- # Extraction Results
423
  with gr.Tabs() as result_tabs:
424
  with gr.Tab("Extracted Information"):
425
- extracted_info = gr.JSON(
426
- label="Extracted Details",
427
- value={}
428
- )
429
-
430
  with gr.Tab("Download"):
431
- file_output = gr.File(
432
- label="Download Report"
433
- )
434
-
435
  with gr.Tab("Analysis"):
436
- analysis_text = gr.Markdown(
437
- "Analysis will appear here after processing."
438
- )
439
-
440
- # Helper Functions
441
- def format_extraction_summary(extracted_items: List[Dict]) -> str:
442
- """Format extracted information for display."""
443
- if not extracted_items:
 
 
 
 
 
444
  return "No information extracted yet."
445
-
446
- summary = ["### Recently Extracted Information"]
447
- for item in extracted_items:
448
- summary.append(
449
- f"- **{item['category']}** ({item['confidence']*100:.1f}% confidence)\n"
450
- f" {item['text']}"
451
  )
452
- return "\n".join(summary)
453
-
454
- def update_interface_state(state: Dict[str, Any]) -> tuple:
455
- """Update all interface components based on current state."""
456
- return (
457
- state['completion_status']['percentage'],
458
- {"categories": state['completion_status']['categories_covered']},
459
- state['completion_status']['current_focus']
460
- )
461
-
462
- # Event Handlers
463
- def process_message(message: str, history: list, key: str) -> tuple:
464
- """Handle message processing and update interface."""
465
- if not message.strip():
466
- return history, 0, {}, "Please enter a message"
467
-
468
- try:
469
- # Process message
470
- result = extractor.process_message(message, key)
471
-
472
- if "error" in result:
473
- return (
474
- history,
475
- 0,
476
- {"categories": []},
477
- f"Error: {result['error']}"
478
- )
479
-
480
- # Update chat history
481
- history.append({
482
- "role": "user",
483
- "content": message
484
- })
485
- history.append({
486
- "role": "assistant",
487
- "content": result["response"]
488
- })
489
-
490
- # Update status components
491
- progress_value = result["completion_status"]["percentage"]
492
- categories = {
493
- "categories": result["completion_status"]["categories_covered"]
 
 
 
 
 
 
 
 
 
 
 
494
  }
495
- current_focus = result["completion_status"]["current_focus"] or "Processing..."
496
-
497
- # Update extraction display
498
- if result.get("extracted_info"):
499
- analysis_text = format_extraction_summary(result["extracted_info"])
500
- else:
501
- analysis_text = "No new information extracted."
502
-
503
- return (
504
- history,
505
- progress_value,
506
- categories,
507
- current_focus,
508
- analysis_text
509
- )
510
-
511
- except Exception as e:
512
- logger.error(f"Error in process_message: {str(e)}")
513
- return (
514
- history,
515
- 0,
516
- {"categories": []},
517
- f"Error: {str(e)}",
518
- "An error occurred during processing."
519
- )
520
 
521
- def generate_report() -> tuple:
522
- """Generate and return report file."""
523
- try:
524
- result = extractor.generate_output()
525
-
526
- if result["status"] == "success":
527
- # Update JSON preview
528
- content_preview = {
529
- "summary": result["content"]["analysis_summary"],
530
- "categories": list(result["content"]["extracted_information"].keys()),
531
- "total_items": len(result["content"]["extracted_information"])
532
- }
533
-
534
- return (
535
- result["filename"],
536
- content_preview,
537
- "Report generated successfully! 🎉",
538
- gr.update(value=format_extraction_summary(
539
- [item for items in result["content"]["extracted_information"].values()
540
- for item in items]
541
- ))
542
- )
543
- else:
544
- return (
545
- None,
546
- {"error": result["error"]},
547
- f"Error generating report: {result['error']}",
548
- "Failed to generate analysis."
549
- )
550
-
551
- except Exception as e:
552
- logger.error(f"Error in generate_report: {str(e)}")
553
- return (
554
- None,
555
- {"error": str(e)},
556
- f"Error: {str(e)}",
557
- "An error occurred during report generation."
558
- )
559
 
560
- def clear_interface() -> tuple:
561
- """Reset all interface components."""
562
- # Reset extractor state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563
  global extractor
564
  extractor = InformationExtractor()
565
-
566
- return (
567
- [], # Clear chat history
568
- 0.0, # Reset progress
569
- {"categories": []}, # Clear categories
570
- "Not started", # Reset focus
571
- {}, # Clear extracted info
572
- None, # Clear file output
573
- "Ready to start new extraction.", # Reset analysis
574
- gr.update(value="") # Clear message input
575
- )
576
-
577
- # Event Bindings
578
  msg.submit(
579
- process_message,
580
  inputs=[msg, chatbot, api_key],
581
- outputs=[
582
- chatbot,
583
- progress,
584
- categories_covered,
585
- current_focus,
586
- analysis_text
587
- ]
588
- ).then(
589
- lambda: "",
590
- None,
591
- msg
592
- )
593
 
594
  submit.click(
595
- process_message,
596
  inputs=[msg, chatbot, api_key],
597
- outputs=[
598
- chatbot,
599
- progress,
600
- categories_covered,
601
- current_focus,
602
- analysis_text
603
- ]
604
- ).then(
605
- lambda: "",
606
- None,
607
- msg
608
- )
609
 
610
  generate.click(
611
- generate_report,
612
- outputs=[
613
- file_output,
614
- extracted_info,
615
- current_focus,
616
- analysis_text
617
- ]
618
  )
619
 
620
  clear.click(
621
- clear_interface,
622
  outputs=[
623
- chatbot,
624
- progress,
625
- categories_covered,
626
- current_focus,
627
- extracted_info,
628
- file_output,
629
- analysis_text,
630
- msg
631
  ]
632
  )
633
 
634
  return demo
635
 
636
- if __name__ == "__main__":
637
- # Set up logging for the main application
 
 
 
638
  logging.basicConfig(
639
  level=logging.INFO,
640
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
641
  )
642
-
 
643
  try:
644
- demo = create_gradio_interface()
645
- demo.launch(
646
  server_name="0.0.0.0",
647
  server_port=7860,
648
  share=True,
@@ -650,4 +601,7 @@ if __name__ == "__main__":
650
  )
651
  except Exception as e:
652
  logger.error(f"Application failed to start: {str(e)}")
653
- raise
 
 
 
 
4
  from datetime import datetime
5
  from typing import Dict, List, Optional, Any, Tuple
6
  from dataclasses import dataclass, field
 
7
 
8
+ import openai # We'll use the official openai package
9
  import gradio as gr
 
10
 
11
+
12
  logging.basicConfig(
13
  level=logging.INFO,
14
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 
19
  )
20
  logger = logging.getLogger(__name__)
21
 
 
22
  SYSTEM_PROMPT = """
23
+ You are LOSS DOG, a Career and Education Information Extraction Assistant, designed to help users craft a compelling
24
+ and well-structured resume by extracting and organizing key details from conversations.
25
 
26
  Core Capabilities:
27
  - Proactively ask users about their career history, education, skills, certifications, projects, and achievements.
 
30
  - Maintain a friendly, engaging, and professional tone to encourage users to share relevant information.
31
  - Structure extracted data into well-organized resume sections.
32
 
33
+ Your goal is to make resume-building effortless by asking the right questions, extracting key information,
34
+ and presenting it in a clear, professional format.
35
  """
36
 
37
+
38
  @dataclass
39
  class ExtractedInfo:
40
+ """Structure for storing extracted information relevant to building a resume."""
41
  text: str
42
  category: str
43
  confidence: float
44
  timestamp: datetime = field(default_factory=datetime.now)
45
  metadata: Dict[str, Any] = field(default_factory=dict)
46
 
47
+
48
  @dataclass
49
  class ConversationState:
50
+ """Tracks the conversation state and progress regarding extracted resume info."""
51
  extracted_items: List[ExtractedInfo] = field(default_factory=list)
52
  categories_covered: List[str] = field(default_factory=list)
53
  current_focus: Optional[str] = None
 
56
  last_update: datetime = field(default_factory=datetime.now)
57
 
58
  def add_extracted_info(self, info: ExtractedInfo) -> None:
59
+ """Add new extracted information and update state accordingly."""
60
  self.extracted_items.append(info)
61
  if info.category not in self.categories_covered:
62
  self.categories_covered.append(info.category)
63
  self.last_update = datetime.now()
 
64
  class InformationExtractor:
65
+ """
66
+ Core class for handling information extraction from user messages to build a structured resume.
67
+
68
+ Attributes:
69
+ conversation_history: A list of dictionaries storing each message and its role (user/assistant).
70
+ state: An instance of ConversationState, which tracks the extraction progress and items.
71
+ extraction_categories: A list of main categories we want to extract for building the resume.
72
+ """
73
+
74
+ def __init__(self) -> None:
75
+ """Initialize the InformationExtractor with default settings."""
76
  self.conversation_history: List[Dict[str, str]] = []
77
  self.state = ConversationState()
 
78
  self.extraction_categories = [
79
  "personal_info",
80
  "education",
 
82
  "skills",
83
  "achievements"
84
  ]
85
+ # We'll store the API key in a protected variable to re-use as needed
86
+ self._api_key: Optional[str] = None
87
+
88
  def _validate_api_key(self, api_key: str) -> bool:
89
+ """
90
+ Validate the OpenAI API key format.
91
+
92
+ Args:
93
+ api_key: The user's OpenAI API key.
94
+
95
+ Returns:
96
+ True if the API key is valid, raises ValueError otherwise.
97
+ """
98
  if not api_key.strip():
99
+ raise ValueError("API key cannot be empty.")
100
+ if not api_key.startswith("sk-"):
101
+ raise ValueError("Invalid API key format. It must start with 'sk-'.")
102
  return True
103
+
104
  def _initialize_client(self, api_key: str) -> None:
105
+ """
106
+ Initialize openai with the given API key. Uses error handling to catch any issue.
107
+
108
+ Args:
109
+ api_key: The user's OpenAI API key.
110
+ """
111
  try:
112
  if self._validate_api_key(api_key):
113
+ openai.api_key = api_key
114
+ self._api_key = api_key
115
  except Exception as e:
116
  logger.error(f"Error initializing OpenAI client: {str(e)}")
117
  raise
118
 
119
  def _add_to_history(self, role: str, content: str) -> None:
120
+ """
121
+ Add a message to the conversation history with a timestamp.
122
+
123
+ Args:
124
+ role: Either 'user' or 'assistant' to denote who sent the message.
125
+ content: The message content.
126
+ """
127
  self.conversation_history.append({
128
  "role": role,
129
  "content": content,
 
131
  })
132
 
133
  def _get_ai_response(self, retries: int = 3) -> str:
134
+ """
135
+ Get an AI response from OpenAI's ChatCompletion endpoint.
136
+
137
+ Args:
138
+ retries: Number of times to retry upon failure.
139
+
140
+ Returns:
141
+ The text content of the AI's reply.
142
+ """
143
+ if not self._api_key:
144
+ raise ValueError("OpenAI client not initialized (API key missing).")
145
+
146
  for attempt in range(retries):
147
  try:
148
+ response = openai.ChatCompletion.create(
149
+ model="gpt-4o-mini", # or "gpt-4" or any other available model
150
  messages=[
151
  {"role": "system", "content": SYSTEM_PROMPT},
152
+ *[{"role": msg["role"], "content": msg["content"]} for msg in self.conversation_history]
 
 
 
153
  ],
154
  temperature=0.7,
155
  max_tokens=2000
156
+ )
157
+ return response["choices"][0]["message"]["content"]
 
 
158
  except Exception as e:
159
  logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
160
  if attempt == retries - 1:
161
  raise Exception(f"Failed after {retries} attempts: {str(e)}")
162
+ return ""
163
 
164
+ def _extract_resume_information(self, text: str) -> List[ExtractedInfo]:
165
+ """
166
+ Extract structured career and education-related information from the given text.
167
+
168
+ Args:
169
+ text: The combined user and AI text from which to extract relevant info.
170
+
171
+ Returns:
172
+ A list of ExtractedInfo objects with the extracted details.
173
+ """
174
+ if not self._api_key:
175
+ raise ValueError("OpenAI client not initialized (API key missing).")
176
+
177
+ # We'll ask GPT to produce JSON with extracted items
178
  extraction_prompt = f"""
179
  Analyze the following text and extract relevant information for resume building.
180
+ Focus on these key categories: {', '.join(self.extraction_categories)}.
 
 
 
181
 
182
+ For each piece of extracted data, output a JSON structure with:
 
 
183
  {{
184
+ "extracted_items": [
185
+ {{
186
+ "text": "...",
187
+ "category": "...",
188
+ "confidence": 0.0,
189
+ "metadata": {{ ... }}
190
+ }},
191
+ ...
192
+ ]
 
 
 
 
 
 
 
193
  }}
194
+
 
195
  Text to analyze: {text}
196
  """
197
 
198
+ try:
199
+ response = openai.ChatCompletion.create(
200
+ model="gpt-4o-mini",
201
+ messages=[
202
+ {"role": "system", "content": SYSTEM_PROMPT},
203
+ {"role": "user", "content": extraction_prompt}
204
+ ],
205
+ temperature=0.3,
206
+ max_tokens=1000
207
+ )
208
+ raw_content = response["choices"][0]["message"]["content"]
209
+
210
+ # Now parse the content
211
+ analysis = json.loads(raw_content)
212
+ extracted_items = []
213
 
214
+ for item in analysis.get("extracted_items", []):
+ extracted_info = ExtractedInfo(
+ text=item["text"],
+ category=item["category"],
+ confidence=item["confidence"],
+ metadata=item.get("metadata", {})
+ )
+ extracted_items.append(extracted_info)
+
+ return extracted_items
+
+ except json.JSONDecodeError as e:
+ logger.error(f"Error parsing extraction response: {str(e)}")
+ return []
+ except Exception as e:
+ logger.error(f"Error during information extraction: {str(e)}")
+ return []
215
  def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
216
+ """
217
+ Process a user message:
218
+ 1. Initialize OpenAI if needed,
219
+ 2. Add user message to history,
220
+ 3. Get AI response,
221
+ 4. Extract resume information,
222
+ 5. Update the conversation state,
223
+ 6. Return structured data.
224
+
225
+ Args:
226
+ message: The user's chat input.
227
+ api_key: The user's OpenAI API key.
228
+
229
+ Returns:
230
+ A dictionary with AI response, extracted info, and updated completion status.
231
+ """
232
+ # Always return a dictionary so that the UI can parse it
233
  try:
234
+ if not message.strip():
235
+ # Return the standard response dict even for empty input (the UI handler unpacks it into its five outputs)
236
+ return {
237
+ "response": "Please enter a message.",
238
+ "extracted_info": [],
239
+ "completion_status": {
240
+ "percentage": self.state.completion_percentage,
241
+ "categories_covered": self.state.categories_covered,
242
+ "current_focus": self.state.current_focus
243
+ },
244
+ "analysis_text": "No new information extracted.",
245
+ "history_message": "(No change in history)"
246
+ }
247
+
248
+ # Initialize the client if not done yet
249
+ if not self._api_key:
250
  self._initialize_client(api_key)
251
 
252
+ # Add user message to conversation history
253
  self._add_to_history("user", message)
 
 
254
  ai_response = self._get_ai_response()
255
  self._add_to_history("assistant", ai_response)
256
+
257
+ # Extract new info from the full conversation
258
+ new_info = self._extract_resume_information(text=message + "\n" + ai_response)
259
+
260
+ # Update the conversation state
261
+ for info_item in new_info:
262
+ self.state.add_extracted_info(info_item)
263
+
 
264
  self._update_completion_status()
265
+
266
  return {
267
  "response": ai_response,
268
  "extracted_info": [
 
270
  "text": info.text,
271
  "category": info.category,
272
  "confidence": info.confidence
273
+ }
274
+ for info in new_info
275
  ],
276
  "completion_status": {
277
  "percentage": self.state.completion_percentage,
278
  "categories_covered": self.state.categories_covered,
279
  "current_focus": self.state.current_focus
280
+ },
281
+ "analysis_text": "Successfully extracted new information." if new_info else "No new information extracted.",
282
+ "history_message": f"Added user message '{message}' and assistant response to history."
283
  }
284
+
285
  except Exception as e:
286
  error_msg = f"Error processing message: {str(e)}"
287
  logger.error(error_msg)
288
  self.state.last_error = error_msg
289
  return {
290
+ "response": "",
291
+ "extracted_info": [],
292
  "completion_status": {
293
  "percentage": self.state.completion_percentage,
294
  "categories_covered": self.state.categories_covered,
295
  "current_focus": self.state.current_focus
296
+ },
297
+ "analysis_text": error_msg,
298
+ "history_message": "(Processing failed)"
299
  }
300
 
301
  def generate_output(self) -> Dict[str, Any]:
302
+ """
303
+ Generate structured JSON output containing all extracted information,
304
+ store it in a file, and return the file name and content.
305
+
306
+ Returns:
307
+ A dict with fields: filename, content, and status.
308
+ """
309
  try:
 
310
  categorized_info = {}
311
  for category in self.extraction_categories:
312
+ items_in_cat = [
313
  {
314
  "text": item.text,
315
  "confidence": item.confidence,
 
319
  for item in self.state.extracted_items
320
  if item.category == category
321
  ]
322
+ if items_in_cat:
323
+ categorized_info[category] = items_in_cat
324
 
 
325
  output = {
326
  "extracted_information": categorized_info,
327
  "analysis_summary": {
 
336
  }
337
  }
338
 
 
339
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
340
  filename = f"extracted_info_{timestamp}.json"
341
+
342
+ # Use a context manager for safe file operations
343
  with open(filename, 'w', encoding='utf-8') as f:
344
  json.dump(output, f, indent=2, ensure_ascii=False)
345
 
 
357
  "status": "error"
358
  }
359
 
360
+
361
+ def create_gradio_interface() -> gr.Blocks:
362
+ """
363
+ Create the Gradio interface for the InformationExtractor.
364
+
365
+ Returns:
366
+ The gradio Blocks application interface object.
367
+ """
368
  extractor = InformationExtractor()
369
+
 
370
  css = """
371
  .container { max-width: 900px; margin: auto; }
372
  .message { padding: 1rem; margin: 0.5rem 0; border-radius: 0.5rem; }
 
387
  """
388
 
389
  with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
390
+ gr.Markdown("# 🔍 Information Extraction Assistant\n")
 
 
 
 
 
391
 
392
  with gr.Row():
393
  with gr.Column(scale=2):
 
394
  api_key = gr.Textbox(
395
  label="OpenAI API Key",
396
  type="password",
397
+ placeholder="Enter your OpenAI API key (sk-...)"
 
398
  )
399
 
 
400
  chatbot = gr.Chatbot(
401
+ label="Conversation",
402
  value=[],
403
+ height=400,
+ type="messages"
404
  )
405
 
 
406
  with gr.Row():
407
  msg = gr.Textbox(
408
  label="Message",
409
+ placeholder="Type your message here..."
 
 
 
 
 
 
410
  )
411
+ submit = gr.Button("Send", variant="primary")
412
 
 
413
  with gr.Row():
414
+ clear = gr.Button("Clear Chat")
415
+ generate = gr.Button("Generate Report", variant="secondary")
 
 
 
 
416
 
417
  with gr.Column(scale=1):
418
+ with gr.Group():
 
419
  gr.Markdown("### Extraction Progress")
 
 
420
  progress = gr.Slider(
421
  label="Completion",
422
  minimum=0,
 
424
  value=0,
425
  interactive=False
426
  )
427
+ categories_covered = gr.JSON(label="Categories Covered", value={})
 
 
 
 
 
 
 
428
  current_focus = gr.Textbox(
429
  label="Current Focus",
430
  value="Not started",
431
  interactive=False
432
  )
433
 
 
434
  with gr.Tabs() as result_tabs:
435
  with gr.Tab("Extracted Information"):
436
+ extracted_info = gr.JSON(label="Extracted Details", value={})
 
 
 
 
437
  with gr.Tab("Download"):
438
+ file_output = gr.File(label="Download Report")
 
 
 
439
  with gr.Tab("Analysis"):
440
+ analysis_text = gr.Markdown("Analysis will appear here after processing.")
441
+
442
+ def format_extraction_summary(extracted_items_list: List[Dict[str, Any]]) -> str:
443
+ """
444
+ Utility function to format extracted data for user-friendly display.
445
+
446
+ Args:
447
+ extracted_items_list: List of dictionaries with 'category', 'confidence', and 'text'.
448
+
449
+ Returns:
450
+ A string summary of the extracted items.
451
+ """
452
+ if not extracted_items_list:
453
  return "No information extracted yet."
454
+ lines = ["### Recently Extracted Information"]
455
+ for itm in extracted_items_list:
456
+ lines.append(
457
+ f"- **{itm['category']}** ({itm['confidence']*100:.1f}% confidence)\n"
458
+ f" {itm['text']}"
 
459
  )
460
+ return "\n".join(lines)
461
+
462
+ def process_message(user_input: str, history: List[Dict[str, str]], key: str) -> Tuple[Any, float, Dict[str, Any], str, str]:
463
+ """
464
+ Event handler to process a user message. Returns a 5-element tuple matching the
465
+ outputs: (new_chat_history, progress_value, categories_json, focus_text, analysis_message).
466
+
467
+ Args:
468
+ user_input: The current user message.
469
+ history: The existing chat history.
470
+ key: The user's OpenAI API key.
471
+
472
+ Returns:
473
+ A tuple with updated chatbot messages, progress, categories_covered, current_focus, and analysis text.
474
+ """
475
+ result = extractor.process_message(user_input, key)
476
+
477
+ # Update chat history
478
+ # We will append the user message + assistant response
479
+ history.append({"role": "user", "content": user_input})
480
+ history.append({"role": "assistant", "content": result["response"]})
481
+
482
+ # Update progress
483
+ prog_val = result["completion_status"]["percentage"]
484
+ cat_cov = {"categories": result["completion_status"]["categories_covered"]}
485
+ focus_val = result["completion_status"]["current_focus"] or "Not specified"
486
+
487
+ # If we have newly extracted info, let's show it
488
+ extract_list = result.get("extracted_info", [])
489
+ if extract_list:
490
+ analysis = format_extraction_summary(extract_list)
491
+ else:
492
+ analysis = result["analysis_text"]
493
+
494
+ return history, prog_val, cat_cov, focus_val, analysis
495
+
496
+ def generate_report() -> Tuple[Optional[str], Dict[str, Any], str, str]:
497
+ """
498
+ Generate a JSON report of extracted resume info.
499
+
500
+ Returns:
501
+ A tuple of: (filename, extracted_json, focus_message, analysis_text).
502
+ """
503
+ gen_result = extractor.generate_output()
504
+ if gen_result["status"] == "success":
505
+ filename = gen_result["filename"]
506
+ content = gen_result["content"]
507
+
508
+ # Summarize categories, etc. for user
509
+ content_preview = {
510
+ "summary": content["analysis_summary"],
511
+ "categories": list(content["extracted_information"].keys()),
512
+ "total_items": len(content["extracted_information"])
513
  }
 
514
 
515
+ # Flatten everything for a final analysis string
516
+ flat_items = []
517
+ for cat_items in content["extracted_information"].values():
518
+ flat_items.extend(cat_items)
519
 
520
+ final_analysis = format_extraction_summary([
521
+ {
522
+ "text": i["text"],
523
+ "confidence": i["confidence"],
524
+ "category": cat
525
+ }
526
+ for cat in content["extracted_information"].keys()
527
+ for i in content["extracted_information"][cat]
528
+ ])
529
+
530
+ return filename, content_preview, "Report generated successfully!", final_analysis
531
+ else:
532
+ return None, {"error": gen_result["error"]}, "Error generating report.", "No analysis."
533
+
534
+ def clear_interface() -> Tuple[List[Dict[str, str]], float, Dict[str, Any], str, Dict[str, Any], None, str, str]:
535
+ """
536
+ Reset all UI components to their initial state.
537
+
538
+ Returns:
539
+ A tuple specifying the reset states of:
540
+ - Chatbot
541
+ - Progress
542
+ - Categories
543
+ - Current Focus
544
+ - Extracted Info
545
+ - File Output
546
+ - Analysis
547
+ - Message Box
548
+ """
549
+ # Re-instantiate the extractor to clear its internal state
550
  global extractor
551
  extractor = InformationExtractor()
552
+
553
+ return [], 0.0, {"categories": []}, "Not started", {}, None, "Ready to start new extraction.", ""
554
+
555
+ # Bind events
 
 
 
 
 
 
 
 
 
556
  msg.submit(
557
+ fn=process_message,
558
  inputs=[msg, chatbot, api_key],
559
+ outputs=[chatbot, progress, categories_covered, current_focus, analysis_text]
560
+ ).then(lambda: "", None, msg)
 
 
 
 
 
 
 
 
 
 
561
 
562
  submit.click(
563
+ fn=process_message,
564
  inputs=[msg, chatbot, api_key],
565
+ outputs=[chatbot, progress, categories_covered, current_focus, analysis_text]
566
+ ).then(lambda: "", None, msg)
 
 
 
 
 
 
 
 
 
 
567
 
568
  generate.click(
569
+ fn=generate_report,
570
+ outputs=[file_output, extracted_info, current_focus, analysis_text]
 
 
 
 
 
571
  )
572
 
573
  clear.click(
574
+ fn=clear_interface,
575
  outputs=[
576
+ chatbot, progress, categories_covered,
577
+ current_focus, extracted_info, file_output,
578
+ analysis_text, msg
 
 
 
 
 
579
  ]
580
  )
581
 
582
  return demo
583
 
584
+
585
+ def main() -> None:
586
+ """
587
+ Main function to launch the Gradio application on port 7860, with share=True.
588
+ """
589
  logging.basicConfig(
590
  level=logging.INFO,
591
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
592
  )
593
+
594
+ demo_app = create_gradio_interface()
595
  try:
596
+ demo_app.launch(
 
597
  server_name="0.0.0.0",
598
  server_port=7860,
599
  share=True,
 
601
  )
602
  except Exception as e:
603
  logger.error(f"Application failed to start: {str(e)}")
604
+
605
+
606
+ if __name__ == "__main__":
607
+ main()
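The new `_extract_resume_information` hands the model's reply straight to `json.loads`, which raises if the reply wraps its JSON in Markdown code fences or adds surrounding prose. A small defensive parser along these lines is one way to harden that step; the helper name, regex, and empty-list fallback are illustrative assumptions, not part of this commit:

```python
import json
import re
from typing import Any, Dict


def parse_model_json(raw: str) -> Dict[str, Any]:
    """Best-effort parse of a JSON object embedded in a chat-model reply (illustrative helper)."""
    # Strip Markdown code fences the model may wrap around its output.
    fenced = re.search(r"```(?:json)?\s*(\{.*\})\s*```", raw, re.DOTALL)
    candidate = fenced.group(1) if fenced else raw
    # Fall back to the outermost braces if extra prose surrounds the object.
    if not candidate.lstrip().startswith("{"):
        start, end = candidate.find("{"), candidate.rfind("}")
        if start != -1 and end > start:
            candidate = candidate[start:end + 1]
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        # Degrade gracefully: the caller treats this as "nothing extracted".
        return {"extracted_items": []}
```

With such a helper, the `analysis = json.loads(raw_content)` line in `_extract_resume_information` could become `analysis = parse_model_json(raw_content)`, so a malformed reply degrades to an empty extraction instead of raising inside the request handler.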