jzou19950715 committed on
Commit
c5fc623
·
verified ·
1 Parent(s): a699c1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -15
app.py CHANGED
@@ -29,22 +29,85 @@ Be friendly and conversational. Ask follow-up questions naturally. When appropri
29
  but respect their boundaries. Once you believe you have gathered sufficient information (or if the user indicates they
30
  have nothing more to share), let them know they can click 'Generate Profile' to proceed.
31
  """
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- EXTRACTION_PROMPT = """You are LOSS DOG's data processing system. Analyze the provided conversation and proactively interpret them for possible deduced
34
- information that can be extracted. Turn the structured information into the following categories as json:
35
-
36
- 1. Work History & Experience: Job titles, companies, industries, locations, adaptability, promotions
37
- 2. Salary & Compensation: Base salary, bonuses, equity, benefits (if shared)
38
- 3. Skills & Certifications: Technical skills, languages, certifications, licenses
39
- 4. Education & Learning: Degrees, institutions, courses, research
40
- 5. Personal Branding: Online presence, portfolio, blockchain projects, social media
41
- 6. Achievements & Awards: Industry recognition, hackathons, creative projects
42
- 7. Social Proof: Mentors, references, memberships, conferences
43
- 8. Project Contributions: Major projects, open-source, patents, impact
44
- 9. Performance Metrics: KPIs, revenue impact, growth metrics
45
-
46
- Format the output as clean, structured JSON.
47
- Mark any inferred information clearly."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  class ProfileBuilder:
49
  def __init__(self):
50
  self.conversation_history = []
 
29
  but respect their boundaries. Once you believe you have gathered sufficient information (or if the user indicates they
30
  have nothing more to share), let them know they can click 'Generate Profile' to proceed.
31
  """
32
+ EXTRACTION_PROMPT = """You are a data extraction specialist. Your task is to:
33
+ 1. Read through the provided conversation
34
+ 2. Identify relevant information across 9 categories:
35
+ - Work History & Experience (jobs, roles, companies)
36
+ - Salary & Compensation (if shared)
37
+ - Skills & Certifications
38
+ - Education & Learning
39
+ - Personal Branding & Online Presence
40
+ - Achievements & Awards
41
+ - Social Proof & Networking
42
+ - Project Contributions & Leadership
43
+ - Work Performance & Impact Metrics
44
 
45
+ 3. Clean and structure the information:
46
+ - Deduplicate repeated information
47
+ - Resolve any inconsistencies
48
+ - Make reasonable inferences when dates or details are partial
49
+ - Standardize formatting (dates, company names, titles)
50
+
51
+ 4. Output a VALID JSON object with this exact structure:
52
+ {
53
+ "work_history_experience": {
54
+ "positions": [
55
+ {
56
+ "title": "cleaned job title",
57
+ "company": "cleaned company name",
58
+ "duration": "standardized duration",
59
+ "description": "cleaned description",
60
+ "confidence": 0.95,
61
+ "inferred": false
62
+ }
63
+ ]
64
+ },
65
+ "skills_certifications": {
66
+ "technical_skills": ["skill1", "skill2"],
67
+ "certifications": [
68
+ {
69
+ "name": "certification name",
70
+ "issuer": "issuing organization",
71
+ "date": "YYYY-MM",
72
+ "confidence": 0.9
73
+ }
74
+ ]
75
+ }
76
+ // ... other categories following similar structure
77
+ }
78
+
79
+ IMPORTANT:
80
+ - Return ONLY valid JSON
81
+ - Always include confidence scores (0.0-1.0)
82
+ - Mark any inferred information
83
+ - Use consistent date formats (YYYY-MM-DD, or YYYY-MM when the day is unknown)
84
+ - Clean and standardize all text fields
85
+ - Return empty arrays [] for missing sections rather than null
86
+
87
+ Example conversation snippet:
88
+ User: "I worked at Google for a few years"
89
+ Assistant: "That's interesting! What was your role there?"
90
+ User: "I was a senior engineer, mostly doing ML stuff"
91
+
92
+ Should extract to:
93
+ {
94
+ "work_history_experience": {
95
+ "positions": [
96
+ {
97
+ "title": "Senior ML Engineer",
98
+ "company": "Google",
99
+ "duration": {
100
+ "start": null,
101
+ "end": null,
102
+ "description": "multiple years",
103
+ "inferred": true
104
+ },
105
+ "description": "Machine learning engineering",
106
+ "confidence": 0.85
107
+ }
108
+ ]
109
+ }
110
+ }"""
111
  class ProfileBuilder:
112
  def __init__(self):
113
  self.conversation_history = []