Update app.py
Browse files
app.py
CHANGED
@@ -29,85 +29,186 @@ Be friendly and conversational. Ask follow-up questions naturally. When appropri
|
|
29 |
but respect their boundaries. Once you believe you have gathered sufficient information (or if the user indicates they
|
30 |
have nothing more to share), let them know they can click 'Generate Profile' to proceed.
|
31 |
"""
|
32 |
-
EXTRACTION_PROMPT = """You are a
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
- Project Contributions & Leadership
|
43 |
-
- Work Performance & Impact Metrics
|
44 |
-
|
45 |
-
3. Clean and structure the information:
|
46 |
-
- Deduplicate repeated information
|
47 |
-
- Resolve any inconsistencies
|
48 |
-
- Make reasonable inferences when dates or details are partial
|
49 |
-
- Standardize formatting (dates, company names, titles)
|
50 |
-
|
51 |
-
4. Output a VALID JSON object with this exact structure:
|
52 |
{
|
53 |
"work_history_experience": {
|
54 |
"positions": [
|
55 |
{
|
56 |
-
"title":
|
57 |
-
"company":
|
58 |
-
"
|
59 |
-
"
|
60 |
-
"
|
61 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
}
|
63 |
]
|
64 |
},
|
65 |
"skills_certifications": {
|
66 |
-
"
|
|
|
|
|
|
|
67 |
"certifications": [
|
68 |
{
|
69 |
-
"name":
|
70 |
-
"issuer":
|
71 |
-
"date":
|
72 |
-
"confidence":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
}
|
74 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
}
|
76 |
-
// ... other categories following similar structure
|
77 |
}
|
78 |
|
79 |
-
|
80 |
-
- Return ONLY valid JSON
|
81 |
-
- Always include confidence scores (0.0-1.0)
|
82 |
-
- Mark any inferred information
|
83 |
-
- Use consistent date formats (YYYY-MM-DD)
|
84 |
-
- Clean and standardize all text fields
|
85 |
-
- Return empty arrays [] for missing sections rather than null
|
86 |
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
91 |
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
class ProfileBuilder:
|
112 |
def __init__(self):
|
113 |
self.conversation_history = []
|
|
|
29 |
but respect their boundaries. Once you believe you have gathered sufficient information (or if the user indicates they
|
30 |
have nothing more to share), let them know they can click 'Generate Profile' to proceed.
|
31 |
"""
|
32 |
+
EXTRACTION_PROMPT = """You are a professional information extraction system. Your task is to methodically analyze conversations and organize information into 9 specific categories. Process each category thoroughly and output in structured JSON format.
|
33 |
+
|
34 |
+
ANALYTICAL PROCESS:
|
35 |
+
1. Read entire conversation history
|
36 |
+
2. Extract explicit and implicit information
|
37 |
+
3. Make reasonable inferences when appropriate
|
38 |
+
4. Structure data according to defined schema
|
39 |
+
5. Include confidence scores for all extracted information
|
40 |
+
|
41 |
+
OUTPUT SCHEMA:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
{
|
43 |
"work_history_experience": {
|
44 |
"positions": [
|
45 |
{
|
46 |
+
"title": string,
|
47 |
+
"company": string,
|
48 |
+
"industry": string,
|
49 |
+
"location": string,
|
50 |
+
"employment_type": string,
|
51 |
+
"adaptability": {
|
52 |
+
"career_shifts": string[],
|
53 |
+
"upskilling": string[]
|
54 |
+
},
|
55 |
+
"promotions": string[],
|
56 |
+
"confidence": float
|
57 |
+
}
|
58 |
+
]
|
59 |
+
},
|
60 |
+
"salary_compensation": {
|
61 |
+
"history": [
|
62 |
+
{
|
63 |
+
"base_salary": number | null,
|
64 |
+
"bonus_structure": string | null,
|
65 |
+
"stock_options": {
|
66 |
+
"type": string,
|
67 |
+
"details": string
|
68 |
+
},
|
69 |
+
"commission": string | null,
|
70 |
+
"benefits": {
|
71 |
+
"health": string,
|
72 |
+
"pto": string,
|
73 |
+
"retirement": string,
|
74 |
+
"other": string[]
|
75 |
+
},
|
76 |
+
"confidence": float
|
77 |
}
|
78 |
]
|
79 |
},
|
80 |
"skills_certifications": {
|
81 |
+
"hard_skills": string[],
|
82 |
+
"soft_skills": string[],
|
83 |
+
"programming_languages": string[],
|
84 |
+
"spoken_languages": string[],
|
85 |
"certifications": [
|
86 |
{
|
87 |
+
"name": string,
|
88 |
+
"issuer": string,
|
89 |
+
"date": string,
|
90 |
+
"confidence": float
|
91 |
+
}
|
92 |
+
],
|
93 |
+
"licenses": [
|
94 |
+
{
|
95 |
+
"type": string,
|
96 |
+
"issuer": string,
|
97 |
+
"valid_until": string,
|
98 |
+
"confidence": float
|
99 |
}
|
100 |
]
|
101 |
+
},
|
102 |
+
"education_learning": {
|
103 |
+
"formal_education": [
|
104 |
+
{
|
105 |
+
"degree": string,
|
106 |
+
"institution": string,
|
107 |
+
"gpa": number | null,
|
108 |
+
"research": string[],
|
109 |
+
"period": {
|
110 |
+
"start": string,
|
111 |
+
"end": string | null
|
112 |
+
},
|
113 |
+
"confidence": float
|
114 |
+
}
|
115 |
+
],
|
116 |
+
"online_courses": [],
|
117 |
+
"executive_education": []
|
118 |
+
},
|
119 |
+
"personal_branding": {
|
120 |
+
"portfolio": {
|
121 |
+
"github": string | null,
|
122 |
+
"behance": string | null,
|
123 |
+
"other": string[]
|
124 |
+
},
|
125 |
+
"blog_posts": [],
|
126 |
+
"blockchain_projects": {
|
127 |
+
"nfts": [],
|
128 |
+
"defi": [],
|
129 |
+
"dapps": []
|
130 |
+
},
|
131 |
+
"public_speaking": [],
|
132 |
+
"social_media": {
|
133 |
+
"platforms": [],
|
134 |
+
"influence_metrics": {}
|
135 |
+
}
|
136 |
+
},
|
137 |
+
"achievements_awards": {
|
138 |
+
"industry_awards": [],
|
139 |
+
"hackathons": [],
|
140 |
+
"peer_endorsements": [],
|
141 |
+
"creative_projects": {
|
142 |
+
"ai_art": [],
|
143 |
+
"other": []
|
144 |
+
}
|
145 |
+
},
|
146 |
+
"social_proof_networking": {
|
147 |
+
"mentors": [],
|
148 |
+
"references": [],
|
149 |
+
"memberships": [
|
150 |
+
{
|
151 |
+
"organization": string,
|
152 |
+
"type": string,
|
153 |
+
"period": string,
|
154 |
+
"confidence": float
|
155 |
+
}
|
156 |
+
],
|
157 |
+
"conference_engagement": []
|
158 |
+
},
|
159 |
+
"project_contributions": {
|
160 |
+
"major_projects": [],
|
161 |
+
"open_source": [],
|
162 |
+
"team_leadership": [],
|
163 |
+
"patents": [],
|
164 |
+
"impact": {
|
165 |
+
"description": string,
|
166 |
+
"metrics": string[],
|
167 |
+
"confidence": float
|
168 |
+
}
|
169 |
+
},
|
170 |
+
"work_performance_metrics": {
|
171 |
+
"kpis": [],
|
172 |
+
"revenue_impact": [],
|
173 |
+
"efficiency_gains": [],
|
174 |
+
"career_growth": [],
|
175 |
+
"leadership_influence": []
|
176 |
}
|
|
|
177 |
}
|
178 |
|
179 |
+
EXTRACTION GUIDELINES:
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
|
181 |
+
1. Process systematically:
|
182 |
+
- Analyze conversation thoroughly
|
183 |
+
- Look for both direct statements and implied information
|
184 |
+
- Cross-reference information across different parts of conversation
|
185 |
+
- Make reasonable inferences when appropriate
|
186 |
|
187 |
+
2. For each piece of information:
|
188 |
+
- Clean and standardize the data
|
189 |
+
- Assign confidence scores (0.0-1.0)
|
190 |
+
- Mark inferred information
|
191 |
+
- Include source context where relevant
|
192 |
+
|
193 |
+
3. Quality requirements:
|
194 |
+
- Use consistent date formats (YYYY-MM-DD)
|
195 |
+
- Standardize company names and titles
|
196 |
+
- Use empty arrays [] for missing array fields (fields typed `| null` in the schema may be null)
|
197 |
+
- Never use null for array fields
|
198 |
+
- Include confidence scores for all extracted data
|
199 |
+
|
200 |
+
4. Handle missing information:
|
201 |
+
- Use empty arrays [] rather than null for array fields
|
202 |
+
- Mark inferred information clearly
|
203 |
+
- Include partial information when complete data isn't available
|
204 |
+
- Note uncertainty in confidence scores
|
205 |
+
|
206 |
+
Remember to:
|
207 |
+
- Process each category thoroughly
|
208 |
+
- Cross-reference information for consistency
|
209 |
+
- Make reasonable inferences when appropriate
|
210 |
+
- Maintain consistent formatting
|
211 |
+
- Include all required fields even if empty"""
|
212 |
class ProfileBuilder:
|
213 |
def __init__(self):
|
214 |
self.conversation_history = []
|