import json
import logging
from datetime import datetime
from typing import Dict, List, Optional, Any
import gradio as gr
from openai import AsyncOpenAI
import PyPDF2
import io

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# System prompt that guides FINN's information-gathering conversation
CONVERSATION_PROMPT = """
<SYSTEM_PROMPT>
    <NAME>FINN's Conversation Guide for Information Extraction</NAME>

    <ROLE>
        <DESCRIPTION>You are FINN, Lossdog's dedicated AI assistant specializing in information extraction.</DESCRIPTION>
        <OBJECTIVE>Help users build compelling resumes through natural conversation, drawing out both qualitative and quantitative achievements.</OBJECTIVE>
        <MEANING>FINN stands for Facts, Insights, Numbers, and Narrative.</MEANING>
    </ROLE>

    <COMMUNICATION_STYLE>
        <ADAPTABILITY>Adapt to each user's unique background and experience level.</ADAPTABILITY>
        <TONE>Professional yet approachable, encouraging detailed responses.</TONE>
    </COMMUNICATION_STYLE>

    <DATA_COLLECTION_CATEGORIES>
        <CATEGORY name="Work Experience">
            <PROMPT>Tell me about your professional experience.</PROMPT>
            <APPROACH>
                - Gather core details (title, company, dates, compensation)
                - Encourage storytelling about responsibilities and achievements
                - Naturally probe for quantitative metrics when relevant
                - Help discover and highlight impactful contributions
            </APPROACH>
        </CATEGORY>

        <CATEGORY name="Volunteer & Community Impact">
            <PROMPT>Tell me about any volunteer work or community involvement you'd like to highlight.</PROMPT>
            <APPROACH>
                - Focus on impact and contributions
                - Draw out measurable outcomes where possible
                - Connect activities to professional skills
            </APPROACH>
        </CATEGORY>

        <CATEGORY name="Education">
            <PROMPT>Let's talk about your educational background and ongoing learning.</PROMPT>
            <APPROACH>
                - Gather formal education details
                - Explore continuing education and self-development
                - Identify relevant projects and achievements
            </APPROACH>
        </CATEGORY>

        <CATEGORY name="Skills">
            <PROMPT>What key skills would you like to highlight?</PROMPT>
            <APPROACH>
                - Help identify and articulate both technical and soft skills
                - Draw out expertise from experience
                - Connect skills to practical applications
            </APPROACH>
        </CATEGORY>

        <CATEGORY name="Notable Achievements & Certifications">
            <PROMPT>Tell me about your professional achievements and certifications.</PROMPT>
            <APPROACH>
                - Encourage sharing of recognition and accomplishments
                - Help quantify impact where possible
                - Draw out specific examples
            </APPROACH>
        </CATEGORY>

        <CATEGORY name="Professional Bio">
            <PROMPT>Based on our conversation, let's create your professional bio. What would you like to highlight about yourself and your aspirations?</PROMPT>
            <APPROACH>
                - Synthesize key themes from previous categories
                - Include future goals and aspirations
                - Create a cohesive narrative
            </APPROACH>
        </CATEGORY>
    </DATA_COLLECTION_CATEGORIES>

    <CONVERSATION_PRINCIPLES>
        - Maintain a natural, friendly, and non-judgmental conversation flow, using emojis and emotionally intelligent wording where it helps.
        - Stay focused on the core tasks.
        - Adapt questions based on the user's background while staying task-driven.
        - Consistently seek quantifiable impacts in addition to qualitative information.
        - Help users discover and articulate their value through creative nudging and exploration.
        - Guide without being prescriptive, and accommodate users with diverse work experience.
        - Encourage specific examples and metrics needed for an effective resume.
        - Once all information has been collected, or the user indicates they have finished providing details, congratulate them on completing their resume and invite them to click "Generate Profile" to see the result.
    </CONVERSATION_PRINCIPLES>
</SYSTEM_PROMPT>
"""

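# Prompt used to turn the collected conversation (or extracted resume text) into the structured profile JSON.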
EXTRACTION_PROMPT = """
You are a professional information extraction system. Extract information from the conversation and return ONLY a valid JSON object that matches FINN's six core categories. Proactively determine how to fill the JSON schema using the provided information, and do not include any explanatory text before or after the JSON. Analyze the conversation and intelligently categorize all information into the appropriate sections, using your judgment to place details under the most relevant category even if they were not explicitly labeled as such in the conversation. Always creatively synthesize a professional bio in the professional_bio section, based on the information provided throughout the entire conversation.
Key Guidelines:
- Flexibly categorize information while maintaining core structure
- Place unique or unexpected information under the most relevant category
- Capture both qualitative and quantitative elements
- Ensure no valuable information is lost
- Create a compelling professional bio that synthesizes the entire profile

Return the data in this format, adding relevant fields as needed:

{
    "work_experience": [
        {
            "title": string,
            "company": string,
            "period": string,
            "compensation": {
                "salary": string,
                "benefits": string
            },
            "details": string,  // Combined responsibilities, achievements, and metrics
            "quantitative_highlights": [],  // Array of measurable impacts
            "additional_information": {}  // Flexible object for unique elements
        }
    ],
    "volunteer_community": [
        {
            "organization": string,
            "role": string,
            "period": string,
            "impact": string,
            "metrics": []
        }
    ],
    "education": [
        {
            "degree": string,
            "institution": string,
            "period": string,
            "highlights": string,
            "honors": []
        }
    ],
    "skills": {
        "technical": [],
        "soft": [],
        "tools": [],
        "languages": [],
        "industry_expertise": []
    },
    "achievements": {
        "certifications": [
            {
                "name": string,
                "issuer": string,
                "date": string
            }
        ],
        "recognition": [
            {
                "title": string,
                "details": string,
                "impact": string
            }
        ]
    },
    "professional_bio": {
        "summary": string,  // Synthesized narrative of professional journey
        "aspirations": string,  // Future goals and direction
        "key_strengths": [],  // Core competencies and unique value
        "style": "narrative"  // Ensures bio is written in engaging, story-telling format
    }
}

Notes on Bio Generation:
- Always generate a professional bio section regardless of explicit bio information
- Bio should synthesize key achievements, experience, and aspirations
- Include relevant metrics and impactful contributions
- Capture professional journey and future direction
- Style should be engaging and narrative while maintaining professionalism

IMPORTANT: Return ONLY the JSON. Do not include any explanatory text."""

class ProfileBuilder:
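    """Builds structured professional profiles from a chat conversation or an uploaded PDF resume."""
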
    def __init__(self):
        self.client = None    # AsyncOpenAI client, created once a valid API key is provided
        self.pdf_text = None  # cached text of the most recently processed PDF resume
    
    def _initialize_client(self, api_key: str) -> None:
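        """Validate the API key format and create the asynchronous OpenAI client."""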
        if not api_key or not api_key.startswith("sk-"):
            raise ValueError("Invalid API key format: expected a key starting with 'sk-'")
        self.client = AsyncOpenAI(api_key=api_key)

    async def process_message(self, message: str, history: List[List[str]], api_key: str) -> Dict[str, Any]:
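        """Send the user's message plus prior chat history to the model and return FINN's reply."""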
        try:
            self._initialize_client(api_key)
            
            conversation_history = []
            for human, assistant in history:
                conversation_history.extend([
                    {"role": "user", "content": human},
                    {"role": "assistant", "content": assistant}
                ])
            
            conversation_history.append({"role": "user", "content": message})
            
            completion = await self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": CONVERSATION_PROMPT},
                    *conversation_history
                ],
                temperature=0.7
            )
            
            ai_message = completion.choices[0].message.content
            return {"response": ai_message}

        except Exception as e:
            logger.error(f"Error in message processing: {str(e)}")
            return {"error": str(e)}

    async def extract_from_pdf(self, pdf_content: bytes) -> str:
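        """Extract plain text from the raw bytes of a PDF and cache it for later profile generation."""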
        try:
            pdf_file = io.BytesIO(pdf_content)
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            text = ""
            for page in pdf_reader.pages:
                # extract_text() can return None for pages with no extractable text
                text += page.extract_text() or ""
            self.pdf_text = text
            return text
        except Exception as e:
            logger.error(f"PDF extraction error: {str(e)}")
            raise

    async def process_pdf(self, pdf_path: str, api_key: str) -> Dict[str, Any]:
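        """Read a PDF resume from disk and extract a structured profile from its text via the extraction prompt."""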
        try:
            self._initialize_client(api_key)

            with open(pdf_path, 'rb') as file:
                resume_text = await self.extract_from_pdf(file.read())

            completion = await self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": EXTRACTION_PROMPT},
                    {"role": "user", "content": f"Extract profile information from this resume:\n\n{resume_text}"}
                ],
                temperature=0.3
            )

            response_text = completion.choices[0].message.content.strip()
            profile_data = json.loads(response_text)

            profile = {
                "profile_data": profile_data,
                "metadata": {
                    "generated_at": datetime.now().isoformat(),
                    "source": "pdf_resume"
                }
            }

            return profile

        except Exception as e:
            logger.error(f"PDF processing error: {str(e)}")
            return {"error": str(e)}

    async def generate_profile(self, history: List[List[str]], api_key: str) -> tuple[Dict[str, Any], Optional[str]]:
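        """Build a profile from the conversation history (or cached PDF text), save it as JSON, and return it with the filename."""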
        try:
            self._initialize_client(api_key)

            if history:
                content = "\n".join(f"User: {msg[0]}\nAssistant: {msg[1]}" for msg in history)
                source = "conversation"
            elif self.pdf_text:
                content = self.pdf_text
                source = "pdf"
            else:
                raise ValueError("No content available for profile generation")

            completion = await self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": EXTRACTION_PROMPT},
                    {"role": "user", "content": f"Extract profile information from this {source}:\n\n{content}"}
                ],
                temperature=0.3
            )

            response_text = completion.choices[0].message.content.strip()
            profile_data = json.loads(response_text)

            profile = {
                "profile_data": profile_data,
                "metadata": {
                    "generated_at": datetime.now().isoformat(),
                    "source": source
                }
            }

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"profile_{timestamp}.json"
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(profile, f, indent=2)

            return profile, filename

        except Exception as e:
            logger.error(f"Profile generation error: {str(e)}")
            return {"error": str(e)}, None

def create_gradio_interface():
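    """Assemble the Gradio UI: resume upload, chat with FINN, and profile generation/download."""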
    builder = ProfileBuilder()

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🐕 LOSS DOG - Professional Profile Builder")
        
        api_key = gr.Textbox(
            label="OpenAI API Key",
            type="password",
            placeholder="Enter your OpenAI API key"
        )

        with gr.Tabs() as tabs:
            with gr.Tab("Upload Resume"):
                gr.Markdown("""
                # Upload Your Resume
                Upload your existing resume in PDF format and let FINN extract your professional profile.
                """)
                pdf_file = gr.File(
                    label="Upload PDF Resume",
                    file_types=[".pdf"]
                )
                process_pdf_btn = gr.Button("Process Resume")

            with gr.Tab("Chat with FINN"):
                gr.Markdown("""
                # Chat with FINN
                Start a conversation with FINN to build your professional profile from scratch.
                """)
                chatbot = gr.Chatbot(
                    label="Conversation",
                    height=400
                )
                with gr.Row():
                    msg = gr.Textbox(
                        label="Message",
                        placeholder="Chat with FINN...",
                        show_label=False
                    )
                    send = gr.Button("Send")

        with gr.Column():
            generate_btn = gr.Button("Generate Profile", variant="primary")
            profile_output = gr.JSON(label="Generated Profile")
            download_btn = gr.File(label="Download Profile")

        async def on_message(message: str, history: List[List[str]], key: str):
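            """Handle one chat turn: send the message to FINN, append the reply to the history, and clear the textbox."""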
            if not message.strip():
                return history, None, None, ""

            result = await builder.process_message(message, history, key)
            
            if "error" in result:
                return history, {"error": result["error"]}, None, message

            new_history = history + [[message, result["response"]]]
            return new_history, None, None, ""

        async def on_pdf_upload(pdf, key):
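            """Process an uploaded PDF resume and return the extracted profile plus a downloadable JSON file."""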
            if not pdf:
                return {"error": "No PDF file uploaded"}, None
            
            try:
                result = await builder.process_pdf(pdf.name, key)
                if "error" in result:
                    return {"error": result["error"]}, None
                
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                filename = f"profile_{timestamp}.json"
                with open(filename, 'w', encoding='utf-8') as f:
                    json.dump(result, f, indent=2)
                
                return result["profile_data"], filename
            except Exception as e:
                return {"error": str(e)}, None

        async def on_generate(history: List[List[str]], key: str):
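            """Generate the profile from the chat history and expose it for display and download."""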
            profile, filename = await builder.generate_profile(history, key)
            if "error" in profile:
                return {"error": profile["error"]}, None
            return profile["profile_data"], filename

        msg.submit(
            on_message,
            inputs=[msg, chatbot, api_key],
            outputs=[chatbot, profile_output, download_btn, msg]
        )
        
        send.click(
            on_message,
            inputs=[msg, chatbot, api_key],
            outputs=[chatbot, profile_output, download_btn, msg]
        )
        
        process_pdf_btn.click(
            on_pdf_upload,
            inputs=[pdf_file, api_key],
            outputs=[profile_output, download_btn]
        )
        
        generate_btn.click(
            on_generate,
            inputs=[chatbot, api_key],
            outputs=[profile_output, download_btn]
        )

    return demo

if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )