jzou19950715 committed on
Commit
2091cb6
·
verified ·
1 Parent(s): 7f04463

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -60
app.py CHANGED
@@ -4,6 +4,8 @@ from datetime import datetime
4
  from typing import Dict, List, Optional, Any
5
  import gradio as gr
6
  from openai import AsyncOpenAI
 
 
7
 
8
  # Configure logging
9
  logging.basicConfig(level=logging.INFO)
@@ -27,8 +29,9 @@ but respect their boundaries. Once you believe you have gathered sufficient info
27
  have nothing more to share), let them know they can click 'Generate Profile' to proceed.
28
  """
29
 
30
- EXTRACTION_PROMPT = """You are a professional information extraction system. Your task is to extract information from the potentially unstructure conversation and return ONLY a valid JSON object. Do not include any explanatory text before or after the JSON.
31
-
 
32
  Return the data in this exact structure:
33
  {
34
  "work_history_experience": {
@@ -132,6 +135,16 @@ Return the data in this exact structure:
132
  }
133
 
134
  IMPORTANT: Return ONLY the JSON. Do not add any explanation text."""
 
 
 
 
 
 
 
 
 
 
135
 
136
  class ProfileBuilder:
137
  def __init__(self):
@@ -142,77 +155,56 @@ class ProfileBuilder:
142
  if not api_key.startswith("sk-"):
143
  raise ValueError("Invalid API key format")
144
  self.client = AsyncOpenAI(api_key=api_key)
145
-
146
- async def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
147
- try:
148
- if not self.client:
149
- self._initialize_client(api_key)
150
-
151
- self.conversation_history.append({"role": "user", "content": message})
152
-
153
- completion = await self.client.chat.completions.create(
154
- model="gpt-4o-mini",
155
- messages=[
156
- {"role": "system", "content": CONVERSATION_PROMPT},
157
- *self.conversation_history
158
- ],
159
- temperature=0.7
160
- )
161
-
162
- ai_message = completion.choices[0].message.content
163
- self.conversation_history.append({"role": "assistant", "content": ai_message})
164
-
165
- return {"response": ai_message}
166
 
 
 
 
 
 
 
 
 
 
167
  except Exception as e:
168
- logger.error(f"Error processing message: {str(e)}")
169
- return {"error": str(e)}
170
 
171
- async def generate_profile(self) -> Dict[str, Any]:
 
172
  try:
173
  if not self.client:
174
- raise ValueError("OpenAI client not initialized")
175
 
176
- conversation_text = "\n".join(
177
- f"{msg['role']}: {msg['content']}"
178
- for msg in self.conversation_history
179
- )
180
 
 
181
  completion = await self.client.chat.completions.create(
182
  model="gpt-4o-mini",
183
  messages=[
184
  {"role": "system", "content": EXTRACTION_PROMPT},
185
- {"role": "user", "content": f"Extract profile information from this conversation:\n\n{conversation_text}"}
186
  ],
187
  temperature=0.3
188
  )
189
 
190
- # Clean and parse the JSON response
191
  response_text = completion.choices[0].message.content.strip()
192
  profile_data = json.loads(response_text)
193
 
194
- profile = {
195
  "profile_data": profile_data,
196
  "metadata": {
197
  "generated_at": datetime.now().isoformat(),
198
- "conversation_length": len(self.conversation_history)
199
  }
200
  }
201
 
202
- # Save to file
203
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
204
- filename = f"profile_{timestamp}.json"
205
- with open(filename, 'w', encoding='utf-8') as f:
206
- json.dump(profile, f, indent=2)
207
-
208
- return profile, filename
209
-
210
- except json.JSONDecodeError as e:
211
- logger.error(f"JSON parsing error: {str(e)}\nRaw output: {response_text}")
212
- return {"error": "Failed to parse profile data"}, None
213
  except Exception as e:
214
- logger.error(f"Error generating profile: {str(e)}")
215
- return {"error": str(e)}, None
 
 
216
 
217
  def create_gradio_interface():
218
  builder = ProfileBuilder()
@@ -220,16 +212,30 @@ def create_gradio_interface():
220
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
221
  gr.Markdown("# 🐕 LOSS DOG - Professional Profile Builder")
222
 
223
- with gr.Row():
224
- with gr.Column(scale=2):
225
- api_key = gr.Textbox(
226
- label="OpenAI API Key",
227
- type="password",
228
- placeholder="Enter your OpenAI API key"
 
 
 
 
 
 
 
 
 
229
  )
230
-
 
 
 
 
 
 
231
  chatbot = gr.Chatbot(label="Conversation")
232
-
233
  with gr.Row():
234
  msg = gr.Textbox(
235
  label="Message",
@@ -237,11 +243,31 @@ def create_gradio_interface():
237
  )
238
  send = gr.Button("Send")
239
 
240
- with gr.Column(scale=1):
241
- generate_btn = gr.Button("Generate Profile")
242
- profile_output = gr.JSON(label="Generated Profile")
243
- download_btn = gr.File(label="Download Profile")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
 
245
  async def on_message(message: str, history: List[List[str]], key: str):
246
  if not message.strip():
247
  return history, None
@@ -260,6 +286,7 @@ def create_gradio_interface():
260
  return profile, None
261
  return profile["profile_data"], filename
262
 
 
263
  msg.submit(
264
  on_message,
265
  inputs=[msg, chatbot, api_key],
@@ -272,6 +299,12 @@ def create_gradio_interface():
272
  outputs=[chatbot, profile_output]
273
  ).then(lambda: "", None, msg)
274
 
 
 
 
 
 
 
275
  generate_btn.click(
276
  on_generate,
277
  outputs=[profile_output, download_btn]
 
4
  from typing import Dict, List, Optional, Any
5
  import gradio as gr
6
  from openai import AsyncOpenAI
7
+ import PyPDF2
8
+ import io
9
 
10
  # Configure logging
11
  logging.basicConfig(level=logging.INFO)
 
29
  have nothing more to share), let them know they can click 'Generate Profile' to proceed.
30
  """
31
 
32
+ EXTRACTION_PROMPT = """You are a professional information extraction system. Your task is to extract information from the potentially unstructure conversation and return ONLY a valid JSON object.
33
+ Proactively determine how to fill the json schema using limited information provided.
34
+ Do not include any explanatory text before or after the JSON.
35
  Return the data in this exact structure:
36
  {
37
  "work_history_experience": {
 
135
  }
136
 
137
  IMPORTANT: Return ONLY the JSON. Do not add any explanation text."""
138
+ import json
139
+ import logging
140
+ from datetime import datetime
141
+ from typing import Dict, List, Optional, Any
142
+ import gradio as gr
143
+ from openai import AsyncOpenAI
144
+ import PyPDF2
145
+ import io
146
+
147
+ # ... (previous imports and prompts remain the same)
148
 
149
  class ProfileBuilder:
150
  def __init__(self):
 
155
  if not api_key.startswith("sk-"):
156
  raise ValueError("Invalid API key format")
157
  self.client = AsyncOpenAI(api_key=api_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
async def extract_from_pdf(self, pdf_content: bytes) -> str:
    """Extract the text of every page of an in-memory PDF.

    Args:
        pdf_content: Raw bytes of the PDF document.

    Returns:
        The text of all pages, in page order, joined with newlines.

    Raises:
        Exception: Any error raised while reading the PDF is logged and
            re-raised unchanged so the caller can decide how to report it.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_content))
        # `or ""` is defensive: a page that yields no text must not break
        # the join. The newline separator keeps the last word of one page
        # from fusing with the first word of the next.
        return "\n".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )
    except Exception as e:
        logger.error(f"Error extracting PDF: {str(e)}")
        raise
171
 
172
async def process_pdf(self, pdf_path: str, api_key: str) -> Dict[str, Any]:
    """Process a PDF resume and extract a structured profile from it.

    Reads the PDF at ``pdf_path``, extracts its text, sends the text to the
    chat-completions endpoint with ``EXTRACTION_PROMPT`` as the system
    message, and parses the reply as JSON.

    Args:
        pdf_path: Filesystem path of the PDF resume.
        api_key: OpenAI API key; only used to create the client when
            ``self.client`` has not been set yet.

    Returns:
        On success, ``{"profile_data": <parsed JSON>, "metadata": {...}}``.
        On any failure, ``{"error": <message>}`` -- this method does not
        raise.
    """
    try:
        if not self.client:
            self._initialize_client(api_key)

        with open(pdf_path, 'rb') as file:
            pdf_content = file.read()
        resume_text = await self.extract_from_pdf(pdf_content)

        # A PDF with no text layer gives the model nothing to extract from;
        # report that instead of requesting a profile for an empty resume.
        if not resume_text.strip():
            raise ValueError("No extractable text found in the PDF")

        # Use the extraction prompt directly on PDF content
        completion = await self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": EXTRACTION_PROMPT},
                {"role": "user", "content": f"Extract profile information from this resume:\n\n{resume_text}"}
            ],
            temperature=0.3
        )

        # `content` may be None; treat that as an empty reply so it surfaces
        # as a JSON parse failure instead of an AttributeError.
        response_text = (completion.choices[0].message.content or "").strip()

        # Replies are sometimes wrapped in a Markdown fence (``` or ```json)
        # despite the prompt; drop the opening fence line and closing fence.
        if response_text.startswith("```"):
            _, _, remainder = response_text.partition("\n")
            response_text = remainder.rsplit("```", 1)[0].strip()

        try:
            profile_data = json.loads(response_text)
        except json.JSONDecodeError as e:
            # Keep the raw reply in the log so bad outputs can be diagnosed.
            logger.error(f"JSON parsing error: {str(e)}\nRaw output: {response_text}")
            return {"error": "Failed to parse profile data"}

        return {
            "profile_data": profile_data,
            "metadata": {
                "generated_at": datetime.now().isoformat(),
                "source": "pdf_resume"
            }
        }

    except Exception as e:
        logger.error(f"Error processing PDF: {str(e)}")
        return {"error": str(e)}
206
+
207
+ # ... (rest of the ProfileBuilder class remains the same)
208
 
209
  def create_gradio_interface():
210
  builder = ProfileBuilder()
 
212
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
213
  gr.Markdown("# 🐕 LOSS DOG - Professional Profile Builder")
214
 
215
+ api_key = gr.Textbox(
216
+ label="OpenAI API Key",
217
+ type="password",
218
+ placeholder="Enter your OpenAI API key"
219
+ )
220
+
221
+ with gr.Tabs() as tabs:
222
+ with gr.Tab("Upload Resume"):
223
+ upload_text = gr.Markdown("""
224
+ # Upload Your Resume
225
+ Upload your existing resume in PDF format and let LOSS DOG extract your professional profile.
226
+ """)
227
+ pdf_file = gr.File(
228
+ label="Upload PDF Resume",
229
+ file_types=[".pdf"]
230
  )
231
+ process_pdf_btn = gr.Button("Process Resume")
232
+
233
+ with gr.Tab("Chat with AI"):
234
+ chat_text = gr.Markdown("""
235
+ # Chat with LOSS DOG
236
+ Start a conversation with LOSS DOG to build your professional profile from scratch.
237
+ """)
238
  chatbot = gr.Chatbot(label="Conversation")
 
239
  with gr.Row():
240
  msg = gr.Textbox(
241
  label="Message",
 
243
  )
244
  send = gr.Button("Send")
245
 
246
+ with gr.Column():
247
+ generate_btn = gr.Button("Generate Profile")
248
+ profile_output = gr.JSON(label="Generated Profile")
249
+ download_btn = gr.File(label="Download Profile")
250
+
251
async def on_pdf_upload(pdf, key):
    """Handle the "Process Resume" click: build and save a profile from a PDF.

    Args:
        pdf: The value of the upload component -- either a file-like object
            exposing ``.name`` or a plain path string, depending on how the
            component delivers uploads. Falsy when nothing was uploaded.
        key: OpenAI API key entered by the user.

    Returns:
        A 2-tuple ``(profile_json, download_path)`` matching the two bound
        outputs. On any failure the first element is ``{"error": ...}`` and
        the second is ``None``.
    """
    if not pdf:
        # Must be a 2-tuple like every other branch: the handler is bound
        # to two output components.
        return {"error": "No PDF file uploaded"}, None

    try:
        # Accept both a path string and an object carrying the path in .name.
        pdf_path = pdf if isinstance(pdf, str) else pdf.name
        result = await builder.process_pdf(pdf_path, key)
        if "error" in result:
            return {"error": result["error"]}, None

        # Save profile
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"profile_{timestamp}.json"
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2)

        return result["profile_data"], filename
    except Exception as e:
        return {"error": str(e)}, None
269
 
270
+ # Event handlers remain the same for chat functionality
271
  async def on_message(message: str, history: List[List[str]], key: str):
272
  if not message.strip():
273
  return history, None
 
286
  return profile, None
287
  return profile["profile_data"], filename
288
 
289
+ # Bind events
290
  msg.submit(
291
  on_message,
292
  inputs=[msg, chatbot, api_key],
 
299
  outputs=[chatbot, profile_output]
300
  ).then(lambda: "", None, msg)
301
 
302
+ process_pdf_btn.click(
303
+ on_pdf_upload,
304
+ inputs=[pdf_file, api_key],
305
+ outputs=[profile_output, download_btn]
306
+ )
307
+
308
  generate_btn.click(
309
  on_generate,
310
  outputs=[profile_output, download_btn]