Spaces:

jzou19950715
/

Newco_information_extraction_agent

Sleeping

App Files Files Community

jzou19950715 committed on Feb 5

Commit

2091cb6

verified ·

1 Parent(s): 7f04463

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -60

app.py CHANGED Viewed

@@ -4,6 +4,8 @@ from datetime import datetime
 from typing import Dict, List, Optional, Any
 import gradio as gr
 from openai import AsyncOpenAI
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -27,8 +29,9 @@ but respect their boundaries. Once you believe you have gathered sufficient info
 have nothing more to share), let them know they can click 'Generate Profile' to proceed.
 """
-EXTRACTION_PROMPT = """You are a professional information extraction system. Your task is to extract information from the potentially unstructure conversation and return ONLY a valid JSON object. Do not include any explanatory text before or after the JSON.
 Return the data in this exact structure:
 {
     "work_history_experience": {
@@ -132,6 +135,16 @@ Return the data in this exact structure:
 }
 IMPORTANT: Return ONLY the JSON. Do not add any explanation text."""
 class ProfileBuilder:
     def __init__(self):
@@ -142,77 +155,56 @@ class ProfileBuilder:
         if not api_key.startswith("sk-"):
             raise ValueError("Invalid API key format")
         self.client = AsyncOpenAI(api_key=api_key)
-    async def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
-        try:
-            if not self.client:
-                self._initialize_client(api_key)
-            self.conversation_history.append({"role": "user", "content": message})
-            completion = await self.client.chat.completions.create(
-                model="gpt-4o-mini",
-                messages=[
-                    {"role": "system", "content": CONVERSATION_PROMPT},
-                    *self.conversation_history
-                ],
-                temperature=0.7
-            )
-            ai_message = completion.choices[0].message.content
-            self.conversation_history.append({"role": "assistant", "content": ai_message})
-            return {"response": ai_message}
         except Exception as e:
-            logger.error(f"Error processing message: {str(e)}")
-            return {"error": str(e)}
-    async def generate_profile(self) -> Dict[str, Any]:
         try:
             if not self.client:
-                raise ValueError("OpenAI client not initialized")
-            conversation_text = "\n".join(
-                f"{msg['role']}: {msg['content']}"
-                for msg in self.conversation_history
-            )
             completion = await self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
                     {"role": "system", "content": EXTRACTION_PROMPT},
-                    {"role": "user", "content": f"Extract profile information from this conversation:\n\n{conversation_text}"}
                 ],
                 temperature=0.3
             )
-            # Clean and parse the JSON response
             response_text = completion.choices[0].message.content.strip()
             profile_data = json.loads(response_text)
-            profile = {
                 "profile_data": profile_data,
                 "metadata": {
                     "generated_at": datetime.now().isoformat(),
-                    "conversation_length": len(self.conversation_history)
                 }
             }
-            # Save to file
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            filename = f"profile_{timestamp}.json"
-            with open(filename, 'w', encoding='utf-8') as f:
-                json.dump(profile, f, indent=2)
-            return profile, filename
-        except json.JSONDecodeError as e:
-            logger.error(f"JSON parsing error: {str(e)}\nRaw output: {response_text}")
-            return {"error": "Failed to parse profile data"}, None
         except Exception as e:
-            logger.error(f"Error generating profile: {str(e)}")
-            return {"error": str(e)}, None
 def create_gradio_interface():
     builder = ProfileBuilder()
@@ -220,16 +212,30 @@ def create_gradio_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 🐕 LOSS DOG - Professional Profile Builder")
-        with gr.Row():
-            with gr.Column(scale=2):
-                api_key = gr.Textbox(
-                    label="OpenAI API Key",
-                    type="password",
-                    placeholder="Enter your OpenAI API key"
                 )
                 chatbot = gr.Chatbot(label="Conversation")
                 with gr.Row():
                     msg = gr.Textbox(
                         label="Message",
@@ -237,11 +243,31 @@ def create_gradio_interface():
                     )
                     send = gr.Button("Send")
-            with gr.Column(scale=1):
-                generate_btn = gr.Button("Generate Profile")
-                profile_output = gr.JSON(label="Generated Profile")
-                download_btn = gr.File(label="Download Profile")
         async def on_message(message: str, history: List[List[str]], key: str):
             if not message.strip():
                 return history, None
@@ -260,6 +286,7 @@ def create_gradio_interface():
                 return profile, None
             return profile["profile_data"], filename
         msg.submit(
             on_message,
             inputs=[msg, chatbot, api_key],
@@ -272,6 +299,12 @@ def create_gradio_interface():
             outputs=[chatbot, profile_output]
         ).then(lambda: "", None, msg)
         generate_btn.click(
             on_generate,
             outputs=[profile_output, download_btn]

 from typing import Dict, List, Optional, Any
 import gradio as gr
 from openai import AsyncOpenAI
+import PyPDF2
+import io
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 have nothing more to share), let them know they can click 'Generate Profile' to proceed.
 """
+EXTRACTION_PROMPT = """You are a professional information extraction system. Your task is to extract information from the potentially unstructure conversation and return ONLY a valid JSON object.
+Proactively determine how to fill the json schema using limited information provided.
+Do not include any explanatory text before or after the JSON.
 Return the data in this exact structure:
 {
     "work_history_experience": {
 }
 IMPORTANT: Return ONLY the JSON. Do not add any explanation text."""
+import json
+import logging
+from datetime import datetime
+from typing import Dict, List, Optional, Any
+import gradio as gr
+from openai import AsyncOpenAI
+import PyPDF2
+import io
+# ... (previous imports and prompts remain the same)
 class ProfileBuilder:
     def __init__(self):
         if not api_key.startswith("sk-"):
             raise ValueError("Invalid API key format")
         self.client = AsyncOpenAI(api_key=api_key)
+    async def extract_from_pdf(self, pdf_content: bytes) -> str:
+        """Extract text from PDF file"""
+        try:
+            pdf_file = io.BytesIO(pdf_content)
+            pdf_reader = PyPDF2.PdfReader(pdf_file)
+            text = ""
+            for page in pdf_reader.pages:
+                text += page.extract_text()
+            return text
         except Exception as e:
+            logger.error(f"Error extracting PDF: {str(e)}")
+            raise
+    async def process_pdf(self, pdf_path: str, api_key: str) -> Dict[str, Any]:
+        """Process PDF resume and extract information"""
         try:
             if not self.client:
+                self._initialize_client(api_key)
+            with open(pdf_path, 'rb') as file:
+                pdf_content = file.read()
+                resume_text = await self.extract_from_pdf(pdf_content)
+            # Use the extraction prompt directly on PDF content
             completion = await self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
                     {"role": "system", "content": EXTRACTION_PROMPT},
+                    {"role": "user", "content": f"Extract profile information from this resume:\n\n{resume_text}"}
                 ],
                 temperature=0.3
             )
             response_text = completion.choices[0].message.content.strip()
             profile_data = json.loads(response_text)
+            return {
                 "profile_data": profile_data,
                 "metadata": {
                     "generated_at": datetime.now().isoformat(),
+                    "source": "pdf_resume"
                 }
             }
         except Exception as e:
+            logger.error(f"Error processing PDF: {str(e)}")
+            return {"error": str(e)}
+    # ... (rest of the ProfileBuilder class remains the same)
 def create_gradio_interface():
     builder = ProfileBuilder()
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 🐕 LOSS DOG - Professional Profile Builder")
+        api_key = gr.Textbox(
+            label="OpenAI API Key",
+            type="password",
+            placeholder="Enter your OpenAI API key"
+        )
+        with gr.Tabs() as tabs:
+            with gr.Tab("Upload Resume"):
+                upload_text = gr.Markdown("""
+                # Upload Your Resume
+                Upload your existing resume in PDF format and let LOSS DOG extract your professional profile.
+                """)
+                pdf_file = gr.File(
+                    label="Upload PDF Resume",
+                    file_types=[".pdf"]
                 )
+                process_pdf_btn = gr.Button("Process Resume")
+            with gr.Tab("Chat with AI"):
+                chat_text = gr.Markdown("""
+                # Chat with LOSS DOG
+                Start a conversation with LOSS DOG to build your professional profile from scratch.
+                """)
                 chatbot = gr.Chatbot(label="Conversation")
                 with gr.Row():
                     msg = gr.Textbox(
                         label="Message",
                     )
                     send = gr.Button("Send")
+        with gr.Column():
+            generate_btn = gr.Button("Generate Profile")
+            profile_output = gr.JSON(label="Generated Profile")
+            download_btn = gr.File(label="Download Profile")
+        async def on_pdf_upload(pdf, key):
+            if not pdf:
+                return {"error": "No PDF file uploaded"}
+            try:
+                result = await builder.process_pdf(pdf.name, key)
+                if "error" in result:
+                    return {"error": result["error"]}, None
+                # Save profile
+                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+                filename = f"profile_{timestamp}.json"
+                with open(filename, 'w', encoding='utf-8') as f:
+                    json.dump(result, f, indent=2)
+                return result["profile_data"], filename
+            except Exception as e:
+                return {"error": str(e)}, None
+        # Event handlers remain the same for chat functionality
         async def on_message(message: str, history: List[List[str]], key: str):
             if not message.strip():
                 return history, None
                 return profile, None
             return profile["profile_data"], filename
+        # Bind events
         msg.submit(
             on_message,
             inputs=[msg, chatbot, api_key],
             outputs=[chatbot, profile_output]
         ).then(lambda: "", None, msg)
+        process_pdf_btn.click(
+            on_pdf_upload,
+            inputs=[pdf_file, api_key],
+            outputs=[profile_output, download_btn]
+        )
         generate_btn.click(
             on_generate,
             outputs=[profile_output, download_btn]