Spaces:

gperdrizet
/

resumate

Configuration error

App Files Community

gperdrizet committed on Jul 9

Commit

1084ca5

1 Parent(s): cbb592a

Updated tests for new PDF parsing function.

Browse files

Files changed (2) hide show

functions/context_acquisition.py +0 -4
tests/test_context_acquisition.py +193 -0

functions/context_acquisition.py CHANGED Viewed

@@ -136,10 +136,6 @@ def _structure_resume_text(text: str) -> dict:
         "education": r"(?i)(education|academic|university|college|school)",
         "skills": r"(?i)(skills|competencies|technologies|technical)",
         "certifications": r"(?i)(certification|certificate|license)",
-        "projects": r"(?i)(project|portfolio)",
-        "achievements": r"(?i)(achievement|award|honor|recognition)",
-        "languages": r"(?i)(language|linguistic)",
-        "volunteer": r"(?i)(volunteer|community|charity)"
     }
     # Split text into lines for processing

         "education": r"(?i)(education|academic|university|college|school)",
         "skills": r"(?i)(skills|competencies|technologies|technical)",
         "certifications": r"(?i)(certification|certificate|license)",
     }
     # Split text into lines for processing

tests/test_context_acquisition.py CHANGED Viewed

@@ -1,3 +1,196 @@
 """
 Unit tests for the context_acquisition module.
 """

 """
 Unit tests for the context_acquisition module.
 """
+import unittest
+import tempfile
+import os
+from unittest.mock import patch, MagicMock
+from functions import context_acquisition as ca
+class TestCleanExtractedText(unittest.TestCase):
+    """Test cases for the _clean_extracted_text function."""
+    def test_normalize_multiple_newlines(self):
+        """Test normalization of multiple newlines."""
+        raw = "Line 1\n\nLine 2\n\n\nLine 3"
+        expected = "Line 1\nLine 2\nLine 3"
+        self.assertEqual(ca._clean_extracted_text(raw), expected)
+    def test_remove_artifacts(self):
+        """Test removal of PDF artifacts."""
+        raw = "  123  \n|---|\nSome text\n"
+        expected = "Some text"
+        self.assertEqual(ca._clean_extracted_text(raw), expected)
+    def test_normalize_spaces(self):
+        """Test normalization of multiple spaces."""
+        raw = "A  B   C"
+        expected = "A B C"
+        self.assertEqual(ca._clean_extracted_text(raw), expected)
+    def test_empty_string(self):
+        """Test handling of empty string."""
+        self.assertEqual(ca._clean_extracted_text(""), "")
+    def test_none_input(self):
+        """Test handling of None input."""
+        self.assertEqual(ca._clean_extracted_text(None), "")
+class TestStructureResumeText(unittest.TestCase):
+    """Test cases for the _structure_resume_text function."""
+    def test_basic_structure(self):
+        """Test basic resume text structuring."""
+        text = "Contact Info\nJohn Doe\nSummary\nExperienced dev\nExperience\nCompany X\nEducation\nMIT\nSkills\nPython, C++"
+        result = ca._structure_resume_text(text)
+        self.assertIn("contact_info", result["sections"])
+        self.assertIn("summary", result["sections"])
+        self.assertIn("experience", result["sections"])
+        self.assertIn("education", result["sections"])
+        self.assertIn("skills", result["sections"])
+        self.assertGreater(result["word_count"], 0)
+        self.assertGreaterEqual(result["section_count"], 5)
+    def test_empty_text(self):
+        """Test handling of empty text."""
+        result = ca._structure_resume_text("")
+        self.assertEqual(result["sections"], {})
+        self.assertEqual(result["full_text"], "")
+        self.assertEqual(result["word_count"], 0)
+        self.assertEqual(result["section_count"], 0)
+    def test_contains_required_fields(self):
+        """Test that result contains all required fields."""
+        text = "Some basic text"
+        result = ca._structure_resume_text(text)
+        required_fields = ["sections", "full_text", "llm_formatted", "summary",
+                          "format", "word_count", "section_count"]
+        for field in required_fields:
+            self.assertIn(field, result)
+class TestFormatForLLM(unittest.TestCase):
+    """Test cases for the _format_for_llm function."""
+    def test_section_formatting(self):
+        """Test proper formatting of sections for LLM."""
+        sections = {
+            "summary": "A summary.",
+            "contact_info": "Contact details.",
+            "experience": "Work exp.",
+            "education": "School info.",
+            "skills": "Python, C++"
+        }
+        full_text = "..."
+        formatted = ca._format_for_llm(sections, full_text)
+        self.assertIn("[SUMMARY]", formatted)
+        self.assertIn("[CONTACT INFO]", formatted)
+        self.assertIn("[EXPERIENCE]", formatted)
+        self.assertIn("[EDUCATION]", formatted)
+        self.assertIn("[SKILLS]", formatted)
+        self.assertTrue(formatted.startswith("=== RESUME CONTENT ==="))
+        self.assertTrue(formatted.endswith("=== END RESUME ==="))
+    def test_empty_sections(self):
+        """Test handling of empty sections."""
+        sections = {}
+        full_text = "test"
+        formatted = ca._format_for_llm(sections, full_text)
+        self.assertTrue(formatted.startswith("=== RESUME CONTENT ==="))
+        self.assertTrue(formatted.endswith("=== END RESUME ==="))
+class TestGetLLMContextFromResume(unittest.TestCase):
+    """Test cases for the get_llm_context_from_resume function."""
+    def test_success_with_llm_formatted(self):
+        """Test successful extraction with LLM formatted text."""
+        extraction_result = {
+            "status": "success",
+            "structured_text": {"llm_formatted": "LLM text", "full_text": "Full text"}
+        }
+        result = ca.get_llm_context_from_resume(extraction_result)
+        self.assertEqual(result, "LLM text")
+    def test_fallback_to_full_text(self):
+        """Test fallback to full text when LLM formatted not available."""
+        extraction_result = {
+            "status": "success",
+            "structured_text": {"full_text": "Full text"}
+        }
+        result = ca.get_llm_context_from_resume(extraction_result)
+        self.assertEqual(result, "Full text")
+    def test_error_status(self):
+        """Test handling of error status."""
+        extraction_result = {"status": "error"}
+        result = ca.get_llm_context_from_resume(extraction_result)
+        self.assertEqual(result, "")
+    def test_missing_structured_text(self):
+        """Test handling of missing structured_text."""
+        extraction_result = {"status": "success"}
+        result = ca.get_llm_context_from_resume(extraction_result)
+        self.assertEqual(result, "")
+class TestExtractTextFromLinkedInPDF(unittest.TestCase):
+    """Test cases for the extract_text_from_linkedin_pdf function."""
+    def test_none_input(self):
+        """Test handling of None input."""
+        result = ca.extract_text_from_linkedin_pdf(None)
+        self.assertEqual(result["status"], "error")
+        self.assertIn("No PDF file provided", result["message"])
+    @patch('PyPDF2.PdfReader')
+    @patch('builtins.open')
+    def test_successful_extraction(self, mock_open, mock_pdf_reader):
+        """Test successful PDF text extraction with mocked PyPDF2."""
+        # Create a temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+            tmp_path = tmp.name
+        try:
+            # Mock file reading
+            mock_file = MagicMock()
+            mock_file.read.return_value = b"fake pdf content"
+            mock_open.return_value.__enter__.return_value = mock_file
+            # Mock PDF reader and page
+            mock_page = MagicMock()
+            mock_page.extract_text.return_value = "Contact Info\nJohn Doe\nSummary\nDeveloper\nExperience\nCompany X"
+            mock_reader_instance = MagicMock()
+            mock_reader_instance.pages = [mock_page]
+            mock_pdf_reader.return_value = mock_reader_instance
+            # Test the function
+            result = ca.extract_text_from_linkedin_pdf(tmp_path)
+            self.assertEqual(result["status"], "success")
+            self.assertIn("structured_text", result)
+            self.assertIn("metadata", result)
+            self.assertIn("contact_info", result["structured_text"]["sections"])
+        finally:
+            # Clean up
+            if os.path.exists(tmp_path):
+                os.remove(tmp_path)
+    def test_nonexistent_file(self):
+        """Test handling of non-existent file."""
+        result = ca.extract_text_from_linkedin_pdf("/nonexistent/path.pdf")
+        self.assertEqual(result["status"], "error")
+        self.assertIn("Failed to extract text from PDF", result["message"])
+if __name__ == '__main__':
+    unittest.main()