Spaces:
Configuration error
Configuration error
Moved multiple instances of 'pylint: disable=broad-exception-caught' to top of file
f9a80bc
verified
| """ | |
| Unit tests for the linkedin_resume module. | |
| """ | |
| import unittest | |
| import tempfile | |
| import os | |
| from unittest.mock import patch, MagicMock | |
| from functions import linkedin_resume as ca | |
| # pylint: disable=protected-access | |
class TestCleanExtractedText(unittest.TestCase):
    """Test cases for the _clean_extracted_text function.

    Each test passes a small raw string (or None) to
    ``ca._clean_extracted_text`` and compares the return value with the
    exact cleaned string the test expects.
    """

    def test_normalize_multiple_newlines(self):
        """Test normalization of multiple newlines."""
        raw = "Line 1\n\nLine 2\n\n\nLine 3"
        expected = "Line 1\nLine 2\nLine 3"
        self.assertEqual(ca._clean_extracted_text(raw), expected)

    def test_remove_artifacts(self):
        """Test removal of PDF artifacts.

        The digits-only line and the ``|---|`` line are expected to be
        dropped, leaving only the real text with no trailing newline.
        """
        raw = " 123 \n|---|\nSome text\n"
        expected = "Some text"
        self.assertEqual(ca._clean_extracted_text(raw), expected)

    def test_normalize_spaces(self):
        """Test normalization of multiple spaces."""
        # The input has to contain runs of several spaces; with single
        # spaces the input already equals the expected output and the test
        # would pass even if no space normalization took place.
        raw = "A    B   C"
        expected = "A B C"
        self.assertEqual(ca._clean_extracted_text(raw), expected)

    def test_empty_string(self):
        """Test handling of empty string."""
        self.assertEqual(ca._clean_extracted_text(""), "")

    def test_none_input(self):
        """Test handling of None input (expected to yield an empty string)."""
        self.assertEqual(ca._clean_extracted_text(None), "")
class TestStructureResumeText(unittest.TestCase):
    """Checks on the dictionary returned by _structure_resume_text."""

    def test_basic_structure(self):
        """A resume with five section headings yields all five section keys."""
        resume_text = (
            "Contact Info\nJohn Doe\nSummary\nExperienced dev"
            "\nExperience\nCompany X\nEducation\nMIT\nSkills\nPython, C++"
        )
        structured = ca._structure_resume_text(resume_text)

        section_names = ("contact_info", "summary", "experience", "education", "skills")
        for section_name in section_names:
            self.assertIn(section_name, structured["sections"])

        self.assertGreater(structured["word_count"], 0)
        self.assertGreaterEqual(structured["section_count"], 5)

    def test_empty_text(self):
        """Empty input gives no sections, empty full text and zero counts."""
        structured = ca._structure_resume_text("")

        # Checked in the same order as listed here.
        expected_values = {
            "sections": {},
            "full_text": "",
            "word_count": 0,
            "section_count": 0,
        }
        for key, expected in expected_values.items():
            self.assertEqual(structured[key], expected)

    def test_contains_required_fields(self):
        """Every required top-level key is present in the result."""
        structured = ca._structure_resume_text("Some basic text")

        required_keys = (
            "sections",
            "full_text",
            "llm_formatted",
            "summary",
            "format",
            "word_count",
            "section_count",
        )
        for key in required_keys:
            self.assertIn(key, structured)
class TestFormatForLLM(unittest.TestCase):
    """Checks on the text block produced by _format_for_llm."""

    def _assert_resume_markers(self, text):
        """Assert that *text* opens and closes with the resume marker lines."""
        self.assertTrue(text.startswith("=== RESUME CONTENT ==="))
        self.assertTrue(text.endswith("=== END RESUME ==="))

    def test_section_formatting(self):
        """Each supplied section appears under its bracketed heading."""
        resume_sections = {
            "summary": "A summary.",
            "contact_info": "Contact details.",
            "experience": "Work exp.",
            "education": "School info.",
            "skills": "Python, C++",
        }
        output = ca._format_for_llm(resume_sections)

        headings = (
            "[SUMMARY]",
            "[CONTACT INFO]",
            "[EXPERIENCE]",
            "[EDUCATION]",
            "[SKILLS]",
        )
        for heading in headings:
            self.assertIn(heading, output)
        self._assert_resume_markers(output)

    def test_empty_sections(self):
        """With no sections the start and end markers are still emitted."""
        output = ca._format_for_llm({})
        self._assert_resume_markers(output)
class TestGetLLMContextFromResume(unittest.TestCase):
    """Checks on the string returned by get_llm_context_from_resume."""

    def test_success_with_llm_formatted(self):
        """The LLM-formatted text is returned when it is present."""
        payload = {
            "status": "success",
            "structured_text": {
                "llm_formatted": "LLM text",
                "full_text": "Full text",
            },
        }
        self.assertEqual(ca.get_llm_context_from_resume(payload), "LLM text")

    def test_fallback_to_full_text(self):
        """The full text is returned when no LLM-formatted text exists."""
        payload = {
            "status": "success",
            "structured_text": {"full_text": "Full text"},
        }
        self.assertEqual(ca.get_llm_context_from_resume(payload), "Full text")

    def test_error_status(self):
        """An error status yields an empty string."""
        payload = {"status": "error"}
        self.assertEqual(ca.get_llm_context_from_resume(payload), "")

    def test_missing_structured_text(self):
        """A success result without structured_text yields an empty string."""
        payload = {"status": "success"}
        self.assertEqual(ca.get_llm_context_from_resume(payload), "")
class TestExtractTextFromLinkedInPDF(unittest.TestCase):
    """Test cases for the extract_text_from_linkedin_pdf function."""

    def test_none_input(self):
        """Test handling of None input."""
        result = ca.extract_text_from_linkedin_pdf(None)
        self.assertEqual(result["status"], "error")
        self.assertIn("No PDF file provided", result["message"])

    # Without these decorators the runner calls the method with only `self`
    # and it fails with a TypeError for the two missing mock arguments.
    # Stacked patches are applied bottom-up: the bottom decorator supplies the
    # first mock argument (mock_open), the top one the second (mock_pdf_reader).
    # NOTE(review): the 'PyPDF2.PdfReader' target assumes the module under test
    # resolves PdfReader through the PyPDF2 package at call time -- confirm
    # against functions.linkedin_resume and adjust the target if it does not.
    @patch("PyPDF2.PdfReader")
    @patch("builtins.open")
    def test_successful_extraction(self, mock_open, mock_pdf_reader):
        """Test successful PDF text extraction with mocked PyPDF2."""
        # Create a real (empty) temporary file so the path exists on disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp_path = tmp.name

        try:
            # Mock file reading
            mock_file = MagicMock()
            mock_file.read.return_value = b"fake pdf content"
            mock_open.return_value.__enter__.return_value = mock_file

            # Mock PDF reader exposing a single page of resume-like text
            mock_page = MagicMock()
            mock_page.extract_text.return_value = (
                "Contact Info\nJohn Doe\nSummary"
                "\nDeveloper\nExperience\nCompany X"
            )
            mock_reader_instance = MagicMock()
            mock_reader_instance.pages = [mock_page]
            mock_pdf_reader.return_value = mock_reader_instance

            # Test the function
            result = ca.extract_text_from_linkedin_pdf(tmp_path)

            self.assertEqual(result["status"], "success")
            self.assertIn("structured_text", result)
            self.assertIn("metadata", result)
            self.assertIn("contact_info", result["structured_text"]["sections"])
        finally:
            # Clean up the temporary file even when an assertion fails.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def test_nonexistent_file(self):
        """Test handling of non-existent file."""
        result = ca.extract_text_from_linkedin_pdf("/nonexistent/path.pdf")
        self.assertEqual(result["status"], "error")
        self.assertIn("Failed to extract text from PDF", result["message"])
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()