"""
Unit tests for the ``functions.linkedin_resume`` module (imported as ``ca``).
"""

import os
import tempfile
import unittest
from unittest.mock import MagicMock, patch

from functions import linkedin_resume as ca

# The tests deliberately exercise the module's private helpers.
# pylint: disable=protected-access


class TestCleanExtractedText(unittest.TestCase):
    """Test cases for the _clean_extracted_text function."""

    def test_normalize_multiple_newlines(self):
        """Test normalization of multiple newlines."""
        raw = "Line 1\n\nLine 2\n\n\nLine 3"
        expected = "Line 1\nLine 2\nLine 3"
        self.assertEqual(ca._clean_extracted_text(raw), expected)

    def test_remove_artifacts(self):
        """Test removal of PDF artifacts."""
        raw = " 123 \n|---|\nSome text\n"
        expected = "Some text"
        self.assertEqual(ca._clean_extracted_text(raw), expected)

    def test_normalize_spaces(self):
        """Test normalization of multiple spaces."""
        # The input must contain runs of several spaces; otherwise the
        # assertion would pass even if no normalization took place.
        raw = "A    B   C"
        expected = "A B C"
        self.assertEqual(ca._clean_extracted_text(raw), expected)

    def test_empty_string(self):
        """Test handling of empty string."""
        self.assertEqual(ca._clean_extracted_text(""), "")

    def test_none_input(self):
        """Test handling of None input."""
        self.assertEqual(ca._clean_extracted_text(None), "")


class TestStructureResumeText(unittest.TestCase):
    """Test cases for the _structure_resume_text function."""

    def test_basic_structure(self):
        """Test basic resume text structuring."""
        text = (
            "Contact Info\nJohn Doe\nSummary\nExperienced dev"
            "\nExperience\nCompany X\nEducation\nMIT\nSkills\nPython, C++"
        )
        result = ca._structure_resume_text(text)

        expected_sections = (
            "contact_info",
            "summary",
            "experience",
            "education",
            "skills",
        )
        for section in expected_sections:
            # subTest reports every missing section, not just the first one.
            with self.subTest(section=section):
                self.assertIn(section, result["sections"])

        self.assertGreater(result["word_count"], 0)
        self.assertGreaterEqual(result["section_count"], 5)

    def test_empty_text(self):
        """Test handling of empty text."""
        result = ca._structure_resume_text("")
        self.assertEqual(result["sections"], {})
        self.assertEqual(result["full_text"], "")
        self.assertEqual(result["word_count"], 0)
        self.assertEqual(result["section_count"], 0)

    def test_contains_required_fields(self):
        """Test that result contains all required fields."""
        text = "Some basic text"
        result = ca._structure_resume_text(text)
        required_fields = [
            "sections",
            "full_text",
            "llm_formatted",
            "summary",
            "format",
            "word_count",
            "section_count",
        ]
        for field in required_fields:
            # subTest reports every missing field, not just the first one.
            with self.subTest(field=field):
                self.assertIn(field, result)


class TestFormatForLLM(unittest.TestCase):
    """Test cases for the _format_for_llm function."""

    def test_section_formatting(self):
        """Test proper formatting of sections for LLM."""
        sections = {
            "summary": "A summary.",
            "contact_info": "Contact details.",
            "experience": "Work exp.",
            "education": "School info.",
            "skills": "Python, C++",
        }
        formatted = ca._format_for_llm(sections)

        expected_headers = (
            "[SUMMARY]",
            "[CONTACT INFO]",
            "[EXPERIENCE]",
            "[EDUCATION]",
            "[SKILLS]",
        )
        for header in expected_headers:
            with self.subTest(header=header):
                self.assertIn(header, formatted)

        self.assertTrue(formatted.startswith("=== RESUME CONTENT ==="))
        self.assertTrue(formatted.endswith("=== END RESUME ==="))

    def test_empty_sections(self):
        """Test handling of empty sections."""
        sections = {}
        formatted = ca._format_for_llm(sections)
        self.assertTrue(formatted.startswith("=== RESUME CONTENT ==="))
        self.assertTrue(formatted.endswith("=== END RESUME ==="))


class TestGetLLMContextFromResume(unittest.TestCase):
    """Test cases for the get_llm_context_from_resume function."""

    def test_success_with_llm_formatted(self):
        """Test successful extraction with LLM formatted text."""
        extraction_result = {
            "status": "success",
            "structured_text": {
                "llm_formatted": "LLM text",
                "full_text": "Full text",
            },
        }
        result = ca.get_llm_context_from_resume(extraction_result)
        self.assertEqual(result, "LLM text")

    def test_fallback_to_full_text(self):
        """Test fallback to full text when LLM formatted not available."""
        extraction_result = {
            "status": "success",
            "structured_text": {"full_text": "Full text"},
        }
        result = ca.get_llm_context_from_resume(extraction_result)
        self.assertEqual(result, "Full text")

    def test_error_status(self):
        """Test handling of error status."""
        extraction_result = {"status": "error"}
        result = ca.get_llm_context_from_resume(extraction_result)
        self.assertEqual(result, "")

    def test_missing_structured_text(self):
        """Test handling of missing structured_text."""
        extraction_result = {"status": "success"}
        result = ca.get_llm_context_from_resume(extraction_result)
        self.assertEqual(result, "")


class TestExtractTextFromLinkedInPDF(unittest.TestCase):
    """Test cases for the extract_text_from_linkedin_pdf function."""

    def test_none_input(self):
        """Test handling of None input."""
        result = ca.extract_text_from_linkedin_pdf(None)
        self.assertEqual(result["status"], "error")
        self.assertIn("No PDF file provided", result["message"])

    # Decorators are applied bottom-up, so the mock for ``builtins.open`` is
    # passed first and the mock for ``PyPDF2.PdfReader`` second.
    @patch('PyPDF2.PdfReader')
    @patch('builtins.open')
    def test_successful_extraction(self, mock_open, mock_pdf_reader):
        """Test successful PDF text extraction with mocked PyPDF2."""
        # Create a temporary file so that a real path exists on disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp_path = tmp.name

        try:
            # Mock file reading: ``with open(...) as f`` yields this object.
            mock_file = MagicMock()
            mock_file.read.return_value = b"fake pdf content"
            mock_open.return_value.__enter__.return_value = mock_file

            # Mock PDF reader exposing a single page with known text.
            mock_page = MagicMock()
            mock_page.extract_text.return_value = (
                "Contact Info\nJohn Doe\nSummary"
                "\nDeveloper\nExperience\nCompany X"
            )
            mock_reader_instance = MagicMock()
            mock_reader_instance.pages = [mock_page]
            mock_pdf_reader.return_value = mock_reader_instance

            # Test the function
            result = ca.extract_text_from_linkedin_pdf(tmp_path)

            self.assertEqual(result["status"], "success")
            self.assertIn("structured_text", result)
            self.assertIn("metadata", result)
            self.assertIn(
                "contact_info", result["structured_text"]["sections"]
            )
        finally:
            # Clean up the temporary file even when an assertion fails.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def test_nonexistent_file(self):
        """Test handling of non-existent file."""
        result = ca.extract_text_from_linkedin_pdf("/nonexistent/path.pdf")
        self.assertEqual(result["status"], "error")
        self.assertIn("Failed to extract text from PDF", result["message"])


if __name__ == '__main__':
    unittest.main()