Spaces:
Configuration error
Configuration error

Moved multiple instances of 'pylint: disable=broad-exception-caught' to top of file
f9a80bc
verified
""" | |
Unit tests for the linkedin_resume module (imported below as ``ca``).
""" | |
import unittest | |
import tempfile | |
import os | |
from unittest.mock import patch, MagicMock | |
from functions import linkedin_resume as ca | |
# pylint: disable=protected-access | |
class TestCleanExtractedText(unittest.TestCase):
    """Test cases for the _clean_extracted_text function."""

    def test_normalize_multiple_newlines(self):
        """Runs of consecutive newlines collapse to a single newline."""
        raw = "Line 1\n\nLine 2\n\n\nLine 3"
        expected = "Line 1\nLine 2\nLine 3"
        self.assertEqual(ca._clean_extracted_text(raw), expected)

    def test_remove_artifacts(self):
        """Page-number and table-rule lines are dropped, leaving the text."""
        raw = " 123 \n|---|\nSome text\n"
        expected = "Some text"
        self.assertEqual(ca._clean_extracted_text(raw), expected)

    def test_normalize_spaces(self):
        """Runs of multiple spaces collapse to a single space."""
        # The input must actually contain repeated spaces; with a
        # single-spaced input the assertion would pass even if no
        # normalization happened at all.
        raw = "A    B   C"
        expected = "A B C"
        self.assertEqual(ca._clean_extracted_text(raw), expected)

    def test_empty_string(self):
        """An empty string is returned unchanged."""
        self.assertEqual(ca._clean_extracted_text(""), "")

    def test_none_input(self):
        """None is treated as empty input and yields an empty string."""
        self.assertEqual(ca._clean_extracted_text(None), "")
class TestStructureResumeText(unittest.TestCase):
    """Test cases for the _structure_resume_text function."""

    def test_basic_structure(self):
        """A resume with the five standard headings yields all five sections."""
        resume_text = (
            "Contact Info\nJohn Doe\nSummary\nExperienced dev"
            "\nExperience\nCompany X\nEducation\nMIT\nSkills\nPython, C++"
        )
        structured = ca._structure_resume_text(resume_text)

        parsed_sections = structured["sections"]
        for section_name in ("contact_info", "summary", "experience",
                             "education", "skills"):
            self.assertIn(section_name, parsed_sections)

        self.assertGreater(structured["word_count"], 0)
        self.assertGreaterEqual(structured["section_count"], 5)

    def test_empty_text(self):
        """Empty input produces empty sections and zeroed counters."""
        structured = ca._structure_resume_text("")

        self.assertEqual(structured["sections"], {})
        self.assertEqual(structured["full_text"], "")
        self.assertEqual(structured["word_count"], 0)
        self.assertEqual(structured["section_count"], 0)

    def test_contains_required_fields(self):
        """The result dictionary always exposes the full set of keys."""
        structured = ca._structure_resume_text("Some basic text")

        for key in ("sections", "full_text", "llm_formatted", "summary",
                    "format", "word_count", "section_count"):
            self.assertIn(key, structured)
class TestFormatForLLM(unittest.TestCase):
    """Test cases for the _format_for_llm function."""

    def _assert_wrapped_in_markers(self, formatted):
        """Check that the output opens and closes with the resume markers."""
        self.assertTrue(formatted.startswith("=== RESUME CONTENT ==="))
        self.assertTrue(formatted.endswith("=== END RESUME ==="))

    def test_section_formatting(self):
        """Each section gets a bracketed upper-case heading in the output."""
        formatted = ca._format_for_llm({
            "summary": "A summary.",
            "contact_info": "Contact details.",
            "experience": "Work exp.",
            "education": "School info.",
            "skills": "Python, C++"
        })

        for heading in ("[SUMMARY]", "[CONTACT INFO]", "[EXPERIENCE]",
                        "[EDUCATION]", "[SKILLS]"):
            self.assertIn(heading, formatted)
        self._assert_wrapped_in_markers(formatted)

    def test_empty_sections(self):
        """With no sections the output still carries both markers."""
        formatted = ca._format_for_llm({})
        self._assert_wrapped_in_markers(formatted)
class TestGetLLMContextFromResume(unittest.TestCase):
    """Test cases for the get_llm_context_from_resume function."""

    def test_success_with_llm_formatted(self):
        """The LLM-formatted text is returned when it is present."""
        structured = {"llm_formatted": "LLM text", "full_text": "Full text"}
        context = ca.get_llm_context_from_resume(
            {"status": "success", "structured_text": structured}
        )
        self.assertEqual(context, "LLM text")

    def test_fallback_to_full_text(self):
        """Without LLM-formatted text the full text is returned instead."""
        structured = {"full_text": "Full text"}
        context = ca.get_llm_context_from_resume(
            {"status": "success", "structured_text": structured}
        )
        self.assertEqual(context, "Full text")

    def test_error_status(self):
        """An error-status extraction result yields an empty string."""
        context = ca.get_llm_context_from_resume({"status": "error"})
        self.assertEqual(context, "")

    def test_missing_structured_text(self):
        """A success result lacking structured_text yields an empty string."""
        context = ca.get_llm_context_from_resume({"status": "success"})
        self.assertEqual(context, "")
class TestExtractTextFromLinkedInPDF(unittest.TestCase):
    """Test cases for the extract_text_from_linkedin_pdf function."""

    def test_none_input(self):
        """A None path yields an error result mentioning the missing PDF."""
        result = ca.extract_text_from_linkedin_pdf(None)
        self.assertEqual(result["status"], "error")
        self.assertIn("No PDF file provided", result["message"])

    # Without these decorators the test runner calls the method with only
    # ``self`` and it fails with a TypeError for the two mock parameters.
    # Decorators apply bottom-up, so the innermost patch (builtins.open)
    # supplies the first mock argument and PyPDF2.PdfReader the second.
    # NOTE(review): the targets assume the module under test reads the file
    # through the built-in open() and resolves PyPDF2.PdfReader at call
    # time -- confirm against functions/linkedin_resume.py.
    @patch("PyPDF2.PdfReader")
    @patch("builtins.open")
    def test_successful_extraction(self, mock_open, mock_pdf_reader):
        """Test successful PDF text extraction with mocked PyPDF2."""
        # A real path on disk is used in case the function checks for the
        # file before opening it; the content itself comes from the mocks.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp_path = tmp.name

        try:
            # Mock file reading
            mock_file = MagicMock()
            mock_file.read.return_value = b"fake pdf content"
            mock_open.return_value.__enter__.return_value = mock_file

            # Mock PDF reader and its single page
            mock_page = MagicMock()
            mock_page.extract_text.return_value = (
                "Contact Info\nJohn Doe\nSummary"
                "\nDeveloper\nExperience\nCompany X"
            )
            mock_reader_instance = MagicMock()
            mock_reader_instance.pages = [mock_page]
            mock_pdf_reader.return_value = mock_reader_instance

            # Exercise the function under test
            result = ca.extract_text_from_linkedin_pdf(tmp_path)

            self.assertEqual(result["status"], "success")
            self.assertIn("structured_text", result)
            self.assertIn("metadata", result)
            self.assertIn(
                "contact_info", result["structured_text"]["sections"]
            )
        finally:
            # Clean up the temporary file regardless of the test outcome
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def test_nonexistent_file(self):
        """A path that does not exist yields an extraction error result."""
        result = ca.extract_text_from_linkedin_pdf("/nonexistent/path.pdf")
        self.assertEqual(result["status"], "error")
        self.assertIn("Failed to extract text from PDF", result["message"])
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()