resumate / tests /test_linkedin_resume.py
gperdrizet's picture
Moved multiple instances of 'pylint: disable=broad-exception-caught' to top of file
f9a80bc verified
raw
history blame
7.5 kB
"""
Unit tests for the linkedin_resume module.
"""
import unittest
import tempfile
import os
from unittest.mock import patch, MagicMock
from functions import linkedin_resume as ca
# pylint: disable=protected-access
class TestCleanExtractedText(unittest.TestCase):
    """Test cases for the _clean_extracted_text function.

    Every test passes a raw string (or None) to ``ca._clean_extracted_text``
    and compares the return value with the exact string expected.
    """

    def test_normalize_multiple_newlines(self):
        """Test normalization of multiple newlines."""
        raw = "Line 1\n\nLine 2\n\n\nLine 3"
        expected = "Line 1\nLine 2\nLine 3"
        self.assertEqual(ca._clean_extracted_text(raw), expected)

    def test_remove_artifacts(self):
        """Test removal of PDF artifacts.

        The number-only line and the ``|---|`` line are expected to vanish,
        leaving only the real text with no surrounding whitespace.
        """
        raw = " 123 \n|---|\nSome text\n"
        expected = "Some text"
        self.assertEqual(ca._clean_extracted_text(raw), expected)

    def test_normalize_spaces(self):
        """Test normalization of multiple spaces."""
        # The input must actually contain runs of spaces; if it already
        # equals the expected output, the assertion would pass even when the
        # function performs no space normalization at all.
        raw = "A    B   C"
        expected = "A B C"
        self.assertEqual(ca._clean_extracted_text(raw), expected)

    def test_empty_string(self):
        """Test handling of empty string."""
        self.assertEqual(ca._clean_extracted_text(""), "")

    def test_none_input(self):
        """Test handling of None input (expected to yield an empty string)."""
        self.assertEqual(ca._clean_extracted_text(None), "")
class TestStructureResumeText(unittest.TestCase):
    """Checks on the dictionary returned by ``ca._structure_resume_text``."""

    def test_basic_structure(self):
        """A resume with five headed sections yields all five section keys."""
        resume_text = (
            "Contact Info\nJohn Doe\nSummary\nExperienced dev"
            "\nExperience\nCompany X\nEducation\nMIT\nSkills\nPython, C++"
        )
        structured = ca._structure_resume_text(resume_text)

        found_sections = structured["sections"]
        for section_name in (
            "contact_info",
            "summary",
            "experience",
            "education",
            "skills",
        ):
            self.assertIn(section_name, found_sections)

        self.assertGreater(structured["word_count"], 0)
        self.assertGreaterEqual(structured["section_count"], 5)

    def test_empty_text(self):
        """An empty input produces empty sections, empty text and zero counts."""
        structured = ca._structure_resume_text("")

        expected_values = (
            ("sections", {}),
            ("full_text", ""),
            ("word_count", 0),
            ("section_count", 0),
        )
        for key, expected in expected_values:
            self.assertEqual(structured[key], expected)

    def test_contains_required_fields(self):
        """The result exposes every key that callers are expected to read."""
        structured = ca._structure_resume_text("Some basic text")

        for key in (
            "sections",
            "full_text",
            "llm_formatted",
            "summary",
            "format",
            "word_count",
            "section_count",
        ):
            self.assertIn(key, structured)
class TestFormatForLLM(unittest.TestCase):
    """Checks on the string produced by ``ca._format_for_llm``."""

    def _assert_wrapped_in_delimiters(self, formatted):
        """Assert the output opens and closes with the resume delimiters."""
        self.assertTrue(formatted.startswith("=== RESUME CONTENT ==="))
        self.assertTrue(formatted.endswith("=== END RESUME ==="))

    def test_section_formatting(self):
        """Each supplied section shows up under its bracketed header."""
        formatted = ca._format_for_llm(
            {
                "summary": "A summary.",
                "contact_info": "Contact details.",
                "experience": "Work exp.",
                "education": "School info.",
                "skills": "Python, C++",
            }
        )

        for header in (
            "[SUMMARY]",
            "[CONTACT INFO]",
            "[EXPERIENCE]",
            "[EDUCATION]",
            "[SKILLS]",
        ):
            self.assertIn(header, formatted)

        self._assert_wrapped_in_delimiters(formatted)

    def test_empty_sections(self):
        """With no sections, the output still carries both delimiters."""
        formatted = ca._format_for_llm({})
        self._assert_wrapped_in_delimiters(formatted)
class TestGetLLMContextFromResume(unittest.TestCase):
    """Checks on ``ca.get_llm_context_from_resume`` for each input shape."""

    def test_success_with_llm_formatted(self):
        """The LLM-formatted text is returned when it is present."""
        structured = {"llm_formatted": "LLM text", "full_text": "Full text"}
        payload = {"status": "success", "structured_text": structured}

        context = ca.get_llm_context_from_resume(payload)

        self.assertEqual(context, "LLM text")

    def test_fallback_to_full_text(self):
        """The full text is returned when no LLM-formatted text exists."""
        payload = {
            "status": "success",
            "structured_text": {"full_text": "Full text"},
        }

        context = ca.get_llm_context_from_resume(payload)

        self.assertEqual(context, "Full text")

    def test_error_status(self):
        """An error status yields an empty string."""
        context = ca.get_llm_context_from_resume({"status": "error"})
        self.assertEqual(context, "")

    def test_missing_structured_text(self):
        """A success status without structured_text yields an empty string."""
        context = ca.get_llm_context_from_resume({"status": "success"})
        self.assertEqual(context, "")
class TestExtractTextFromLinkedInPDF(unittest.TestCase):
    """Checks on ``ca.extract_text_from_linkedin_pdf``."""

    def test_none_input(self):
        """Passing None reports an error mentioning the missing PDF."""
        outcome = ca.extract_text_from_linkedin_pdf(None)

        self.assertEqual(outcome["status"], "error")
        self.assertIn("No PDF file provided", outcome["message"])

    @patch('PyPDF2.PdfReader')
    @patch('builtins.open')
    def test_successful_extraction(self, mock_open, mock_pdf_reader):
        """Extraction succeeds when PyPDF2 and file access are both mocked."""
        # A real (empty) file on disk gives the function an existing path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as handle:
            pdf_path = handle.name

        try:
            # Stand-in for the object yielded by ``with open(...) as f``.
            fake_file = MagicMock()
            fake_file.read.return_value = b"fake pdf content"
            mock_open.return_value.__enter__.return_value = fake_file

            # Single-page reader whose page yields a small sectioned resume.
            page_text = (
                "Contact Info\nJohn Doe\nSummary"
                "\nDeveloper\nExperience\nCompany X"
            )
            fake_page = MagicMock(**{"extract_text.return_value": page_text})
            mock_pdf_reader.return_value = MagicMock(pages=[fake_page])

            outcome = ca.extract_text_from_linkedin_pdf(pdf_path)

            self.assertEqual(outcome["status"], "success")
            for key in ("structured_text", "metadata"):
                self.assertIn(key, outcome)
            self.assertIn(
                "contact_info", outcome["structured_text"]["sections"]
            )
        finally:
            # Remove the temporary file whether or not the assertions passed.
            if os.path.exists(pdf_path):
                os.remove(pdf_path)

    def test_nonexistent_file(self):
        """A path that does not exist reports an extraction failure."""
        outcome = ca.extract_text_from_linkedin_pdf("/nonexistent/path.pdf")

        self.assertEqual(outcome["status"], "error")
        self.assertIn("Failed to extract text from PDF", outcome["message"])
# Run the whole suite when this file is executed directly as a script.
if "__main__" == __name__:
    unittest.main()