Spaces:

gperdrizet
/

resumate

Configuration error

App Files Files Community

resumate / tests /test_linkedin_resume.py

gperdrizet

Moved multiple instances of 'pylint: disable=broad-exception-caught' to top of file

f9a80bc verified 3 months ago

raw

history blame

7.5 kB

	"""
	Unit tests for the linkedin_resume module.
	"""

	import unittest
	import tempfile
	import os
	from unittest.mock import patch, MagicMock
	from functions import linkedin_resume as ca

	# pylint: disable=protected-access


	class TestCleanExtractedText(unittest.TestCase):
	    """Test cases for the _clean_extracted_text function.

	    Each test feeds a small raw string to ``ca._clean_extracted_text`` and
	    compares the return value with the exact cleaned text expected.
	    """

	    def test_normalize_multiple_newlines(self):
	        """Runs of two or three newlines are expected to collapse to one."""

	        raw = "Line 1\n\nLine 2\n\n\nLine 3"
	        expected = "Line 1\nLine 2\nLine 3"
	        self.assertEqual(ca._clean_extracted_text(raw), expected)

	    def test_remove_artifacts(self):
	        """A number-only line and a table-rule line are expected to be dropped."""

	        # Plain pipes on purpose: a backslash before "|" is not a valid Python
	        # escape sequence and would put literal backslashes into the sample.
	        # NOTE(review): confirm "|---|" is the artifact shape the cleaner targets.
	        raw = " 123 \n|---|\nSome text\n"
	        expected = "Some text"
	        self.assertEqual(ca._clean_extracted_text(raw), expected)

	    def test_normalize_spaces(self):
	        """Runs of several spaces are expected to collapse to a single space."""

	        # The input must actually contain repeated spaces; with single spaces
	        # the input equals the expected output and the test checks nothing.
	        raw = "A   B    C"
	        expected = "A B C"
	        self.assertEqual(ca._clean_extracted_text(raw), expected)

	    def test_empty_string(self):
	        """An empty string is expected to come back as an empty string."""

	        self.assertEqual(ca._clean_extracted_text(""), "")

	    def test_none_input(self):
	        """``None`` is expected to be treated like empty input, returning ""."""

	        self.assertEqual(ca._clean_extracted_text(None), "")


	class TestStructureResumeText(unittest.TestCase):
	    """Checks on the dictionary returned by ``ca._structure_resume_text``."""

	    def test_basic_structure(self):
	        """Text with five section headings yields all five section keys."""

	        resume_text = (
	            "Contact Info\nJohn Doe\nSummary\nExperienced dev"
	            "\nExperience\nCompany X\nEducation\nMIT\nSkills\nPython, C++"
	        )

	        structured = ca._structure_resume_text(resume_text)

	        for section_key in (
	            "contact_info",
	            "summary",
	            "experience",
	            "education",
	            "skills",
	        ):
	            self.assertIn(section_key, structured["sections"])

	        self.assertGreater(structured["word_count"], 0)
	        self.assertGreaterEqual(structured["section_count"], 5)

	    def test_empty_text(self):
	        """Empty input yields empty sections, empty text and zero counts."""

	        structured = ca._structure_resume_text("")

	        expected_values = {
	            "sections": {},
	            "full_text": "",
	            "word_count": 0,
	            "section_count": 0,
	        }
	        for key, expected in expected_values.items():
	            self.assertEqual(structured[key], expected)

	    def test_contains_required_fields(self):
	        """Every key callers rely on is present, even for unsectioned text."""

	        structured = ca._structure_resume_text("Some basic text")

	        for field_name in (
	            "sections",
	            "full_text",
	            "llm_formatted",
	            "summary",
	            "format",
	            "word_count",
	            "section_count",
	        ):
	            self.assertIn(field_name, structured)


	class TestFormatForLLM(unittest.TestCase):
	    """Checks on the string produced by ``ca._format_for_llm``."""

	    def _assert_wrapped(self, formatted):
	        """Assert the output opens and closes with the resume delimiters."""

	        self.assertTrue(formatted.startswith("=== RESUME CONTENT ==="))
	        self.assertTrue(formatted.endswith("=== END RESUME ==="))

	    def test_section_formatting(self):
	        """Each supplied section shows up under its bracketed heading."""

	        formatted = ca._format_for_llm(
	            {
	                "summary": "A summary.",
	                "contact_info": "Contact details.",
	                "experience": "Work exp.",
	                "education": "School info.",
	                "skills": "Python, C++",
	            }
	        )

	        for heading in (
	            "[SUMMARY]",
	            "[CONTACT INFO]",
	            "[EXPERIENCE]",
	            "[EDUCATION]",
	            "[SKILLS]",
	        ):
	            self.assertIn(heading, formatted)

	        self._assert_wrapped(formatted)

	    def test_empty_sections(self):
	        """With no sections the delimiters are still emitted."""

	        self._assert_wrapped(ca._format_for_llm({}))


	class TestGetLLMContextFromResume(unittest.TestCase):
	    """Checks on the text selected by ``ca.get_llm_context_from_resume``."""

	    def test_success_with_llm_formatted(self):
	        """When both texts exist, the LLM-formatted one is returned."""

	        payload = {
	            "status": "success",
	            "structured_text": {
	                "llm_formatted": "LLM text",
	                "full_text": "Full text",
	            },
	        }

	        self.assertEqual(ca.get_llm_context_from_resume(payload), "LLM text")

	    def test_fallback_to_full_text(self):
	        """Without an LLM-formatted entry, the full text is returned."""

	        payload = {
	            "status": "success",
	            "structured_text": {"full_text": "Full text"},
	        }

	        self.assertEqual(ca.get_llm_context_from_resume(payload), "Full text")

	    def test_error_status(self):
	        """An error-status result produces an empty string."""

	        context = ca.get_llm_context_from_resume({"status": "error"})
	        self.assertEqual(context, "")

	    def test_missing_structured_text(self):
	        """A success result lacking ``structured_text`` produces an empty string."""

	        context = ca.get_llm_context_from_resume({"status": "success"})
	        self.assertEqual(context, "")


	class TestExtractTextFromLinkedInPDF(unittest.TestCase):
	    """Checks on the result dictionary of ``ca.extract_text_from_linkedin_pdf``."""

	    def test_none_input(self):
	        """``None`` yields an error result that mentions the missing file."""

	        outcome = ca.extract_text_from_linkedin_pdf(None)

	        self.assertEqual(outcome["status"], "error")
	        self.assertIn("No PDF file provided", outcome["message"])

	    @patch('PyPDF2.PdfReader')
	    @patch('builtins.open')
	    def test_successful_extraction(self, mock_open, mock_pdf_reader):
	        """A mocked one-page PDF produces a successful, structured result.

	        Patch decorators are applied bottom-up, so the ``builtins.open`` mock
	        arrives first and the ``PyPDF2.PdfReader`` mock second.
	        """

	        # delete=False keeps the file on disk once the handle is closed; it
	        # is removed by hand in the ``finally`` clause below.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as handle:
	            pdf_path = handle.name

	        try:
	            # File object handed out by ``with open(...) as f``.
	            fake_file = MagicMock(**{"read.return_value": b"fake pdf content"})
	            mock_open.return_value.__enter__.return_value = fake_file

	            # Reader exposing a single page whose text has three headings.
	            page_text = (
	                "Contact Info\nJohn Doe\nSummary"
	                "\nDeveloper\nExperience\nCompany X"
	            )
	            fake_page = MagicMock(**{"extract_text.return_value": page_text})
	            mock_pdf_reader.return_value = MagicMock(pages=[fake_page])

	            outcome = ca.extract_text_from_linkedin_pdf(pdf_path)

	            self.assertEqual(outcome["status"], "success")
	            for key in ("structured_text", "metadata"):
	                self.assertIn(key, outcome)
	            self.assertIn(
	                "contact_info", outcome["structured_text"]["sections"]
	            )
	        finally:
	            # Remove the temporary file whether or not the assertions passed.
	            if os.path.exists(pdf_path):
	                os.remove(pdf_path)

	    def test_nonexistent_file(self):
	        """A path that does not exist yields an extraction-failure error."""

	        outcome = ca.extract_text_from_linkedin_pdf("/nonexistent/path.pdf")

	        self.assertEqual(outcome["status"], "error")
	        self.assertIn("Failed to extract text from PDF", outcome["message"])


	# Run the test cases in this module when the file is executed as a script.
	if __name__ == '__main__':
	unittest.main()