Spaces:

gperdrizet
/

resumate

Configuration error

resumate / tests /test_context_acquisition.py

Added unittests for context acquisition functions.

d5a003e verified 3 months ago

7.64 kB

	"""
	Unit tests for the context_acquisition module.
	"""

	import unittest
	import os
	import tempfile
	import shutil
	from selenium.webdriver.chrome.options import Options

	import functions.context_acquisition

	# Import the functions to test
	from functions.context_acquisition import (
	_clean_html_content,
	_save_html_to_file,
	setup_chrome_driver_options
	)


	class TestCleanHTMLContent(unittest.TestCase):
	    """Test cases for the _clean_html_content function.

	    Multi-line fixtures are assembled from explicit lists of lines so that
	    their exact content never depends on how this file happens to be
	    indented.
	    """

	    def test_remove_blank_lines(self):
	        """Blank lines between tags are dropped; the remaining lines keep their order."""
	        html_with_blanks = "\n".join([
	            "<html>",
	            "",
	            "<head>",
	            "<title>Test</title>",
	            "",
	            "</head>",
	            "",
	            "<body>",
	            "<div>Content</div>",
	            "",
	            "</body>",
	            "</html>",
	        ])

	        expected = "\n".join([
	            "<html>",
	            "<head>",
	            "<title>Test</title>",
	            "</head>",
	            "<body>",
	            "<div>Content</div>",
	            "</body>",
	            "</html>",
	        ])

	        result = _clean_html_content(html_with_blanks)
	        self.assertEqual(result, expected)

	    def test_strip_trailing_whitespace(self):
	        """Trailing spaces/tabs on a line and the final newline are removed."""
	        html_with_trailing = "<div>Content</div> \n<p>Text</p>\t\n"
	        expected = "<div>Content</div>\n<p>Text</p>"

	        result = _clean_html_content(html_with_trailing)
	        self.assertEqual(result, expected)

	    def test_empty_content(self):
	        """Empty and whitespace-only inputs all clean down to the empty string."""
	        # subTest reports every failing input instead of stopping at the first.
	        for raw in ("", " \n\n\t ", "\n"):
	            with self.subTest(raw=raw):
	                self.assertEqual(_clean_html_content(raw), "")

	    def test_single_line_content(self):
	        """A single line without surrounding whitespace is returned unchanged."""
	        single_line = "<html><body>Content</body></html>"
	        result = _clean_html_content(single_line)
	        self.assertEqual(result, single_line)

	    def test_mixed_whitespace(self):
	        """Lines made only of tabs, spaces or a carriage return count as blank."""
	        mixed = "<div>\t\n \n\r\n<p>Text</p>\n \n</div>"
	        expected = "<div>\n<p>Text</p>\n</div>"
	        result = _clean_html_content(mixed)
	        self.assertEqual(result, expected)


	class TestSaveHTMLToFile(unittest.TestCase):
	    """Test cases for the _save_html_to_file function."""

	    def setUp(self):
	        """Create a scratch directory plus the HTML/URL fixtures used by the tests."""
	        self.test_dir = tempfile.mkdtemp()
	        # Registered immediately so the directory is removed even when a later
	        # setUp step or the test body raises.
	        self.addCleanup(shutil.rmtree, self.test_dir, ignore_errors=True)
	        self.test_html = "<html><body>Test content</body></html>"
	        self.test_url = "https://www.linkedin.com/in/johndoe"

	    def test_successful_file_save(self):
	        """Saving returns the path of an existing .html file holding the given content."""
	        from unittest import mock

	        real_dirname = os.path.dirname

	        def fake_dirname(path):
	            """Return the scratch directory for context_acquisition.py, else defer."""
	            # os.path.dirname accepts str, bytes and os.PathLike; normalise
	            # first so non-str arguments fall through instead of raising.
	            name = os.fspath(path)
	            if isinstance(name, str) and name.endswith('context_acquisition.py'):
	                return self.test_dir
	            return real_dirname(path)

	        # NOTE(review): functions.context_acquisition.os is presumably the
	        # shared stdlib os module, so this patch is visible process-wide while
	        # active. Keep the patched window as small as possible.
	        with mock.patch.object(
	            functions.context_acquisition.os.path, 'dirname', fake_dirname
	        ):
	            result = _save_html_to_file(self.test_html, self.test_url)

	        # Verify file was created
	        self.assertTrue(os.path.exists(result))
	        self.assertTrue(result.endswith('.html'))

	        # Verify file content
	        with open(result, 'r', encoding='utf-8') as f:
	            content = f.read()
	        self.assertEqual(content, self.test_html)


	class TestSetupChromeDriverOptions(unittest.TestCase):
	    """Test cases for the setup_chrome_driver_options function."""

	    def test_chrome_options_configuration(self):
	        """The function returns a Selenium Chrome Options instance."""
	        options = setup_chrome_driver_options()

	        # Verify that options object is returned
	        self.assertIsNotNone(options)

	        # Verify it's the correct type
	        self.assertIsInstance(options, Options)

	    def test_chrome_options_arguments(self):
	        """The returned options expose their arguments as a list of strings."""
	        options = setup_chrome_driver_options()
	        self.assertIsNotNone(options)

	        # Options.arguments is a public property, so no private attribute
	        # access is needed to inspect what was configured.
	        arguments = options.arguments
	        self.assertIsInstance(arguments, list)
	        for argument in arguments:
	            with self.subTest(argument=argument):
	                self.assertIsInstance(argument, str)


	class TestURLValidation(unittest.TestCase):
	    """Test cases for URL validation logic.

	    The checks are expressed locally in ``_looks_like_profile_url``; no
	    function from the module under test is called here.
	    """

	    @staticmethod
	    def _looks_like_profile_url(url):
	        """Return True when *url* is a non-blank str containing "linkedin.com/in/"."""
	        return (
	            isinstance(url, str)
	            and bool(url.strip())
	            and "linkedin.com/in/" in url
	        )

	    def test_valid_linkedin_urls(self):
	        """Every well-formed profile URL passes each individual check."""
	        valid_urls = [
	            "https://www.linkedin.com/in/johndoe",
	            "https://linkedin.com/in/jane-smith",
	            "http://www.linkedin.com/in/test123",
	            "https://www.linkedin.com/in/user-name-with-dashes",
	        ]

	        for url in valid_urls:
	            # subTest names the offending URL and keeps checking the rest.
	            with self.subTest(url=url):
	                self.assertIsInstance(url, str)
	                self.assertTrue(url.strip())
	                self.assertIn("linkedin.com/in/", url)
	                self.assertTrue(self._looks_like_profile_url(url))

	    def test_invalid_linkedin_urls(self):
	        """Empty, None, non-LinkedIn and non-profile inputs are all rejected."""
	        invalid_urls = [
	            "",
	            None,
	            "https://www.example.com/profile",
	            "https://www.linkedin.com/company/test",
	            "https://github.com/user",
	            "not-a-url",
	        ]

	        for url in invalid_urls:
	            # One uniform assertion: branching on the input and then asserting
	            # the branch condition itself could never fail.
	            with self.subTest(url=url):
	                self.assertFalse(self._looks_like_profile_url(url))


	class TestHTMLContentProcessing(unittest.TestCase):
	    """Workflow-level checks of _clean_html_content on larger snippets."""

	    def test_html_cleaning_workflow(self):
	        """A profile-like page is cleaned of blank lines and keeps its text."""
	        raw_html = """<!DOCTYPE html>
	<html>

	<head>
	<title>LinkedIn Profile</title>

	</head>

	<body>
	<div class="profile">
	<h1>John Doe</h1>

	<p>Software Engineer</p>
	</div>

	</body>

	</html>"""

	        cleaned = _clean_html_content(raw_html)

	        # Each line of the output must contain something other than whitespace.
	        for line in cleaned.split('\n'):
	            self.assertTrue(line.strip(), f"Found empty line: '{line}'")

	        # The page text must still be present after cleaning.
	        for fragment in ("John Doe", "Software Engineer", "LinkedIn Profile"):
	            self.assertIn(fragment, cleaned)

	    def test_minimal_html_cleaning(self):
	        """A one-line document passes through unchanged."""
	        minimal_html = "<html><body>Content</body></html>"
	        self.assertEqual(_clean_html_content(minimal_html), minimal_html)

	    def test_complex_whitespace_patterns(self):
	        """Tab-only and empty lines are removed while element text is kept."""
	        complex_html = """<div>
	\t\t
	<span>Text</span>
	\t

	<p>Paragraph</p>
	\t
	</div>"""

	        result = _clean_html_content(complex_html)

	        # No line of the output may be blank.
	        for line in result.split('\n'):
	            self.assertTrue(line.strip())

	        # Element text survives the cleaning.
	        for fragment in ("Text", "Paragraph"):
	            self.assertIn(fragment, result)


	# Run every TestCase in this module when the file is executed directly.
	if __name__ == '__main__':
	    unittest.main()