resumate / tests /test_context_acquisition.py
gperdrizet's picture
Added unittests for context acquisition functions.
d5a003e verified
raw
history blame
7.64 kB
"""
Unit tests for the context_acquisition module.
"""
import unittest
import os
import tempfile
import shutil
from selenium.webdriver.chrome.options import Options
import functions.context_acquisition
# Import the functions to test
from functions.context_acquisition import (
_clean_html_content,
_save_html_to_file,
setup_chrome_driver_options
)
class TestCleanHTMLContent(unittest.TestCase):
    """Test cases for the _clean_html_content function.

    The expectations below require that blank and whitespace-only lines
    are dropped and that trailing whitespace is removed from the lines
    that remain.
    """

    def test_remove_blank_lines(self):
        """Test removal of blank lines from HTML content."""
        # The fixtures are assembled from explicit pieces so that the blank
        # lines are visible in the source and cannot be silently lost to
        # editor or formatter whitespace clean-up.
        html_with_blanks = (
            "<html>\n"
            "\n"
            "<head>\n"
            "<title>Test</title>\n"
            "\n"
            "\n"
            "</head>\n"
            "<body>\n"
            "\n"
            "<div>Content</div>\n"
            "</body>\n"
            "\n"
            "</html>"
        )
        expected = (
            "<html>\n"
            "<head>\n"
            "<title>Test</title>\n"
            "</head>\n"
            "<body>\n"
            "<div>Content</div>\n"
            "</body>\n"
            "</html>"
        )

        # Guard against a fixture that no longer exercises the behaviour.
        self.assertIn("\n\n", html_with_blanks)

        result = _clean_html_content(html_with_blanks)
        self.assertEqual(result, expected)

    def test_strip_trailing_whitespace(self):
        """Test removal of trailing whitespace from lines."""
        html_with_trailing = "<div>Content</div> \n<p>Text</p>\t\n"
        expected = "<div>Content</div>\n<p>Text</p>"

        result = _clean_html_content(html_with_trailing)
        self.assertEqual(result, expected)

    def test_empty_content(self):
        """Test handling of empty or whitespace-only content."""
        # subTest reports every failing input instead of stopping at the
        # first one.
        for blank in ("", " \n\n\t ", "\n"):
            with self.subTest(content=blank):
                self.assertEqual(_clean_html_content(blank), "")

    def test_single_line_content(self):
        """Test cleaning of single line content."""
        single_line = "<html><body>Content</body></html>"

        result = _clean_html_content(single_line)
        self.assertEqual(result, single_line)

    def test_mixed_whitespace(self):
        """Test handling of mixed whitespace characters."""
        # Mixes tabs, spaces and a CRLF line ending between content lines.
        mixed = "<div>\t\n \n\r\n<p>Text</p>\n \n</div>"
        expected = "<div>\n<p>Text</p>\n</div>"

        result = _clean_html_content(mixed)
        self.assertEqual(result, expected)
class TestSaveHTMLToFile(unittest.TestCase):
    """Test cases for the _save_html_to_file function."""

    def setUp(self):
        """Set up test fixtures with temporary directory."""
        self.test_dir = tempfile.mkdtemp()
        self.test_html = "<html><body>Test content</body></html>"
        self.test_url = "https://www.linkedin.com/in/johndoe"

    def tearDown(self):
        """Clean up temporary directory."""
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def test_successful_file_save(self):
        """Test successful saving of HTML content to file.

        ``dirname`` is replaced on the ``os.path`` object reachable from the
        module under test so that a path ending in ``context_acquisition.py``
        resolves to the temporary directory created in ``setUp``.
        """
        path_module = functions.context_acquisition.os.path
        real_dirname = path_module.dirname

        def fake_dirname(path):
            # NOTE(review): path_module is presumably the stdlib os.path, in
            # which case this stand-in is visible to every caller while the
            # test runs. Only str paths are inspected so PathLike or bytes
            # arguments fall through to the real function instead of raising.
            if isinstance(path, str) and path.endswith('context_acquisition.py'):
                return self.test_dir
            return real_dirname(path)

        path_module.dirname = fake_dirname
        # Registered right after patching so the original is restored even if
        # an assertion below fails.
        self.addCleanup(setattr, path_module, 'dirname', real_dirname)

        result = _save_html_to_file(self.test_html, self.test_url)

        # Fail with a clear message rather than a TypeError from
        # os.path.exists() if no path was returned.
        self.assertIsInstance(result, str)

        # Verify file was created
        self.assertTrue(os.path.exists(result))
        self.assertTrue(result.endswith('.html'))

        # Verify file content
        with open(result, 'r', encoding='utf-8') as f:
            content = f.read()
        self.assertEqual(content, self.test_html)
class TestSetupChromeDriverOptions(unittest.TestCase):
    """Test cases for the setup_chrome_driver_options function."""

    def test_chrome_options_configuration(self):
        """Test that Chrome options are properly configured."""
        options = setup_chrome_driver_options()

        # Verify that options object is returned
        self.assertIsNotNone(options)

        # Verify it's the correct type
        self.assertIsInstance(options, Options)

    def test_chrome_options_arguments(self):
        """Test that the configured Chrome arguments are well-formed."""
        options = setup_chrome_driver_options()
        self.assertIsNotNone(options)

        # Selenium exposes the configured command-line flags through the
        # public ``arguments`` property, so no private attribute access is
        # needed. The specific flags are not asserted here because the
        # expected set is not defined in this test module.
        arguments = options.arguments
        self.assertIsInstance(arguments, list)

        for argument in arguments:
            with self.subTest(argument=argument):
                self.assertIsInstance(argument, str)
                self.assertTrue(argument)
class TestURLValidation(unittest.TestCase):
    """Test cases for URL validation logic (extracted from main function).

    NOTE(review): these tests check a local copy of the rule in
    ``_is_valid_linkedin_url`` rather than calling production code; confirm
    it still matches the validation performed by the main function.
    """

    @staticmethod
    def _is_valid_linkedin_url(url):
        """Return True when ``url`` satisfies the validation rule under test.

        A URL is accepted when it is a ``str``, is not empty or
        whitespace-only, and contains ``"linkedin.com/in/"``.

        Args:
            url: Candidate value; may be any object, including ``None``.

        Returns:
            bool: ``True`` if the value passes every check, else ``False``.
        """
        if not isinstance(url, str):
            return False
        return bool(url.strip()) and "linkedin.com/in/" in url

    def test_valid_linkedin_urls(self):
        """Test validation of valid LinkedIn URLs."""
        valid_urls = [
            "https://www.linkedin.com/in/johndoe",
            "https://linkedin.com/in/jane-smith",
            "http://www.linkedin.com/in/test123",
            "https://www.linkedin.com/in/user-name-with-dashes",
        ]

        for url in valid_urls:
            with self.subTest(url=url):
                self.assertTrue(self._is_valid_linkedin_url(url))

    def test_invalid_linkedin_urls(self):
        """Test validation of invalid LinkedIn URLs."""
        invalid_urls = [
            "",
            None,
            "https://www.example.com/profile",
            "https://www.linkedin.com/company/test",
            "https://github.com/user",
            "not-a-url",
        ]

        # Every entry must be rejected by the rule itself; asserting the
        # helper's result avoids branches that can only ever pass.
        for url in invalid_urls:
            with self.subTest(url=url):
                self.assertFalse(self._is_valid_linkedin_url(url))
class TestHTMLContentProcessing(unittest.TestCase):
    """Test cases for HTML content processing workflows."""

    def _assert_no_blank_lines(self, text):
        """Fail if any line of ``text`` is empty or whitespace-only."""
        for line in text.split('\n'):
            self.assertTrue(line.strip(), f"Found empty line: '{line}'")

    def test_html_cleaning_workflow(self):
        """Test the complete HTML cleaning workflow."""
        # Assembled from explicit pieces so the blank and whitespace-only
        # lines are visible in the source and survive reformatting.
        raw_html = (
            "<!DOCTYPE html>\n"
            "<html>\n"
            "\n"
            "<head>\n"
            "<title>LinkedIn Profile</title>\n"
            "</head>\n"
            "   \n"
            "<body>\n"
            "<div class=\"profile\">\n"
            "\n"
            "<h1>John Doe</h1>\n"
            "<p>Software Engineer</p>\n"
            "\t\n"
            "</div>\n"
            "</body>\n"
            "\n"
            "</html>"
        )

        cleaned = _clean_html_content(raw_html)

        # Verify no empty lines
        self._assert_no_blank_lines(cleaned)

        # Verify content is preserved
        self.assertIn("John Doe", cleaned)
        self.assertIn("Software Engineer", cleaned)
        self.assertIn("LinkedIn Profile", cleaned)

    def test_minimal_html_cleaning(self):
        """Test cleaning of minimal HTML content."""
        minimal_html = "<html><body>Content</body></html>"

        result = _clean_html_content(minimal_html)
        self.assertEqual(result, minimal_html)

    def test_complex_whitespace_patterns(self):
        """Test cleaning of complex whitespace patterns."""
        # Tab-only lines are interleaved with the content lines.
        complex_html = (
            "<div>\n"
            "\t\t\n"
            "<span>Text</span>\n"
            "\t\n"
            "<p>Paragraph</p>\n"
            "\t\n"
            "</div>"
        )

        result = _clean_html_content(complex_html)

        # Should have no empty lines
        self._assert_no_blank_lines(result)

        # Should preserve content
        self.assertIn("Text", result)
        self.assertIn("Paragraph", result)
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()