""" Unit tests for the context_acquisition module. """ import unittest import os import tempfile import shutil from selenium.webdriver.chrome.options import Options import functions.context_acquisition # Import the functions to test from functions.context_acquisition import ( _clean_html_content, _save_html_to_file, setup_chrome_driver_options ) class TestCleanHTMLContent(unittest.TestCase): """Test cases for the _clean_html_content function.""" def test_remove_blank_lines(self): """Test removal of blank lines from HTML content.""" html_with_blanks = """ Test
Content
""" expected = """ Test
Content
""" result = _clean_html_content(html_with_blanks) self.assertEqual(result, expected) def test_strip_trailing_whitespace(self): """Test removal of trailing whitespace from lines.""" html_with_trailing = "
Content
\n

Text

\t\n" expected = "
Content
\n

Text

" result = _clean_html_content(html_with_trailing) self.assertEqual(result, expected) def test_empty_content(self): """Test handling of empty or whitespace-only content.""" self.assertEqual(_clean_html_content(""), "") self.assertEqual(_clean_html_content(" \n\n\t "), "") self.assertEqual(_clean_html_content("\n"), "") def test_single_line_content(self): """Test cleaning of single line content.""" single_line = "Content" result = _clean_html_content(single_line) self.assertEqual(result, single_line) def test_mixed_whitespace(self): """Test handling of mixed whitespace characters.""" mixed = "
\t\n \n\r\n

Text

\n \n
" expected = "
\n

Text

\n
" result = _clean_html_content(mixed) self.assertEqual(result, expected) class TestSaveHTMLToFile(unittest.TestCase): """Test cases for the _save_html_to_file function.""" def setUp(self): """Set up test fixtures with temporary directory.""" self.test_dir = tempfile.mkdtemp() self.test_html = "Test content" self.test_url = "https://www.linkedin.com/in/johndoe" def tearDown(self): """Clean up temporary directory.""" if os.path.exists(self.test_dir): shutil.rmtree(self.test_dir) def test_successful_file_save(self): """Test successful saving of HTML content to file.""" # Temporarily change the file path calculation original_dirname = os.path.dirname def mock_dirname(path): if path.endswith('context_acquisition.py'): return self.test_dir return original_dirname(path) # Replace os.path.dirname temporarily original_func = functions.context_acquisition.os.path.dirname functions.context_acquisition.os.path.dirname = mock_dirname try: result = _save_html_to_file(self.test_html, self.test_url) # Verify file was created self.assertTrue(os.path.exists(result)) self.assertTrue(result.endswith('.html')) # Verify file content with open(result, 'r', encoding='utf-8') as f: content = f.read() self.assertEqual(content, self.test_html) finally: # Restore original function functions.context_acquisition.os.path.dirname = original_func class TestSetupChromeDriverOptions(unittest.TestCase): """Test cases for the setup_chrome_driver_options function.""" def test_chrome_options_configuration(self): """Test that Chrome options are properly configured.""" options = setup_chrome_driver_options() # Verify that options object is returned self.assertIsNotNone(options) # Verify it's the correct type self.assertIsInstance(options, Options) def test_chrome_options_arguments(self): """Test that required Chrome arguments are set.""" options = setup_chrome_driver_options() # Access the arguments (this is implementation dependent) # Note: This test verifies the function runs without error # Specific argument verification would require accessing private attributes self.assertIsNotNone(options) class TestURLValidation(unittest.TestCase): """Test cases for URL validation logic (extracted from main function).""" def test_valid_linkedin_urls(self): """Test validation of valid LinkedIn URLs.""" valid_urls = [ "https://www.linkedin.com/in/johndoe", "https://linkedin.com/in/jane-smith", "http://www.linkedin.com/in/test123", "https://www.linkedin.com/in/user-name-with-dashes", ] for url in valid_urls: # Test the validation logic directly self.assertTrue(isinstance(url, str)) self.assertTrue(url.strip()) self.assertIn("linkedin.com/in/", url) def test_invalid_linkedin_urls(self): """Test validation of invalid LinkedIn URLs.""" invalid_urls = [ "", None, "https://www.example.com/profile", "https://www.linkedin.com/company/test", "https://github.com/user", "not-a-url", ] for url in invalid_urls: # Test the validation logic directly if url is None or not isinstance(url, str): self.assertTrue(url is None or not isinstance(url, str)) elif not url.strip(): self.assertFalse(url.strip()) else: self.assertNotIn("linkedin.com/in/", url) class TestHTMLContentProcessing(unittest.TestCase): """Test cases for HTML content processing workflows.""" def test_html_cleaning_workflow(self): """Test the complete HTML cleaning workflow.""" raw_html = """ LinkedIn Profile

John Doe

Software Engineer

""" cleaned = _clean_html_content(raw_html) # Verify no empty lines lines = cleaned.split('\n') for line in lines: self.assertTrue(line.strip(), f"Found empty line: '{line}'") # Verify content is preserved self.assertIn("John Doe", cleaned) self.assertIn("Software Engineer", cleaned) self.assertIn("LinkedIn Profile", cleaned) def test_minimal_html_cleaning(self): """Test cleaning of minimal HTML content.""" minimal_html = "Content" result = _clean_html_content(minimal_html) self.assertEqual(result, minimal_html) def test_complex_whitespace_patterns(self): """Test cleaning of complex whitespace patterns.""" complex_html = """
\t\t Text \t

Paragraph

\t
""" result = _clean_html_content(complex_html) lines = result.split('\n') # Should have no empty lines for line in lines: self.assertTrue(line.strip()) # Should preserve content self.assertIn("Text", result) self.assertIn("Paragraph", result) if __name__ == '__main__': unittest.main()