Spaces:

gperdrizet
/

resumate

Configuration error

App Files Files

xet

Community

gperdrizet committed on Jul 8

Commit

d5a003e

verified ·

1 Parent(s): 7dcc57a

Added unit tests for context acquisition functions.

Browse files

Files changed (1) hide show

tests/test_context_acquisition.py +252 -0

tests/test_context_acquisition.py ADDED Viewed

	@@ -0,0 +1,252 @@

+"""
+Unit tests for the context_acquisition module.
+"""
+import unittest
+import os
+import tempfile
+import shutil
+from selenium.webdriver.chrome.options import Options
+import functions.context_acquisition
+# Import the functions to test
+from functions.context_acquisition import (
+    _clean_html_content,
+    _save_html_to_file,
+    setup_chrome_driver_options
+)
+class TestCleanHTMLContent(unittest.TestCase):
+    """Test cases for the _clean_html_content function.
+    The assertions in this class expect that blank and whitespace-only
+    lines are dropped, that trailing whitespace is stripped from each kept
+    line, and that leading indentation is preserved.
+    """
+    def test_remove_blank_lines(self):
+        """Blank lines between content lines are removed; content is kept."""
+        content_lines = [
+            "<html>",
+            "<head>",
+            "    <title>Test</title>",
+            "</head>",
+            "<body>",
+            "    <div>Content</div>",
+            "</body>",
+            "</html>",
+        ]
+        # Build the input with explicit "\n" escapes so the blank lines cannot
+        # be lost to an editor or formatter; "\n\n" places one empty line
+        # between every pair of content lines.
+        html_with_blanks = "\n\n".join(content_lines)
+        expected = "\n".join(content_lines)
+        # Guard against a vacuous test: the input must differ from the output.
+        self.assertNotEqual(html_with_blanks, expected)
+        result = _clean_html_content(html_with_blanks)
+        self.assertEqual(result, expected)
+    def test_strip_trailing_whitespace(self):
+        """Trailing spaces, tabs and the final newline are removed."""
+        html_with_trailing = "<div>Content</div>   \n<p>Text</p>\t\n"
+        expected = "<div>Content</div>\n<p>Text</p>"
+        result = _clean_html_content(html_with_trailing)
+        self.assertEqual(result, expected)
+    def test_empty_content(self):
+        """Empty and whitespace-only inputs all clean to the empty string."""
+        for blank in ("", "   \n\n\t  ", "\n"):
+            # subTest reports each failing input separately instead of
+            # stopping at the first one.
+            with self.subTest(content=blank):
+                self.assertEqual(_clean_html_content(blank), "")
+    def test_single_line_content(self):
+        """A single line without surrounding whitespace is returned unchanged."""
+        single_line = "<html><body>Content</body></html>"
+        result = _clean_html_content(single_line)
+        self.assertEqual(result, single_line)
+    def test_mixed_whitespace(self):
+        """Lines made of tabs, spaces or a bare carriage return are dropped."""
+        mixed = "<div>\t\n  \n\r\n<p>Text</p>\n   \n</div>"
+        expected = "<div>\n<p>Text</p>\n</div>"
+        result = _clean_html_content(mixed)
+        self.assertEqual(result, expected)
+class TestSaveHTMLToFile(unittest.TestCase):
+    """Test cases for the _save_html_to_file function."""
+    def setUp(self):
+        """Create a temporary directory and the sample inputs for each test."""
+        self.test_dir = tempfile.mkdtemp()
+        # addCleanup runs even if the rest of setUp or the test itself fails;
+        # ignore_errors tolerates the directory already being gone.
+        self.addCleanup(shutil.rmtree, self.test_dir, ignore_errors=True)
+        self.test_html = "<html><body>Test content</body></html>"
+        self.test_url = "https://www.linkedin.com/in/johndoe"
+    def test_successful_file_save(self):
+        """Saving returns the path of an existing .html file with the content."""
+        # Imported locally so the test does not rely on a module-level import.
+        from unittest import mock
+        module_os_path = functions.context_acquisition.os.path
+        real_dirname = module_os_path.dirname
+        def fake_dirname(path):
+            """Answer with the temp dir for the module's own file path only."""
+            is_module_file = (
+                isinstance(path, str)
+                and path.endswith('context_acquisition.py')
+            )
+            if is_module_file:
+                return self.test_dir
+            return real_dirname(path)
+        # NOTE(review): functions.context_acquisition.os is presumably the
+        # shared stdlib os module, so this patch is process-wide while active.
+        # patch.object restores the original on exit, even on error.
+        with mock.patch.object(module_os_path, 'dirname', fake_dirname):
+            result = _save_html_to_file(self.test_html, self.test_url)
+        # Verify the file was created with the expected extension.
+        self.assertTrue(os.path.exists(result))
+        self.assertTrue(result.endswith('.html'))
+        # Verify the file content round-trips unchanged.
+        with open(result, 'r', encoding='utf-8') as f:
+            content = f.read()
+        self.assertEqual(content, self.test_html)
+class TestSetupChromeDriverOptions(unittest.TestCase):
+    """Test cases for the setup_chrome_driver_options function."""
+    def test_chrome_options_configuration(self):
+        """The function returns a selenium Chrome Options instance."""
+        options = setup_chrome_driver_options()
+        # assertIsInstance also fails for None, so no separate None check.
+        self.assertIsInstance(options, Options)
+    def test_chrome_options_arguments(self):
+        """Chrome command-line arguments are present on the returned options."""
+        options = setup_chrome_driver_options()
+        # Options.arguments is selenium's public accessor for the arguments
+        # added via add_argument(), so no private attributes are needed.
+        arguments = options.arguments
+        self.assertIsInstance(arguments, list)
+        # NOTE(review): assumes the function adds at least one argument, as
+        # this test's name implies; pin the exact flags once confirmed.
+        self.assertTrue(arguments, "expected at least one Chrome argument")
+        for argument in arguments:
+            with self.subTest(argument=argument):
+                self.assertIsInstance(argument, str)
+class TestURLValidation(unittest.TestCase):
+    """Test cases for URL validation logic (extracted from main function)."""
+    @staticmethod
+    def _is_valid_linkedin_url(url):
+        """Return True when url is a non-blank str containing the profile path.
+        NOTE(review): this is a local copy of the rule these tests describe,
+        not a call into functions.context_acquisition. Keep it in sync with
+        the production check, or call that check directly if it is exposed.
+        """
+        return (
+            isinstance(url, str)
+            and bool(url.strip())
+            and "linkedin.com/in/" in url
+        )
+    def test_valid_linkedin_urls(self):
+        """LinkedIn profile URLs are accepted."""
+        valid_urls = [
+            "https://www.linkedin.com/in/johndoe",
+            "https://linkedin.com/in/jane-smith",
+            "http://www.linkedin.com/in/test123",
+            "https://www.linkedin.com/in/user-name-with-dashes",
+        ]
+        for url in valid_urls:
+            # subTest names the offending URL when an assertion fails.
+            with self.subTest(url=url):
+                self.assertTrue(self._is_valid_linkedin_url(url))
+    def test_invalid_linkedin_urls(self):
+        """Empty, blank, non-string and non-profile URLs are rejected."""
+        invalid_urls = [
+            "",
+            "   ",
+            None,
+            "https://www.example.com/profile",
+            "https://www.linkedin.com/company/test",
+            "https://github.com/user",
+            "not-a-url",
+        ]
+        for url in invalid_urls:
+            # One assertion for every case: the predicate must say False.
+            # Branching on the condition and then asserting that same
+            # condition would pass for any input.
+            with self.subTest(url=url):
+                self.assertFalse(self._is_valid_linkedin_url(url))
+class TestHTMLContentProcessing(unittest.TestCase):
+    """Test cases for HTML content processing workflows."""
+    def _assert_no_blank_lines(self, text):
+        """Fail for every line of text that is empty or whitespace-only."""
+        for number, line in enumerate(text.split('\n'), start=1):
+            with self.subTest(line_number=number):
+                self.assertTrue(line.strip(), f"Found empty line: '{line}'")
+    def test_html_cleaning_workflow(self):
+        """A full page is cleaned of blank lines and keeps its text content."""
+        content_lines = [
+            "<!DOCTYPE html>",
+            "<html>",
+            "<head>",
+            "    <title>LinkedIn Profile</title>",
+            "</head>",
+            "<body>",
+            '    <div class="profile">',
+            "        <h1>John Doe</h1>",
+            "        <p>Software Engineer</p>",
+            "    </div>",
+            "</body>",
+            "</html>",
+        ]
+        # Put a whitespace-only line between content lines so the blank-line
+        # check below cannot pass on an input that was left untouched.
+        raw_html = "\n  \t\n".join(content_lines)
+        cleaned = _clean_html_content(raw_html)
+        # Verify no empty lines
+        self._assert_no_blank_lines(cleaned)
+        # Verify content is preserved
+        self.assertIn("John Doe", cleaned)
+        self.assertIn("Software Engineer", cleaned)
+        self.assertIn("LinkedIn Profile", cleaned)
+    def test_minimal_html_cleaning(self):
+        """Already-clean single-line HTML is returned unchanged."""
+        minimal_html = "<html><body>Content</body></html>"
+        result = _clean_html_content(minimal_html)
+        self.assertEqual(result, minimal_html)
+    def test_complex_whitespace_patterns(self):
+        """Lines holding only tabs and/or spaces are removed."""
+        # Written as one escaped literal so the whitespace-only lines are
+        # visible and cannot be altered by an editor.
+        complex_html = (
+            "<div>\n"
+            "\t\t\n"
+            "    <span>Text</span>\n"
+            "\t\n"
+            "<p>Paragraph</p>\n"
+            "   \t\n"
+            "</div>"
+        )
+        result = _clean_html_content(complex_html)
+        # Should have no empty lines
+        self._assert_no_blank_lines(result)
+        # Should preserve content
+        self.assertIn("Text", result)
+        self.assertIn("Paragraph", result)
+# Run the tests in this module when the file is executed as a script.
+if __name__ == '__main__':
+    unittest.main()