gperdrizet committed on
Commit
d5a003e
·
verified ·
1 Parent(s): 7dcc57a

Added unit tests for context acquisition functions.

Browse files
Files changed (1) hide show
  1. tests/test_context_acquisition.py +252 -0
tests/test_context_acquisition.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unit tests for the context_acquisition module.
3
+ """
4
+
5
+ import unittest
6
+ import os
7
+ import tempfile
8
+ import shutil
9
+ from selenium.webdriver.chrome.options import Options
10
+
11
+ import functions.context_acquisition
12
+
13
+ # Import the functions to test
14
+ from functions.context_acquisition import (
15
+ _clean_html_content,
16
+ _save_html_to_file,
17
+ setup_chrome_driver_options
18
+ )
19
+
20
+
21
class TestCleanHTMLContent(unittest.TestCase):
    """Checks `_clean_html_content` against blank lines and trailing whitespace."""

    def test_remove_blank_lines(self):
        """Blank lines between tags disappear; every other line is kept as is."""
        raw = (
            "<html>\n"
            "\n"
            "<head>\n"
            "<title>Test</title>\n"
            "\n"
            "</head>\n"
            "\n"
            "<body>\n"
            "<div>Content</div>\n"
            "\n"
            "</body>\n"
            "</html>"
        )
        wanted = (
            "<html>\n"
            "<head>\n"
            "<title>Test</title>\n"
            "</head>\n"
            "<body>\n"
            "<div>Content</div>\n"
            "</body>\n"
            "</html>"
        )

        self.assertEqual(_clean_html_content(raw), wanted)

    def test_strip_trailing_whitespace(self):
        """Spaces and tabs at the end of a line are stripped."""
        raw = "<div>Content</div> \n<p>Text</p>\t\n"
        wanted = "<div>Content</div>\n<p>Text</p>"

        cleaned = _clean_html_content(raw)
        self.assertEqual(cleaned, wanted)

    def test_empty_content(self):
        """Empty and whitespace-only inputs all clean down to the empty string."""
        for blank_input in ("", " \n\n\t ", "\n"):
            self.assertEqual(_clean_html_content(blank_input), "")

    def test_single_line_content(self):
        """A single line with nothing to strip comes back unchanged."""
        one_liner = "<html><body>Content</body></html>"
        self.assertEqual(_clean_html_content(one_liner), one_liner)

    def test_mixed_whitespace(self):
        """Tabs, spaces and CRLF-only lines are all treated as blank."""
        raw = "<div>\t\n \n\r\n<p>Text</p>\n \n</div>"
        wanted = "<div>\n<p>Text</p>\n</div>"

        cleaned = _clean_html_content(raw)
        self.assertEqual(cleaned, wanted)
77
+
78
+
79
class TestSaveHTMLToFile(unittest.TestCase):
    """Test cases for the _save_html_to_file function."""

    def setUp(self):
        """Create a scratch directory and the sample HTML/URL used by the tests."""
        self.test_dir = tempfile.mkdtemp()
        self.test_html = "<html><body>Test content</body></html>"
        self.test_url = "https://www.linkedin.com/in/johndoe"

    def tearDown(self):
        """Remove the scratch directory created in setUp."""
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def test_successful_file_save(self):
        """Saving writes the given HTML, unchanged, to a ``.html`` file.

        The output location is redirected into the scratch directory by
        patching ``os.path.dirname`` as seen from ``functions.context_acquisition``.
        NOTE(review): this assumes ``_save_html_to_file`` derives its output
        directory from ``os.path.dirname`` of a path ending in
        ``context_acquisition.py`` -- confirm against the implementation.
        """
        # Imported locally because the module header does not import mock.
        from unittest import mock

        real_dirname = os.path.dirname

        def fake_dirname(path):
            # The patched attribute lives on the shared ``os.path`` module, so
            # unrelated callers may pass PathLike or bytes while the patch is
            # active; normalise first and only match on str paths.
            as_text = os.fspath(path)
            if isinstance(as_text, str) and as_text.endswith('context_acquisition.py'):
                return self.test_dir
            return real_dirname(path)

        # patch.object restores the real function on exit, including on error.
        # Only the call under test needs the redirect.
        with mock.patch.object(
            functions.context_acquisition.os.path, 'dirname', new=fake_dirname
        ):
            result = _save_html_to_file(self.test_html, self.test_url)

        # Verify file was created
        self.assertTrue(os.path.exists(result))
        self.assertTrue(result.endswith('.html'))

        # Verify file content
        with open(result, 'r', encoding='utf-8') as f:
            content = f.read()
        self.assertEqual(content, self.test_html)
122
+
123
+
124
class TestSetupChromeDriverOptions(unittest.TestCase):
    """Test cases for the setup_chrome_driver_options function."""

    def test_chrome_options_configuration(self):
        """The function returns a selenium Chrome ``Options`` object."""
        options = setup_chrome_driver_options()

        # assertIsInstance also fails for None, so it covers both checks.
        self.assertIsInstance(options, Options)

    def test_chrome_options_arguments(self):
        """Every configured Chrome argument is a non-blank string.

        Arguments are read through the public ``Options.arguments`` property,
        so no private attributes are touched. The specific flags are not
        pinned here because they are not visible from this test module.
        """
        options = setup_chrome_driver_options()

        arguments = options.arguments
        self.assertIsInstance(arguments, list)

        for argument in arguments:
            with self.subTest(argument=argument):
                self.assertIsInstance(argument, str)
                self.assertTrue(argument.strip())
145
+
146
+
147
class TestURLValidation(unittest.TestCase):
    """Test cases for URL validation logic (extracted from main function)."""

    @staticmethod
    def _looks_like_profile_url(url):
        """Return True when ``url`` passes the profile-URL checks used here.

        A value passes when it is a ``str``, is not blank after stripping,
        and contains ``"linkedin.com/in/"``. These are the same three checks
        the tests previously applied inline.
        NOTE(review): the production function is not called from this class;
        confirm it applies the same rule.
        """
        return (
            isinstance(url, str)
            and bool(url.strip())
            and "linkedin.com/in/" in url
        )

    def test_valid_linkedin_urls(self):
        """Well-formed LinkedIn profile URLs are accepted."""
        valid_urls = [
            "https://www.linkedin.com/in/johndoe",
            "https://linkedin.com/in/jane-smith",
            "http://www.linkedin.com/in/test123",
            "https://www.linkedin.com/in/user-name-with-dashes",
        ]

        for url in valid_urls:
            # subTest reports each failing URL instead of stopping at the first.
            with self.subTest(url=url):
                self.assertTrue(self._looks_like_profile_url(url))

    def test_invalid_linkedin_urls(self):
        """Empty, None, non-LinkedIn and non-profile URLs are rejected."""
        invalid_urls = [
            "",
            None,
            "https://www.example.com/profile",
            "https://www.linkedin.com/company/test",
            "https://github.com/user",
            "not-a-url",
        ]

        for url in invalid_urls:
            # Uses the same predicate as the valid case, so the two tests
            # cannot drift apart and no branch asserts its own condition.
            with self.subTest(url=url):
                self.assertFalse(self._looks_like_profile_url(url))
184
+
185
+
186
class TestHTMLContentProcessing(unittest.TestCase):
    """Workflow-level checks of `_clean_html_content` on fuller documents."""

    def test_html_cleaning_workflow(self):
        """A profile-like page loses its blank lines but keeps its text."""
        page = (
            "<!DOCTYPE html>\n"
            "<html>\n"
            "\n"
            "<head>\n"
            "<title>LinkedIn Profile</title>\n"
            "\n"
            "</head>\n"
            "\n"
            "<body>\n"
            "<div class=\"profile\">\n"
            "<h1>John Doe</h1>\n"
            "\n"
            "<p>Software Engineer</p>\n"
            "</div>\n"
            "\n"
            "</body>\n"
            "\n"
            "</html>"
        )

        cleaned = _clean_html_content(page)

        # No line of the output may be empty or whitespace-only.
        for line in cleaned.split('\n'):
            self.assertTrue(line.strip(), f"Found empty line: '{line}'")

        # The visible text must survive the cleaning.
        for fragment in ("John Doe", "Software Engineer", "LinkedIn Profile"):
            self.assertIn(fragment, cleaned)

    def test_minimal_html_cleaning(self):
        """A one-line document is returned unchanged."""
        document = "<html><body>Content</body></html>"
        self.assertEqual(_clean_html_content(document), document)

    def test_complex_whitespace_patterns(self):
        """Tab-only and empty lines are removed; the content lines remain."""
        messy = (
            "<div>\n"
            "\t\t\n"
            "<span>Text</span>\n"
            "\t\n"
            "\n"
            "<p>Paragraph</p>\n"
            "\t\n"
            "</div>"
        )

        cleaned = _clean_html_content(messy)

        # No line of the output may be empty or whitespace-only.
        for line in cleaned.split('\n'):
            self.assertTrue(line.strip())

        # The visible text must survive the cleaning.
        for fragment in ("Text", "Paragraph"):
            self.assertIn(fragment, cleaned)
249
+
250
+
251
+ if __name__ == '__main__':
252
+ unittest.main()