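"""Tests for AsyncWebCrawler caching behaviour: cache hits, cache bypass,
cache size tracking, clearing, and flushing."""
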
import os
import sys

import pytest

# Add the parent directory to the Python path so the local crawl4ai package is importable
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from crawl4ai.async_webcrawler import AsyncWebCrawler
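
# NOTE: these coroutine tests rely on the pytest-asyncio plugin; the
# @pytest.mark.asyncio markers below assume it is installed.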


@pytest.mark.asyncio
async def test_cache_url():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.example.com"

        # First run to populate the cache
        result1 = await crawler.arun(url=url, bypass_cache=True)
        assert result1.success

        # Second run should be served from the cache and return identical HTML
        result2 = await crawler.arun(url=url, bypass_cache=False)
        assert result2.success
        assert result2.html == result1.html


@pytest.mark.asyncio
async def test_bypass_cache():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.python.org"

        # First run to cache the URL
        result1 = await crawler.arun(url=url, bypass_cache=True)
        assert result1.success

        # Second run bypasses the cache and fetches the page again
        result2 = await crawler.arun(url=url, bypass_cache=True)
        assert result2.success
        # The HTML is expected to differ because the page is dynamic;
        # this assertion can be flaky for fully static pages
        assert result2.html != result1.html
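

# Note: the next test assumes crawling a not-yet-cached URL adds exactly one cache entry.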
@pytest.mark.asyncio
async def test_cache_size():
    async with AsyncWebCrawler(verbose=True) as crawler:
        initial_size = await crawler.aget_cache_size()

        url = "https://www.nbcnews.com/business"
        await crawler.arun(url=url, bypass_cache=True)

        new_size = await crawler.aget_cache_size()
        assert new_size == initial_size + 1


@pytest.mark.asyncio
async def test_clear_cache():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.example.org"
        await crawler.arun(url=url, bypass_cache=True)

        initial_size = await crawler.aget_cache_size()
        assert initial_size > 0

        await crawler.aclear_cache()
        new_size = await crawler.aget_cache_size()
        assert new_size == 0
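

# Clearing and flushing are exercised separately: clearing is expected to empty
# the cache contents, while flushing tears the cache store down entirely (an
# assumption based on the distinction these two tests draw).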
@pytest.mark.asyncio
async def test_flush_cache():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.example.net"
        await crawler.arun(url=url, bypass_cache=True)

        initial_size = await crawler.aget_cache_size()
        assert initial_size > 0

        await crawler.aflush_cache()
        new_size = await crawler.aget_cache_size()
        assert new_size == 0

        # Retrieve the previously cached URL again: the crawler should still
        # succeed, but it has to fetch the content anew since the cache is gone
        result = await crawler.arun(url=url, bypass_cache=False)
        assert result.success


# Entry point for debugging: run this file directly to execute the tests verbosely
if __name__ == "__main__":
    pytest.main([__file__, "-v"])