Spaces:
Sleeping
Sleeping
| from crawl4ai.llmtxt import AsyncLLMTextManager # Changed to AsyncLLMTextManager | |
| from crawl4ai.async_logger import AsyncLogger | |
| from pathlib import Path | |
| import asyncio | |
async def main():
    """Exercise index generation and search on the local llm.txt docs folder.

    Steps, in order:

    1. Resolve ``local/_docs/llm.txt`` relative to the parent of this
       script's directory and create it (including parents) if missing.
    2. Build an ``AsyncLLMTextManager`` over that folder with ``batch_size=2``.
    3. Print the name of every ``*.md`` file directly inside the folder.
    4. Await ``generate_index_files`` with ``force_generate_facts=False`` and
       ``clear_bm25_cache=False``.
    5. Run each sample query through ``manager.search(..., top_k=2)`` and
       print the result length plus a single-line preview of its first
       200 characters, or a "no results" notice when the result is empty.
    """
    script_path = Path(__file__).resolve()

    # NOTE: a narrower "local/_docs/llm.txt/test_docs" folder was used here
    # at some point; the script now targets the whole llm.txt folder.
    docs_root = script_path.parent.parent / "local/_docs/llm.txt"
    docs_root.mkdir(parents=True, exist_ok=True)

    log = AsyncLogger()

    # NOTE: an earlier variant of this call also passed
    # max_concurrent_calls=3; only batch_size is overridden now.
    manager = AsyncLLMTextManager(docs_root, log, batch_size=2)

    # Show which markdown sources are present before indexing.
    print("\nAvailable files:")
    for md_file in docs_root.glob("*.md"):
        print(f"- {md_file.name}")

    print("\nGenerating index files...")
    await manager.generate_index_files(
        force_generate_facts=False,
        clear_bm25_cache=False,
    )

    # Sample questions about Crawl4AI used to probe the search index.
    sample_queries = ["How is using the `arun_many` method?"]

    print("\nTesting search functionality:")
    for question in sample_queries:
        print(f"\nQuery: {question}")
        # presumably search() returns one text blob (it is measured with
        # len() and sliced below) - confirm against AsyncLLMTextManager.
        hits = manager.search(question, top_k=2)
        print(f"Results length: {len(hits)} characters")

        if not hits:
            print("No results found")
            continue

        # Flatten newlines so the preview stays on one console line.
        preview = hits[:200].replace('\n', ' ')
        print("First 200 chars of results:", preview, "...")
# Run the async entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())