Space status: Runtime error
amaye15 committed · Commit a976cb6
Parent(s): 8a2df9f
Debug - Recursion Error
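The change comments out the original /crawl_direct handler and re-registers it with logger.info/logger.debug calls around each step (pool acquire, strategy creation, crawl, pool release), so the logs show how far a request gets before the recursion error surfaces. For the new logger.debug lines to actually appear in the Space logs, the logger has to be configured at DEBUG level. A minimal sketch, assuming main.py uses the standard logging module and a module-level logger (the logger name and format here are illustrative, not taken from the repo):

    import logging

    # Assumption: main.py obtains its logger via logging.getLogger(...);
    # adjust the name to whatever the module actually uses.
    logging.basicConfig(
        level=logging.DEBUG,  # surface logger.debug(...) calls, not just INFO/ERROR
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )
    logger = logging.getLogger("main")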
main.py CHANGED

@@ -474,18 +474,71 @@ async def crawl_sync(request: CrawlRequest) -> Dict[str, Any]:
         raise HTTPException(status_code=408, detail="Task timed out")
 
 
+# @app.post(
+#     "/crawl_direct", dependencies=[secure_endpoint()] if CRAWL4AI_API_TOKEN else []
+# )
+# async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]:
+#     try:
+#         crawler = await crawler_service.crawler_pool.acquire(**request.crawler_params)
+#         extraction_strategy = crawler_service._create_extraction_strategy(
+#             request.extraction_config
+#         )
+
+#         try:
+#             if isinstance(request.urls, list):
+#                 results = await crawler.arun_many(
+#                     urls=[str(url) for url in request.urls],
+#                     extraction_strategy=extraction_strategy,
+#                     js_code=request.js_code,
+#                     wait_for=request.wait_for,
+#                     css_selector=request.css_selector,
+#                     screenshot=request.screenshot,
+#                     magic=request.magic,
+#                     cache_mode=request.cache_mode,
+#                     session_id=request.session_id,
+#                     **request.extra,
+#                 )
+#                 return {"results": [result.dict() for result in results]}
+#             else:
+#                 result = await crawler.arun(
+#                     url=str(request.urls),
+#                     extraction_strategy=extraction_strategy,
+#                     js_code=request.js_code,
+#                     wait_for=request.wait_for,
+#                     css_selector=request.css_selector,
+#                     screenshot=request.screenshot,
+#                     magic=request.magic,
+#                     cache_mode=request.cache_mode,
+#                     session_id=request.session_id,
+#                     **request.extra,
+#                 )
+#                 return {"result": result.dict()}
+#         finally:
+#             await crawler_service.crawler_pool.release(crawler)
+#     except Exception as e:
+#         logger.error(f"Error in direct crawl: {str(e)}")
+#         raise HTTPException(status_code=500, detail=str(e))
+
+
 @app.post(
     "/crawl_direct", dependencies=[secure_endpoint()] if CRAWL4AI_API_TOKEN else []
 )
 async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]:
+    logger.info("Received request to crawl directly.")
     try:
+        logger.debug("Acquiring crawler from the crawler pool.")
         crawler = await crawler_service.crawler_pool.acquire(**request.crawler_params)
+        logger.debug("Crawler acquired successfully.")
+
+        logger.debug("Creating extraction strategy based on the request configuration.")
         extraction_strategy = crawler_service._create_extraction_strategy(
             request.extraction_config
         )
+        logger.debug("Extraction strategy created successfully.")
 
         try:
             if isinstance(request.urls, list):
+                logger.info("Processing multiple URLs.")
                 results = await crawler.arun_many(
                     urls=[str(url) for url in request.urls],
                     extraction_strategy=extraction_strategy,
@@ -498,8 +551,10 @@ async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]:
                     session_id=request.session_id,
                     **request.extra,
                 )
+                logger.info("Crawling completed for multiple URLs.")
                 return {"results": [result.dict() for result in results]}
             else:
+                logger.info("Processing a single URL.")
                 result = await crawler.arun(
                     url=str(request.urls),
                     extraction_strategy=extraction_strategy,
@@ -512,9 +567,12 @@ async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]:
                     session_id=request.session_id,
                     **request.extra,
                 )
+                logger.info("Crawling completed for a single URL.")
                 return {"result": result.dict()}
         finally:
+            logger.debug("Releasing crawler back to the pool.")
             await crawler_service.crawler_pool.release(crawler)
+            logger.debug("Crawler released successfully.")
     except Exception as e:
         logger.error(f"Error in direct crawl: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
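With the instrumented endpoint deployed, a single direct request is enough to generate the new log trail and see which step precedes the recursion error. A sketch of exercising /crawl_direct, assuming the Space serves FastAPI locally on port 7860 and that CRAWL4AI_API_TOKEN, when set, is passed as a bearer token (both are assumptions; adjust host and auth to the actual deployment):

    import httpx

    # Hypothetical host/port and payload; the field name mirrors the CrawlRequest
    # attribute referenced in the diff (urls, js_code, cache_mode, ...).
    payload = {"urls": "https://example.com"}  # a plain string takes the arun() branch
    # payload = {"urls": ["https://example.com", "https://example.org"]}  # a list takes arun_many()
    headers = {"Authorization": "Bearer <CRAWL4AI_API_TOKEN>"}  # only if the token is set

    resp = httpx.post(
        "http://localhost:7860/crawl_direct",
        json=payload,
        headers=headers,
        timeout=120.0,
    )
    resp.raise_for_status()
    print(resp.json())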