# Spaces: Sleeping
import requests | |
import json | |
import time | |
from typing import List, Dict, Any, Optional | |
from utils import SafeProgress | |
def call_chicory_parser(
    product_names: List[str],
    batch_size: int = 25,
    delay_seconds: float = 0.1,
    progress=None,
) -> Dict[str, Any]:
    """
    Call the Chicory Parser V3 API to get ingredient predictions.

    Inputs that fit in one batch are sent in a single request. Longer inputs
    are split into consecutive slices of at most ``batch_size`` names, sent
    one request per slice, with a pause between requests.

    Args:
        product_names: List of product names to parse
        batch_size: Maximum number of products to process in one batch
            (must be at least 1)
        delay_seconds: Delay between batches in seconds
        progress: Optional progress tracking object (Gradio progress bar)

    Returns:
        Dictionary mapping product names to their Chicory Parser results.
        Results from all batches are merged into one dictionary; a key
        returned by a later batch replaces the same key from an earlier one.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    # Validate before doing anything else: a zero batch size would otherwise
    # surface as a ZeroDivisionError, and a negative one would produce no
    # batches at all while still reporting the parse as completed.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    progress_tracker = SafeProgress(progress, desc="Parsing products")
    total_products = len(product_names)

    # Check if batching is needed
    if total_products <= batch_size:
        progress_tracker(0.1, desc=f"Parsing {total_products} products...")
        result = _make_chicory_api_call(product_names)
        progress_tracker(1.0, desc="Parsing complete")
        return result

    # Create batch index ranges as half-open (start, end) pairs
    batch_ranges = [
        (start, min(start + batch_size, total_products))
        for start in range(0, total_products, batch_size)
    ]
    total_batches = len(batch_ranges)

    # Process each batch with tqdm progress
    all_results: Dict[str, Any] = {}
    for i, (start, end) in enumerate(progress_tracker.tqdm(batch_ranges, desc="Processing batches")):
        batch = product_names[start:end]
        batch_number = i + 1

        # Update with more specific progress info; the +0.5 reports the
        # batch as half done while its request is in flight.
        batch_desc = f"Batch {batch_number}/{total_batches}: {len(batch)} products"
        progress_tracker((i + 0.5) / total_batches, desc=batch_desc)

        batch_results = _make_chicory_api_call(batch)
        all_results.update(batch_results)

        # Add delay before processing the next batch (but not after the last batch)
        if end < total_products:
            time.sleep(delay_seconds)

    progress_tracker(1.0, desc=f"Completed parsing {total_products} products")
    return all_results
def _make_chicory_api_call(product_names: List[str]) -> Dict[str, Any]: | |
""" | |
Makes the actual API call to Chicory Parser | |
""" | |
url = "https://prod-parserv3.chicoryapp.com/api/v3/prediction" | |
# Prepare the payload | |
items = [{"id": i, "text": name} for i, name in enumerate(product_names)] | |
payload = json.dumps({"items": items}) | |
# Set headers | |
headers = { | |
'Content-Type': 'application/json' | |
} | |
try: | |
response = requests.post(url, headers=headers, data=payload) | |
response.raise_for_status() # Raise exception for HTTP errors | |
# Parse the response | |
results = response.json() | |
# Create a dictionary mapping product names to results | |
product_results = {} | |
for result in results: | |
product_name = result["input_text"] | |
product_results[product_name] = result | |
return product_results | |
except requests.exceptions.RequestException as e: | |
print(f"Error calling Chicory Parser API: {e}") | |
return {} | |
except json.JSONDecodeError: | |
print(f"Error parsing Chicory API response: {response.text}") | |
return {} | |