"""
API-Based Clue Generator for Crossword Puzzles

Uses the Hugging Face Inference API to test multiple models without local downloads.
"""

import os
import time
import json
import logging
import requests
from typing import Dict, Optional, Tuple
from pathlib import Path

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class APIClueGenerator:
    """
    API-based clue generator using the Hugging Face Inference API.

    Tests multiple models without local downloads.
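
    Example (illustrative):
        generator = APIClueGenerator()  # token read from HF_TOKEN if not passed
        clues = generator.generate_clue("CAT", "animals")
        # -> {"deepseek-v3": "...", "llama-3.3-70b": "..."}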
    """

    def __init__(self, hf_token: Optional[str] = None):
        """Initialize the API clue generator.

        Args:
            hf_token: Hugging Face API token. If omitted, it is read from the
                HF_TOKEN or HUGGINGFACE_TOKEN environment variable; a token is
                required for Router API requests.
        """
        self.hf_token = hf_token or os.getenv('HF_TOKEN') or os.getenv('HUGGINGFACE_TOKEN')
        self.base_url = "https://router.huggingface.co/v1"

        # Router model IDs use the "<repo_id>:<inference_provider>" format.
        self.models = {
            "deepseek-v3": "deepseek-ai/DeepSeek-V3-0324:fireworks-ai",
            "llama-3.3-70b": "meta-llama/Llama-3.3-70B-Instruct:fireworks-ai",
        }

        self.headers = {}
        if self.hf_token:
            self.headers["Authorization"] = f"Bearer {self.hf_token}"

        # Prompt templates for text-completion style endpoints. The chat-based
        # query_model() below does not use them; they are retained for reference.
        self.prompts = {
            "instruction": """Generate a crossword clue for '{word}' (category: {topic}).

Rules:
- 2-6 words only
- Don't use the word '{word}' in the clue
- Be descriptive and accurate

Examples:
- CAT (animals) → "Feline household pet"
- GUITAR (music) → "Six-stringed instrument"
- AIRPORT (transportation) → "Flight departure hub"

Clue for '{word}' ({topic}):""",

            "simple": """Complete this crossword clue:

{word} ({topic}) = [ANSWER]

Examples:
VIOLIN (music) = Bowed string instrument
SCIENTIST (science) = Research professional
DATABASE (technology) = Information storage system

{word} ({topic}) =""",

            "question": """What is '{word}' in the context of {topic}? Give a brief crossword clue (2-5 words) without using the word '{word}'.

Answer:"""
        }

    def query_model(self, model_name: str, word: str, context: str, max_retries: int = 3) -> Optional[str]:
        """Query a model via the Hugging Face Router API using the chat-completions format.

        Args:
            model_name: Router model ID to query (see self.models).
            word: Target word for clue generation.
            context: Topic/context for the word.
            max_retries: Maximum number of attempts before giving up.

        Returns:
            Generated clue text, or None if all attempts failed.
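
        Example (illustrative; requires a valid token and network access):
            generator = APIClueGenerator()
            generator.query_model("deepseek-ai/DeepSeek-V3-0324:fireworks-ai", "CAT", "animals")
            # -> "Feline household pet" (actual output varies between calls)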
        """
        url = f"{self.base_url}/chat/completions"

        messages = [
            {
                "role": "system",
                "content": f"You are a crossword puzzle clue generator. Generate a single, concise, creative crossword clue for the word '{word}'. The clue should be 2-8 words, accurate, and not contain the word '{word}' itself."
            },
            {
                "role": "user",
                "content": f"Generate a crossword clue for the word '{word}' in the context of '{context}'."
            }
        ]

        payload = {
            "model": model_name,
            "messages": messages,
            "temperature": 0.7,
            "max_tokens": 50
        }

        for attempt in range(max_retries):
            try:
                response = requests.post(url, headers=self.headers, json=payload, timeout=30)
                response.raise_for_status()

                result = response.json()

                # Expected shape: {"choices": [{"message": {"content": "..."}}], ...}
                if "choices" in result and len(result["choices"]) > 0:
                    generated_text = result["choices"][0]["message"]["content"].strip()
                    return self._clean_response(generated_text)
                else:
                    logger.warning(f"No choices in response for {model_name}")
                    return None

            except requests.exceptions.RequestException as e:
                logger.warning(f"Request failed for {model_name} (attempt {attempt+1}): {e}")
                if hasattr(e, 'response') and e.response is not None:
                    logger.warning(f"Response content: {e.response.text}")
                if attempt < max_retries - 1:
                    time.sleep(2)
            except Exception as e:
                logger.warning(f"Unexpected error for {model_name} (attempt {attempt+1}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(2)

        return None

    def _clean_response(self, text: str) -> str:
        """Clean and validate an API response, returning '' if it is unusable."""
        if not text:
            return ""

        # Collapse whitespace and strip surrounding quotes/brackets.
        text = text.strip()
        text = text.replace('\n', ' ').replace('\t', ' ')
        text = text.strip('"\'[](){}')

        # Keep only the first sentence if the model returned several.
        if '.' in text:
            text = text.split('.')[0].strip()

        # Reject answers that are too short or too long to be a clue.
        if len(text) < 3 or len(text) > 100:
            return ""

        return text

    def generate_clue(self, word: str, topic: str) -> Dict[str, Optional[str]]:
        """Generate clues for one word using all configured models.

        Args:
            word: Target word.
            topic: Topic/category context.

        Returns:
            Dictionary mapping model names to generated clues (None on failure).
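
        Example return value (illustrative; actual clues vary):
            {"deepseek-v3": "Feline household pet", "llama-3.3-70b": "Household feline"}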
        """
        results = {}

        logger.info(f"🎯 Generating clues for '{word}' + '{topic}' using {len(self.models)} models")

        for model_key, model_name in self.models.items():
            logger.info(f"  Querying {model_key}...")
            clue = self.query_model(model_name, word, topic)
            results[model_key] = clue

            # Brief pause between models to stay under API rate limits.
            time.sleep(1)

        return results

    def evaluate_clue_quality(self, word: str, clue: str) -> Tuple[str, float]:
        """Evaluate the quality of a generated clue with a simple heuristic.

        Args:
            word: Target word.
            clue: Generated clue.

        Returns:
            Tuple of (quality_label, quality_score), with the score in [0, 1].
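
        Example (scores are heuristic; floating-point sums are approximate):
            evaluate_clue_quality("CAT", "Feline household pet")  ->  ("GOOD", ~0.7)
            evaluate_clue_quality("CAT", "A cat that purrs")      ->  ("POOR", 0.2)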
        """
        if not clue or len(clue.strip()) < 3:
            return "FAILED", 0.0

        word_lower = word.lower()
        clue_lower = clue.lower()

        # Using the answer word in its own clue caps the result at POOR.
        if word_lower in clue_lower:
            return "POOR", 0.2

        # Baseline score plus bonuses for descriptive vocabulary and length.
        score = 0.5
        quality_words = ["player", "instrument", "device", "system", "location", "animal",
                         "food", "building", "activity", "professional", "tool", "creature",
                         "terminal", "hub", "language", "storage", "sport", "game"]

        if any(qw in clue_lower for qw in quality_words):
            score += 0.3

        if 2 <= len(clue.split()) <= 6:
            score += 0.1

        if len(clue) >= 8:
            score += 0.1

        if score >= 0.8:
            return "EXCELLENT", score
        elif score >= 0.6:
            return "GOOD", score
        elif score >= 0.4:
            return "ACCEPTABLE", score
        else:
            return "POOR", score


def main():
    """Demo the API clue generator on a few test words."""
    generator = APIClueGenerator()

    if not generator.hf_token:
        print("❌ Error: HF_TOKEN environment variable not set")
        print("Please set your Hugging Face token: export HF_TOKEN='hf_your_token_here'")
        return

    test_cases = [
        ("CRICKET", "sports"),
        ("SHUTTLE", "space"),
        ("CAT", "animals"),
        ("DATABASE", "technology"),
    ]

    for word, topic in test_cases:
        print(f"\n🧪 Testing: {word} + {topic}")
        print("=" * 50)

        results = generator.generate_clue(word, topic)

        for model, clue in results.items():
            if clue:
                quality, score = generator.evaluate_clue_quality(word, clue)
                print(f"{model:15} | {quality:10} | {clue}")
            else:
                print(f"{model:15} | {'FAILED':10} | No response")


if __name__ == "__main__":
    main()