# Source listing header (not code): File size: 9,110 Bytes, commit 486eff6
#!/usr/bin/env python3
"""
API-Based Clue Generator for Crossword Puzzles
Uses Hugging Face Inference API to test multiple models without local downloads.
"""
import json
import logging
import os
import re
import time
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import requests
# Set up module-wide logging: INFO level, timestamped "name - level - message" format.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger named after this module, per logging best practice.
logger = logging.getLogger(__name__)
class APIClueGenerator:
    """
    API-based crossword clue generator using the Hugging Face Router API.

    Queries several hosted chat models (no local downloads) and provides a
    simple heuristic scorer for the clues they return.
    """

    def __init__(self, hf_token: Optional[str] = None):
        """Initialize API clue generator.

        Args:
            hf_token: Hugging Face API token (optional but recommended for
                rate limits). Falls back to the HF_TOKEN or HUGGINGFACE_TOKEN
                environment variables when not given.
        """
        self.hf_token = hf_token or os.getenv('HF_TOKEN') or os.getenv('HUGGINGFACE_TOKEN')
        self.base_url = "https://router.huggingface.co/v1"  # Router API speaks the OpenAI-compatible chat protocol
        # Models available via HF Router API; the ":fireworks-ai" suffix pins
        # the serving provider.
        self.models = {
            "deepseek-v3": "deepseek-ai/DeepSeek-V3-0324:fireworks-ai",
            "llama-3.3-70b": "meta-llama/Llama-3.3-70B-Instruct:fireworks-ai",
        }
        # Only attach the Authorization header when a token is available;
        # unauthenticated requests are still possible but heavily rate-limited.
        self.headers: Dict[str, str] = {}
        if self.hf_token:
            self.headers["Authorization"] = f"Bearer {self.hf_token}"
        # Text-completion prompt templates. query_model() builds chat messages
        # directly and does not use these; they are kept as public data for
        # callers experimenting with raw completion endpoints.
        self.prompts = {
            "instruction": """Generate a crossword clue for '{word}' (category: {topic}).
Rules:
- 2-6 words only
- Don't use the word '{word}' in the clue
- Be descriptive and accurate
Examples:
- CAT (animals) → "Feline household pet"
- GUITAR (music) → "Six-stringed instrument"
- AIRPORT (transportation) → "Flight departure hub"
Clue for '{word}' ({topic}):""",
            "simple": """Complete this crossword clue:
{word} ({topic}) = [ANSWER]
Examples:
VIOLIN (music) = Bowed string instrument
SCIENTIST (science) = Research professional
DATABASE (technology) = Information storage system
{word} ({topic}) =""",
            "question": """What is '{word}' in the context of {topic}? Give a brief crossword clue (2-5 words) without using the word '{word}'.
Answer:"""
        }

    def query_model(self, model_name: str, word: str, context: str, max_retries: int = 3) -> Optional[str]:
        """Query a model via Hugging Face Router API using chat completions format.

        Args:
            model_name: Fully-qualified Router model id (e.g. "org/model:provider").
            word: Target word for clue generation.
            context: Topic/context for the word.
            max_retries: Maximum number of attempts before giving up.

        Returns:
            Cleaned clue text, or None if every attempt failed.
        """
        url = f"{self.base_url}/chat/completions"
        messages = [
            {
                "role": "system",
                "content": f"You are a crossword puzzle clue generator. Generate a single, concise, creative crossword clue for the word '{word}'. The clue should be 2-8 words, accurate, and not contain the word '{word}' itself."
            },
            {
                "role": "user",
                "content": f"Generate a crossword clue for the word '{word}' in the context of '{context}'."
            }
        ]
        payload = {
            "model": model_name,
            "messages": messages,
            "temperature": 0.7,  # some creativity, but stay on-topic
            "max_tokens": 50     # clues are short; cap response cost
        }
        for attempt in range(max_retries):
            try:
                response = requests.post(url, headers=self.headers, json=payload, timeout=30)
                response.raise_for_status()
                result = response.json()
                # Extract content from the chat-completions response shape.
                if "choices" in result and len(result["choices"]) > 0:
                    generated_text = result["choices"][0]["message"]["content"].strip()
                    return self._clean_response(generated_text)
                logger.warning(f"No choices in response for {model_name}")
                return None
            except requests.exceptions.RequestException as e:
                logger.warning(f"Request failed for {model_name} (attempt {attempt+1}): {e}")
                # HTTPError carries the server response; log its body for debugging.
                if hasattr(e, 'response') and e.response is not None:
                    logger.warning(f"Response content: {e.response.text}")
                if attempt < max_retries - 1:
                    time.sleep(2)  # brief fixed backoff before retrying
            except Exception as e:
                logger.warning(f"Unexpected error for {model_name} (attempt {attempt+1}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(2)
        return None

    def _clean_response(self, text: str) -> str:
        """Clean and validate an API response into a single short clue line.

        Returns "" when the cleaned text is empty, too short (< 3 chars) or
        too long (> 100 chars) to be a usable clue.
        """
        if not text:
            return ""
        # Keep only the first line: chat models often append explanations.
        # (The original code replaced newlines with spaces first, which made
        # its later first-line check dead code.)
        text = text.strip().split('\n')[0]
        text = text.replace('\t', ' ').strip()
        # Remove stray quotes and brackets around the clue.
        text = text.strip('"\'[](){}')
        # Keep only the first sentence if the model produced several.
        if '.' in text:
            text = text.split('.')[0].strip()
        # Basic length sanity check.
        if len(text) < 3 or len(text) > 100:
            return ""
        return text

    def generate_clue(self, word: str, topic: str) -> Dict[str, Optional[str]]:
        """Generate clues using all available models.

        Args:
            word: Target word.
            topic: Topic/category context.

        Returns:
            Dictionary mapping model keys to generated clues (None on failure).
        """
        results = {}
        logger.info(f"🎯 Generating clues for '{word}' + '{topic}' using {len(self.models)} models")
        for model_key, model_name in self.models.items():
            logger.info(f" Querying {model_key}...")
            results[model_key] = self.query_model(model_name, word, topic)
            # Small delay to be respectful to the API.
            time.sleep(1)
        return results

    def evaluate_clue_quality(self, word: str, clue: str) -> Tuple[str, float]:
        """Evaluate the quality of a generated clue with simple heuristics.

        Args:
            word: Target word.
            clue: Generated clue.

        Returns:
            Tuple of (quality_label, quality_score in [0.0, 1.0]).
        """
        if not clue or len(clue.strip()) < 3:
            return "FAILED", 0.0
        word_lower = word.lower()
        clue_lower = clue.lower()
        # Critical failure: the clue gives the answer away. Match on word
        # boundaries so e.g. "CAT" is not wrongly flagged inside "category".
        if re.search(r'\b' + re.escape(word_lower) + r'\b', clue_lower):
            return "POOR", 0.2
        score = 0.5  # base score for any non-trivial clue
        quality_words = ["player", "instrument", "device", "system", "location", "animal",
                         "food", "building", "activity", "professional", "tool", "creature",
                         "terminal", "hub", "language", "storage", "sport", "game"]
        if any(qw in clue_lower for qw in quality_words):
            score += 0.3  # mentions a concrete descriptive category
        if 2 <= len(clue.split()) <= 6:  # crossword-friendly length
            score += 0.1
        if len(clue) >= 8:  # long enough to be descriptive
            score += 0.1
        # Map the score onto a coarse quality label.
        if score >= 0.8:
            return "EXCELLENT", score
        elif score >= 0.6:
            return "GOOD", score
        elif score >= 0.4:
            return "ACCEPTABLE", score
        else:
            return "POOR", score
def main():
    """Run a small demo: generate and score clues for sample word/topic pairs."""
    generator = APIClueGenerator()

    # Guard clause: the Router API is useless without a token, so fail fast.
    if not generator.hf_token:
        print("❌ Error: HF_TOKEN environment variable not set")
        print("Please set your Hugging Face token: export HF_TOKEN='hf_your_token_here'")
        return

    # Word/topic pairs covering a spread of categories.
    test_cases = [
        ("CRICKET", "sports"),
        ("SHUTTLE", "space"),
        ("CAT", "animals"),
        ("DATABASE", "technology"),
    ]

    for target, category in test_cases:
        print(f"\n🧪 Testing: {target} + {category}")
        print("=" * 50)
        clues = generator.generate_clue(target, category)
        for model_key, clue_text in clues.items():
            if not clue_text:
                print(f"{model_key:15} | FAILED | No response")
                continue
            label, _score = generator.evaluate_clue_quality(target, clue_text)
            print(f"{model_key:15} | {label:10} | {clue_text}")
# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    main()