#!/usr/bin/env python3
"""
API-Based Clue Generator for Crossword Puzzles
Uses Hugging Face Inference API to test multiple models without local downloads.
"""

import os
import time
import json
import logging
import requests
from typing import List, Dict, Optional, Tuple
from pathlib import Path

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class APIClueGenerator:
    """
    API-based clue generator using Hugging Face Inference API.
    Tests multiple models without local downloads.
    """
    
    def __init__(self, hf_token: Optional[str] = None):
        """Initialize API clue generator.
        
        Args:
            hf_token: Hugging Face API token (optional but recommended for rate limits)
        """
        self.hf_token = hf_token or os.getenv('HF_TOKEN') or os.getenv('HUGGINGFACE_TOKEN')
        self.base_url = "https://router.huggingface.co/v1"  # Use Router API like in clue_with_hf.py
        
        # Models available via HF Router API (based on working clue_with_hf.py approach)
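        # Assumption for context: the ':<provider>' suffix (e.g. ':fireworks-ai') asks the
        # Router to serve the model through that specific inference provider instead of
        # auto-selecting one.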
        self.models = {
            "deepseek-v3": "deepseek-ai/DeepSeek-V3-0324:fireworks-ai",  # Your working example
            "llama-3.3-70b": "meta-llama/Llama-3.3-70B-Instruct:fireworks-ai",  # Large Llama model
        }
        
        # Headers for API requests
        self.headers = {}
        if self.hf_token:
            self.headers["Authorization"] = f"Bearer {self.hf_token}"
        
        # Enhanced prompts for crossword clue generation
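        # Note: these single-turn templates are kept for reference/fallback only;
        # query_model() below builds its prompt via chat-completions messages instead.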
        self.prompts = {
            "instruction": """Generate a crossword clue for '{word}' (category: {topic}).

Rules:
- 2-6 words only
- Don't use the word '{word}' in the clue
- Be descriptive and accurate

Examples:
- CAT (animals) → "Feline household pet"
- GUITAR (music) → "Six-stringed instrument"  
- AIRPORT (transportation) → "Flight departure hub"

Clue for '{word}' ({topic}):""",

            "simple": """Complete this crossword clue:

{word} ({topic}) = [ANSWER]

Examples:
VIOLIN (music) = Bowed string instrument
SCIENTIST (science) = Research professional
DATABASE (technology) = Information storage system

{word} ({topic}) =""",

            "question": """What is '{word}' in the context of {topic}? Give a brief crossword clue (2-5 words) without using the word '{word}'.

Answer:"""
        }
    
    def query_model(self, model_name: str, word: str, context: str, max_retries: int = 3) -> Optional[str]:
        """Query a model via Hugging Face Router API using chat completions format.
        
        Args:
            model_name: Name of the model to query
            word: Target word for clue generation
            context: Topic/context for the word
            max_retries: Maximum number of retries
            
        Returns:
            Generated clue text or None if failed
        """
        url = f"{self.base_url}/chat/completions"
        
        # Use the same successful approach as clue_with_hf.py
        messages = [
            {
                "role": "system",
                "content": f"You are a crossword puzzle clue generator. Generate a single, concise, creative crossword clue for the word '{word}'. The clue should be 2-8 words, accurate, and not contain the word '{word}' itself."
            },
            {
                "role": "user",
                "content": f"Generate a crossword clue for the word '{word}' in the context of '{context}'."
            }
        ]
        
        payload = {
            "model": model_name,
            "messages": messages,
            "temperature": 0.7,
            "max_tokens": 50
        }
        
        for attempt in range(max_retries):
            try:
                response = requests.post(url, headers=self.headers, json=payload, timeout=30)
                response.raise_for_status()
                
                result = response.json()
                
                # Extract content from chat completions response
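                # Expected OpenAI-style response shape (illustrative, not exhaustive):
                #   {"choices": [{"message": {"content": "Feline household pet"}}], ...}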
                if "choices" in result and len(result["choices"]) > 0:
                    generated_text = result["choices"][0]["message"]["content"].strip()
                    return self._clean_response(generated_text)
                else:
                    logger.warning(f"No choices in response for {model_name}")
                    return None
                
            except requests.exceptions.RequestException as e:
                logger.warning(f"Request failed for {model_name} (attempt {attempt+1}): {e}")
                if hasattr(e, 'response') and e.response is not None:
                    logger.warning(f"Response content: {e.response.text}")
                if attempt < max_retries - 1:
                    time.sleep(2)
            except Exception as e:
                logger.warning(f"Unexpected error for {model_name} (attempt {attempt+1}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(2)
                    
        return None
    
    def _clean_response(self, text: str) -> str:
        """Clean and validate API response."""
        if not text:
            return ""
        
        # Remove common artifacts
        text = text.strip()
        
        # Keep only the first line if the model returned several
        # (done before collapsing whitespace, otherwise the check never fires)
        if '\n' in text:
            text = text.split('\n')[0].strip()
        text = text.replace('\t', ' ')
        
        # Remove surrounding quotes and brackets
        text = text.strip('"\'[](){}')
        
        # Take the first sentence if there are several
        if '.' in text and len(text.split('.')) > 1:
            text = text.split('.')[0].strip()
        
        # Basic length check
        if len(text) < 3 or len(text) > 100:
            return ""
        
        return text
    
    def generate_clue(self, word: str, topic: str) -> Dict[str, Optional[str]]:
        """Generate clues using all available models.
        
        Args:
            word: Target word
            topic: Topic/category context
            
        Returns:
            Dictionary mapping model names to generated clues
        """
        results = {}
        
        logger.info(f"🎯 Generating clues for '{word}' + '{topic}' using {len(self.models)} models")
        
        for model_key, model_name in self.models.items():
            logger.info(f"  Querying {model_key}...")
            clue = self.query_model(model_name, word, topic)
            results[model_key] = clue
            
            # Add small delay to be respectful to the API
            time.sleep(1)
        
        return results
    
    def evaluate_clue_quality(self, word: str, clue: str) -> Tuple[str, float]:
        """Evaluate the quality of a generated clue.
        
        Args:
            word: Target word
            clue: Generated clue
            
        Returns:
            Tuple of (quality_label, quality_score)
        """
        if not clue or len(clue.strip()) < 3:
            return "FAILED", 0.0
        
        word_lower = word.lower()
        clue_lower = clue.lower()
        
        # Check for critical issues
        if word_lower in clue_lower:
            return "POOR", 0.2
        
        # Check for quality indicators
        score = 0.5  # Base score
        quality_words = ["player", "instrument", "device", "system", "location", "animal", 
                        "food", "building", "activity", "professional", "tool", "creature",
                        "terminal", "hub", "language", "storage", "sport", "game"]
        
        if any(qw in clue_lower for qw in quality_words):
            score += 0.3
        
        if 2 <= len(clue.split()) <= 6:  # Good length
            score += 0.1
        
        if len(clue) >= 8:  # Descriptive
            score += 0.1
        
        # Determine label
        if score >= 0.8:
            return "EXCELLENT", score
        elif score >= 0.6:
            return "GOOD", score
        elif score >= 0.4:
            return "ACCEPTABLE", score
        else:
            return "POOR", score


def main():
    """Demo the API clue generator using your working approach."""
    generator = APIClueGenerator()
    
    if not generator.hf_token:
        print("❌ Error: HF_TOKEN environment variable not set")
        print("Please set your Hugging Face token: export HF_TOKEN='hf_your_token_here'")
        return
    
    # Test with your working examples first
    test_cases = [
        ("CRICKET", "sports"),
        ("SHUTTLE", "space"),
        ("CAT", "animals"),
        ("DATABASE", "technology"),
    ]
    
    for word, topic in test_cases:
        print(f"\n🧪 Testing: {word} + {topic}")
        print("=" * 50)
        
        results = generator.generate_clue(word, topic)
        
        for model, clue in results.items():
            if clue:
                quality, score = generator.evaluate_clue_quality(word, clue)
                print(f"{model:15} | {quality:10} | {clue}")
            else:
                print(f"{model:15} | FAILED     | No response")


if __name__ == "__main__":
    main()