# OPTIMAL GENERATION FUNCTION FOR CLIMATE CHATBOT
# Experiment 4c - Balanced parameters for best performance
# Assumes `tokenizer` and `model` are already defined by the earlier
# fine-tuning / evaluation cells.

import re
import time


def generate_answer_optimal(question, max_length=70, temperature=0.5):
    """
    OPTIMAL generation function for climate chatbot
    Balanced for quality, speed, and factual accuracy
    """
    # Clean input
    input_text = f"question: {question.strip()}"

    # Tokenize
    input_ids = tokenizer.encode(
        input_text,
        return_tensors='tf',
        max_length=110,
        truncation=True,
        add_special_tokens=True
    )

    # OPTIMAL PARAMETERS (Experiment 4c)
    outputs = model.generate(
        input_ids,
        max_length=max_length,        # 70 tokens for focused answers
        min_length=18,                # Ensure substantial content
        temperature=temperature,      # 0.5 for balanced creativity
        do_sample=True,
        top_p=0.8,                    # Nucleus sampling
        top_k=40,                     # Vocabulary restriction
        repetition_penalty=2.0,       # Anti-repetition
        no_repeat_ngram_size=3,       # Prevent 3-gram repetition
        num_beams=1,                  # Single beam for speed
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

    # Decode
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Post-processing
    answer = post_process_optimal(answer, question)

    return answer


def post_process_optimal(answer, question):
    """
    Optimal post-processing for factual accuracy
    """
    # Remove input echo
    if answer.lower().startswith(question.lower()):
        answer = answer[len(question):].strip()

    # Remove prefixes
    prefixes = ["question:", "answer:", "response:", "a:", "q:"]
    for prefix in prefixes:
        if answer.lower().startswith(prefix):
            answer = answer[len(prefix):].strip()

    # Fix critical factual errors only (case-insensitive replacement,
    # without lowercasing the rest of the answer)
    critical_fixes = {
        'sea levels falling': 'sea levels rising',
        'sea level falling': 'sea level rising',
        'temperature decreasing': 'temperature increasing'
    }
    for error, correction in critical_fixes.items():
        answer = re.sub(re.escape(error), correction, answer, flags=re.IGNORECASE)

    # Remove immediate word repetition
    words = answer.split()
    if len(words) > 1:
        cleaned_words = [words[0]]
        for i in range(1, len(words)):
            if words[i].lower() != words[i - 1].lower():
                cleaned_words.append(words[i])
        answer = ' '.join(cleaned_words)

    # Ensure proper ending
    if answer and not answer.endswith('.') and not answer.endswith('?'):
        answer += '.'

    # Quality check
    if len(answer.split()) < 6:
        return "I can provide information about this climate topic. What specific aspect interests you?"

    return answer


def interactive_climate_chat_optimal():
    """
    OPTIMAL interactive climate chat
    """
    print("CLIMATE EDUCATION CHATBOT - OPTIMAL VERSION")
    print("Best performing model from comprehensive experiments")
    print("Commands: 'quit' to exit, 'stats' for performance info")
    print("-" * 70)

    while True:
        try:
            user_input = input("\nYour climate question: ").strip()

            if user_input.lower() == 'quit':
                print("Thanks for learning about climate change!")
                break
            elif user_input.lower() == 'stats':
                print("\nOPTIMAL MODEL PERFORMANCE:")
                print("  Experiment: 4c (Balanced)")
                print("  BLEU Score: 0.0549 (Best balanced performance)")
                print("  Generation Speed: ~17s (Good speed)")
                print("  Training Loss: 0.5757 (Excellent)")
                print("  Validation Loss: 0.8844 (Stable)")
                continue
            elif not user_input:
                continue

            # Generate with optimal function and time the response
            start_time = time.time()
            answer = generate_answer_optimal(user_input)
            gen_time = time.time() - start_time

            print(f"\nAnswer: {answer}")
            print(f"Generated in {gen_time:.2f} seconds")

        except KeyboardInterrupt:
            print("\nGoodbye!")
            break
        except Exception as e:
            print(f"Error: {e}")
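

# ---------------------------------------------------------------------------
# Usage sketch (added for illustration, not part of the original experiment
# code): one way to wire the functions above to a saved checkpoint when this
# file is run on its own. The checkpoint path and the Auto* classes below are
# assumptions; the original notebook is expected to have already defined
# `model` and `tokenizer` in earlier cells.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM

    CHECKPOINT = "./climate_chatbot_exp4c"  # hypothetical path to the saved fine-tuned model

    tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
    model = TFAutoModelForSeq2SeqLM.from_pretrained(CHECKPOINT)

    # Quick smoke test before starting the interactive loop
    print(generate_answer_optimal("Why are sea levels rising?"))

    interactive_climate_chat_optimal()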