Spaces:
Sleeping
Sleeping
File size: 6,621 Bytes
0cf2485 47d18ca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import os
# Set cache dirs (must match Dockerfile env vars)
# IMPORTANT: these must be assigned BEFORE `transformers` is imported below —
# HF_HOME / XDG_CACHE_HOME are read at import time, so reordering this file
# would silently send model downloads to the wrong (possibly read-only) path.
os.environ['HOME'] = '/app'
os.environ['HF_HOME'] = '/app/.hf_cache'          # Hugging Face model/tokenizer cache
os.environ['LANGTOOL_HOME'] = '/app/.ltool_cache' # LanguageTool cache (used elsewhere in the app — TODO confirm)
os.environ['XDG_CACHE_HOME'] = '/app/.cache'      # generic fallback cache location
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import random
from typing import Dict, List
class GrammarChatbot:
    """Conversational English-practice partner.

    Combines a grammar-correction model with a paraphrasing model to rewrite
    user input fluently, compliments the user when a rewrite was needed, and
    keeps the conversation going with questions drawn from a topic bank.
    """

    def __init__(self):
        """Load both seq2seq models and initialize conversation state."""
        # Grammar-correction pipeline (T5 fine-tuned for grammar correction).
        self.grammar_model = pipeline(
            "text2text-generation",
            model="vennify/t5-base-grammar-correction",
            device=0 if torch.cuda.is_available() else -1,
        )
        # Paraphraser loaded manually so we control generate() parameters.
        self.paraphrase_tokenizer = AutoTokenizer.from_pretrained(
            "humarin/chatgpt_paraphraser_on_T5_base"
        )
        self.paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(
            "humarin/chatgpt_paraphraser_on_T5_base"
        )
        # Positive feedback, attached only when the bot actually changed the text.
        self.compliments = [
            "Great job! Your English is improving!",
            "Nice improvement! Keep it up!",
            "You're doing well with your practice!",
            "Good effort! I can see you're trying hard!",
            "Excellent try! You're getting better!",
            "Well done! That was much better!",
            "Impressive! Your sentence structure is improving!",
        ]
        # Question bank grouped by topic; _get_next_question draws from it.
        self.question_categories = {
            "daily_life": [
                "What did you do this morning?",
                "How do you usually spend your evenings?",
                "What's your morning routine like?",
                "Do you have any plans for this weekend?",
                "What time do you usually wake up?",
            ],
            "hobbies": [
                "What hobbies do you enjoy in your free time?",
                "Have you picked up any new hobbies recently?",
                "Do you prefer indoor or outdoor activities?",
                "What's something you've always wanted to try?",
                "Do you play any musical instruments?",
            ],
            "food": [
                "What's your favorite comfort food?",
                "Do you enjoy cooking? What's your specialty?",
                "What's the most unusual food you've ever tried?",
                "Do you prefer sweet or savory snacks?",
                "What's your go-to breakfast?",
            ],
            "travel": [
                "If you could visit any country, where would you go?",
                "What's the most beautiful place you've ever seen?",
                "Do you prefer beach vacations or city trips?",
                "What's your favorite travel memory?",
                "What's the next place you'd like to visit?",
            ],
            "technology": [
                "How do you use technology in your daily life?",
                "What's your opinion about social media?",
                "Do you think AI will change our lives significantly?",
                "What tech gadget couldn't you live without?",
                "How has technology changed your work/studies?",
            ],
            "future": [
                "Where do you see yourself in five years?",
                "What's one skill you'd like to learn?",
                "Do you have any big goals for this year?",
                "What would your perfect day look like?",
                "What's something you want to achieve?",
            ],
        }
        # Conversation state: last question asked, active topic, full transcript.
        self.current_question = None
        self.current_category = None
        self.conversation_history = []

    def correct_grammar(self, text: str) -> str:
        """Return a grammar-corrected version of ``text``.

        FIX: the vennify model was trained with a ``grammar: `` task prefix;
        without it the model's output quality degrades, so it is added here.
        """
        result = self.grammar_model(
            f"grammar: {text}",
            max_length=256,
            num_beams=4,
            early_stopping=True,
        )
        return result[0]["generated_text"]

    def fluent_correction(self, text: str) -> str:
        """Rewrite ``text`` as a fluent, natural-sounding sentence."""
        input_ids = self.paraphrase_tokenizer(
            f"paraphrase: {text}",
            return_tensors="pt",
            max_length=256,
            truncation=True,
        ).input_ids
        # FIX: the original passed temperature=0.7, which pure beam search
        # ignores (it only applies when do_sample=True) and which triggers a
        # transformers warning — dropped to keep decoding deterministic.
        # no_grad: inference only, so skip autograd bookkeeping.
        with torch.no_grad():
            outputs = self.paraphrase_model.generate(
                input_ids,
                max_length=256,
                num_beams=5,
                early_stopping=True,
            )
        return self.paraphrase_tokenizer.decode(outputs[0], skip_special_tokens=True)

    def _get_next_question(self) -> str:
        """Select the next question, staying on the current topic 60% of the time.

        Avoids repeating the question that was just asked whenever the chosen
        category offers an alternative.
        """
        # Switch topics unless one is active and the 60% stay-on-topic roll hits.
        if not (self.current_category and random.random() < 0.6):
            self.current_category = random.choice(list(self.question_categories.keys()))
        pool = self.question_categories[self.current_category]
        # FIX: never ask the identical question twice in a row if avoidable.
        fresh = [q for q in pool if q != self.current_question]
        return random.choice(fresh or pool)

    def generate_response(self, user_input: str) -> Dict:
        """Correct the user's input and produce the bot's next conversational turn.

        Returns a dict with the original text, the fluent rewrite, whether a
        correction was made, an optional compliment, and the next question.
        """
        self.conversation_history.append(("user", user_input))
        corrected = self.fluent_correction(user_input)
        # FIX: strip() added so leading/trailing whitespace alone no longer
        # counts as a "correction"; comparison stays case-insensitive.
        is_corrected = corrected.strip().lower() != user_input.strip().lower()
        response = {
            "original_text": user_input,
            "corrected_text": corrected,
            "is_corrected": is_corrected,
            "compliment": random.choice(self.compliments) if is_corrected else "",
            "next_question": self._get_next_question(),
            "end_conversation": False,
        }
        # Record the bot's turn so history alternates user/bot entries.
        self.current_question = response["next_question"]
        self.conversation_history.append(("bot", response["next_question"]))
        return response

    def start_conversation(self) -> Dict:
        """Reset state and open a new conversation with a random first question."""
        self.conversation_history = []
        self.current_category = random.choice(list(self.question_categories.keys()))
        self.current_question = random.choice(self.question_categories[self.current_category])
        return {
            "message": "Hello! I'm your English practice partner. Let's chat!",
            "next_question": self.current_question,
            "end_conversation": False,
        }

    def get_conversation_history(self) -> List[tuple]:
        """Return the full transcript as (speaker, text) tuples."""
        return self.conversation_history