shevadesuyash committed on
Commit
0cf2485
·
verified ·
1 Parent(s): 0ebbe65

Update grammar_chatbot.py

Browse files
Files changed (1) hide show
  1. grammar_chatbot.py +166 -158
grammar_chatbot.py CHANGED
@@ -1,159 +1,167 @@
1
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
2
- import torch
3
- import random
4
- from typing import Dict, List
5
-
6
- class GrammarChatbot:
7
- def __init__(self):
8
- """Initialize models for fluent corrections"""
9
- # Initialize models
10
- self.grammar_model = pipeline(
11
- "text2text-generation",
12
- model="vennify/t5-base-grammar-correction",
13
- device=0 if torch.cuda.is_available() else -1
14
- )
15
-
16
- # Fluent paraphrasing model
17
- self.paraphrase_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
18
- self.paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
19
-
20
- # Enhanced conversation components
21
- self.compliments = [
22
- "Great job! Your English is improving!",
23
- "Nice improvement! Keep it up!",
24
- "You're doing well with your practice!",
25
- "Good effort! I can see you're trying hard!",
26
- "Excellent try! You're getting better!",
27
- "Well done! That was much better!",
28
- "Impressive! Your sentence structure is improving!"
29
- ]
30
-
31
- # Organized question bank by categories
32
- self.question_categories = {
33
- "daily_life": [
34
- "What did you do this morning?",
35
- "How do you usually spend your evenings?",
36
- "What's your morning routine like?",
37
- "Do you have any plans for this weekend?",
38
- "What time do you usually wake up?"
39
- ],
40
- "hobbies": [
41
- "What hobbies do you enjoy in your free time?",
42
- "Have you picked up any new hobbies recently?",
43
- "Do you prefer indoor or outdoor activities?",
44
- "What's something you've always wanted to try?",
45
- "Do you play any musical instruments?"
46
- ],
47
- "food": [
48
- "What's your favorite comfort food?",
49
- "Do you enjoy cooking? What's your specialty?",
50
- "What's the most unusual food you've ever tried?",
51
- "Do you prefer sweet or savory snacks?",
52
- "What's your go-to breakfast?"
53
- ],
54
- "travel": [
55
- "If you could visit any country, where would you go?",
56
- "What's the most beautiful place you've ever seen?",
57
- "Do you prefer beach vacations or city trips?",
58
- "What's your favorite travel memory?",
59
- "What's the next place you'd like to visit?"
60
- ],
61
- "technology": [
62
- "How do you use technology in your daily life?",
63
- "What's your opinion about social media?",
64
- "Do you think AI will change our lives significantly?",
65
- "What tech gadget couldn't you live without?",
66
- "How has technology changed your work/studies?"
67
- ],
68
- "future": [
69
- "Where do you see yourself in five years?",
70
- "What's one skill you'd like to learn?",
71
- "Do you have any big goals for this year?",
72
- "What would your perfect day look like?",
73
- "What's something you want to achieve?"
74
- ]
75
- }
76
-
77
- self.current_question = None
78
- self.current_category = None
79
- self.conversation_history = []
80
-
81
- def correct_grammar(self, text: str) -> str:
82
- """Basic grammar correction"""
83
- result = self.grammar_model(
84
- text,
85
- max_length=256,
86
- num_beams=4,
87
- early_stopping=True
88
- )
89
- return result[0]['generated_text']
90
-
91
- def fluent_correction(self, text: str) -> str:
92
- """Create fluent, natural rewrites"""
93
- input_ids = self.paraphrase_tokenizer(
94
- f"paraphrase: {text}",
95
- return_tensors="pt",
96
- max_length=256,
97
- truncation=True
98
- ).input_ids
99
-
100
- outputs = self.paraphrase_model.generate(
101
- input_ids,
102
- temperature=0.7,
103
- max_length=256,
104
- num_beams=5,
105
- early_stopping=True
106
- )
107
-
108
- return self.paraphrase_tokenizer.decode(outputs[0], skip_special_tokens=True)
109
-
110
- def _get_next_question(self) -> str:
111
- """Select next question based on conversation context"""
112
- # If we're continuing a category, use follow-up questions
113
- if self.current_category and random.random() < 0.6: # 60% chance to stay on topic
114
- return random.choice(self.question_categories[self.current_category])
115
-
116
- # Otherwise select a new random category
117
- self.current_category = random.choice(list(self.question_categories.keys()))
118
- return random.choice(self.question_categories[self.current_category])
119
-
120
- def generate_response(self, user_input: str) -> Dict:
121
- """Generate a conversational response"""
122
- # Store conversation history
123
- self.conversation_history.append(("user", user_input))
124
-
125
- # Correct the input
126
- corrected = self.fluent_correction(user_input)
127
- is_corrected = corrected.lower() != user_input.lower()
128
-
129
- # Generate response
130
- response = {
131
- "original_text": user_input,
132
- "corrected_text": corrected,
133
- "is_corrected": is_corrected,
134
- "compliment": random.choice(self.compliments) if is_corrected else "",
135
- "next_question": self._get_next_question(),
136
- "end_conversation": False
137
- }
138
-
139
- # Update state
140
- self.current_question = response["next_question"]
141
- self.conversation_history.append(("bot", response["next_question"]))
142
-
143
- return response
144
-
145
- def start_conversation(self) -> Dict:
146
- """Initialize a new conversation"""
147
- self.conversation_history = []
148
- self.current_category = random.choice(list(self.question_categories.keys()))
149
- self.current_question = random.choice(self.question_categories[self.current_category])
150
-
151
- return {
152
- "message": "Hello! I'm your English practice partner. Let's chat!",
153
- "next_question": self.current_question,
154
- "end_conversation": False
155
- }
156
-
157
- def get_conversation_history(self) -> List[tuple]:
158
- """Get the complete conversation history"""
 
 
 
 
 
 
 
 
159
  return self.conversation_history
 
1
+ import os
2
+
3
+ # Set cache dirs (must match Dockerfile env vars)
4
+ os.environ['HOME'] = '/app'
5
+ os.environ['HF_HOME'] = '/app/.hf_cache'
6
+ os.environ['LANGTOOL_HOME'] = '/app/.ltool_cache'
7
+ os.environ['XDG_CACHE_HOME'] = '/app/.cache'
8
+
9
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
10
+ import torch
11
+ import random
12
+ from typing import Dict, List
13
+
14
class GrammarChatbot:
    """Conversational English-practice partner.

    Each turn the bot rewrites the user's sentence with a paraphrasing model
    and then asks a follow-up question drawn from a categorized question bank.

    State kept on the instance:
        current_category: key of the question category currently in use.
        current_question: the question most recently asked by the bot.
        conversation_history: list of ``(speaker, text)`` tuples where
            ``speaker`` is ``"user"`` or ``"bot"``.
    """

    # Probability of staying in the current question category on the next turn.
    _STAY_ON_TOPIC_PROBABILITY = 0.6

    def __init__(self):
        """Load the correction/paraphrase models and set up conversation state."""
        # Grammar-correction pipeline; runs on the first GPU when one is
        # available, otherwise on CPU (-1).
        self.grammar_model = pipeline(
            "text2text-generation",
            model="vennify/t5-base-grammar-correction",
            device=0 if torch.cuda.is_available() else -1
        )

        # Paraphrasing model used to produce fluent rewrites.
        self.paraphrase_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
        self.paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")

        # Encouragement attached to a response whenever the rewrite differs
        # from what the user typed.
        self.compliments = [
            "Great job! Your English is improving!",
            "Nice improvement! Keep it up!",
            "You're doing well with your practice!",
            "Good effort! I can see you're trying hard!",
            "Excellent try! You're getting better!",
            "Well done! That was much better!",
            "Impressive! Your sentence structure is improving!"
        ]

        # Question bank, keyed by topic category.
        self.question_categories = {
            "daily_life": [
                "What did you do this morning?",
                "How do you usually spend your evenings?",
                "What's your morning routine like?",
                "Do you have any plans for this weekend?",
                "What time do you usually wake up?"
            ],
            "hobbies": [
                "What hobbies do you enjoy in your free time?",
                "Have you picked up any new hobbies recently?",
                "Do you prefer indoor or outdoor activities?",
                "What's something you've always wanted to try?",
                "Do you play any musical instruments?"
            ],
            "food": [
                "What's your favorite comfort food?",
                "Do you enjoy cooking? What's your specialty?",
                "What's the most unusual food you've ever tried?",
                "Do you prefer sweet or savory snacks?",
                "What's your go-to breakfast?"
            ],
            "travel": [
                "If you could visit any country, where would you go?",
                "What's the most beautiful place you've ever seen?",
                "Do you prefer beach vacations or city trips?",
                "What's your favorite travel memory?",
                "What's the next place you'd like to visit?"
            ],
            "technology": [
                "How do you use technology in your daily life?",
                "What's your opinion about social media?",
                "Do you think AI will change our lives significantly?",
                "What tech gadget couldn't you live without?",
                "How has technology changed your work/studies?"
            ],
            "future": [
                "Where do you see yourself in five years?",
                "What's one skill you'd like to learn?",
                "Do you have any big goals for this year?",
                "What would your perfect day look like?",
                "What's something you want to achieve?"
            ]
        }

        self.current_question = None
        self.current_category = None
        self.conversation_history = []

    def correct_grammar(self, text: str) -> str:
        """Return ``text`` after a pass through the grammar-correction pipeline.

        Args:
            text: Sentence(s) to correct.

        Returns:
            The ``generated_text`` of the pipeline's first result.
        """
        result = self.grammar_model(
            text,
            max_length=256,
            num_beams=4,
            early_stopping=True
        )
        return result[0]['generated_text']

    def fluent_correction(self, text: str) -> str:
        """Return a fluent rewrite of ``text`` produced by the paraphrase model.

        Args:
            text: Sentence(s) to rewrite; the tokenized prompt is truncated
                to 256 tokens.

        Returns:
            The decoded first generated sequence, special tokens removed.
        """
        input_ids = self.paraphrase_tokenizer(
            f"paraphrase: {text}",
            return_tensors="pt",
            max_length=256,
            truncation=True
        ).input_ids

        # Deterministic beam search. No ``temperature`` is passed: it only
        # takes effect when sampling (``do_sample=True``) is enabled, so
        # supplying it here had no effect beyond a generation warning.
        outputs = self.paraphrase_model.generate(
            input_ids,
            max_length=256,
            num_beams=5,
            early_stopping=True
        )

        return self.paraphrase_tokenizer.decode(outputs[0], skip_special_tokens=True)

    def _get_next_question(self) -> str:
        """Pick the next question, avoiding an immediate repeat.

        With probability ``_STAY_ON_TOPIC_PROBABILITY`` the current category
        is kept (when one is set); otherwise a category is drawn at random
        and stored in ``current_category``. The question just asked
        (``current_question``) is excluded unless it is the only question
        in the chosen category.

        Returns:
            The selected question text.
        """
        stay_on_topic = (
            self.current_category
            and random.random() < self._STAY_ON_TOPIC_PROBABILITY
        )
        if not stay_on_topic:
            self.current_category = random.choice(list(self.question_categories))

        questions = self.question_categories[self.current_category]
        fresh = [q for q in questions if q != self.current_question]
        # Fall back to the full list for a single-question category.
        return random.choice(fresh or questions)

    def generate_response(self, user_input: str) -> Dict:
        """Process one user turn and build the bot's reply.

        The user's text and the bot's next question are both appended to
        ``conversation_history``, and ``current_question`` is updated.

        Args:
            user_input: Raw text typed by the user.

        Returns:
            A dict with keys ``original_text``, ``corrected_text``,
            ``is_corrected``, ``compliment`` (empty string when nothing was
            changed), ``next_question`` and ``end_conversation``.
        """
        self.conversation_history.append(("user", user_input))

        if user_input.strip():
            corrected = self.fluent_correction(user_input)
            # Ignore case and surrounding whitespace so cosmetic differences
            # are not reported as corrections.
            is_corrected = corrected.strip().casefold() != user_input.strip().casefold()
        else:
            # Nothing to rewrite: skip the model rather than paraphrasing an
            # empty prompt and presenting the result as a correction.
            corrected = user_input
            is_corrected = False

        compliment = random.choice(self.compliments) if is_corrected else ""
        next_question = self._get_next_question()

        response = {
            "original_text": user_input,
            "corrected_text": corrected,
            "is_corrected": is_corrected,
            "compliment": compliment,
            "next_question": next_question,
            "end_conversation": False
        }

        self.current_question = next_question
        self.conversation_history.append(("bot", next_question))

        return response

    def start_conversation(self) -> Dict:
        """Reset state and open a new conversation.

        Clears ``conversation_history`` and draws a random category and
        opening question. The opening question is stored in
        ``current_question`` but is not appended to the history.

        Returns:
            A dict with keys ``message``, ``next_question`` and
            ``end_conversation``.
        """
        self.conversation_history = []
        self.current_category = random.choice(list(self.question_categories))
        self.current_question = random.choice(self.question_categories[self.current_category])

        return {
            "message": "Hello! I'm your English practice partner. Let's chat!",
            "next_question": self.current_question,
            "end_conversation": False
        }

    def get_conversation_history(self) -> List[tuple]:
        """Return the recorded ``(speaker, text)`` turns.

        A shallow copy is returned so callers cannot alter the bot's
        internal history by mutating the result.
        """
        return list(self.conversation_history)