shevadesuyash committed on
Commit
47d18ca
·
verified ·
1 Parent(s): a8382bd

Upload 6 files

Browse files
Files changed (6) hide show
  1. Dockerfile +31 -0
  2. app.py +92 -0
  3. cache_models.py +37 -0
  4. grammar_chatbot.py +159 -0
  5. paragraph_checker.py +62 -0
  6. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base image
2
+ FROM python:3.10-slim
3
+
4
+ # Environment variables
5
+ ENV PYTHONDONTWRITEBYTECODE=1 \
6
+ PYTHONUNBUFFERED=1
7
+
8
+ # Install dependencies
9
+ RUN apt-get update && apt-get install -y \
10
+ openjdk-17-jre-headless \
11
+ git \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Set working directory
15
+ WORKDIR /app
16
+
17
+ # Copy all project files
18
+ COPY . .
19
+
20
+ # Install Python packages
21
+ RUN pip install --upgrade pip
22
+ RUN pip install --no-cache-dir -r requirements.txt
23
+
24
+ # Cache models during build
25
+ RUN python cache_models.py
26
+
27
+ # Expose port
28
+ EXPOSE 8080
29
+
30
+ # Run the Flask app
31
+ CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ from paragraph_checker import ParagraphCorrector
3
+ from grammar_chatbot import GrammarChatbot
4
+ import logging
5
+
6
+ app = Flask(__name__)
7
+
8
+ # Initialize services
9
+ paragraph_service = ParagraphCorrector()
10
+ chatbot_service = GrammarChatbot()
11
+
12
+ # Configure logging
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
+
16
+ @app.route('/correct_text', methods=['POST'])
17
+ def handle_paragraph():
18
+ """Endpoint for conservative paragraph correction"""
19
+ data = request.get_json()
20
+ text = data.get('paragraph', '').strip()
21
+
22
+ if not text:
23
+ return jsonify({"error": "No paragraph provided"}), 400
24
+
25
+ try:
26
+ corrected = paragraph_service.conservative_correction(text)
27
+ return jsonify({
28
+ "original_text": text,
29
+ "grammar_corrected": corrected
30
+ })
31
+ except Exception as e:
32
+ logger.error(f"Paragraph correction error: {str(e)}")
33
+ return jsonify({
34
+ "error": "Paragraph processing failed",
35
+ "details": str(e)
36
+ }), 500
37
+
38
+ @app.route('/chat', methods=['POST', 'GET']) # Added GET method for testing
39
+ def handle_chat():
40
+ """Endpoint for fluent conversational correction"""
41
+ if request.method == 'POST':
42
+ data = request.get_json()
43
+ user_input = data.get('message', '').strip()
44
+ else: # GET method for testing
45
+ user_input = request.args.get('message', '').strip()
46
+
47
+ if not user_input:
48
+ return jsonify({"error": "No message provided"}), 400
49
+
50
+ try:
51
+ response = chatbot_service.generate_response(user_input)
52
+ return jsonify({
53
+ "original_text": response["original_text"],
54
+ "corrected_text": response["corrected_text"],
55
+ "is_corrected": response["is_corrected"],
56
+ "compliment": response["compliment"],
57
+ "next_question": response["next_question"],
58
+ "end_conversation": response["end_conversation"]
59
+ })
60
+ except Exception as e:
61
+ logger.error(f"Chatbot error: {str(e)}")
62
+ return jsonify({
63
+ "error": "Chat processing failed",
64
+ "details": str(e)
65
+ }), 500
66
+
67
+ @app.route('/start', methods=['GET'])
68
+ def start_conversation():
69
+ try:
70
+ response = chatbot_service.start_conversation()
71
+ return jsonify(response)
72
+ except Exception as e:
73
+ logger.error(f"Start conversation error: {str(e)}")
74
+ return jsonify({
75
+ "error": "Failed to start conversation",
76
+ "details": str(e)
77
+ }), 500
78
+
79
+ @app.route('/health', methods=['GET', 'POST']) # Added POST method for testing
80
+ def health_check():
81
+ return jsonify({
82
+ "status": "healthy",
83
+ "services": ["paragraph", "chat"],
84
+ "details": {
85
+ "paragraph_service": "active",
86
+ "chatbot_service": "active"
87
+ }
88
+ })
89
+
90
+ if __name__ == '__main__':
91
+ logger.info("Starting grammar services...")
92
+ app.run(host='0.0.0.0', port=8080, debug=True)
cache_models.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import language_tool_python
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
+ import torch
4
+
5
+ def pre_cache_models():
6
+ """
7
+ Downloads and caches all required models and dependencies.
8
+ This script is run during the Docker build process.
9
+ """
10
+ print("Caching LanguageTool model...")
11
+ try:
12
+ # This will download and cache the LanguageTool server files
13
+ language_tool_python.LanguageTool('en-US')
14
+ print("LanguageTool model cached successfully.")
15
+ except Exception as e:
16
+ print(f"Failed to cache LanguageTool: {e}")
17
+
18
+ print("\nCaching Hugging Face models...")
19
+ models_to_cache = [
20
+ "vennify/t5-base-grammar-correction",
21
+ "humarin/chatgpt_paraphraser_on_T5_base"
22
+ ]
23
+
24
+ for model_name in models_to_cache:
25
+ try:
26
+ print(f"Caching {model_name}...")
27
+ # Cache both tokenizer and model files
28
+ AutoTokenizer.from_pretrained(model_name)
29
+ AutoModelForSeq2SeqLM.from_pretrained(model_name)
30
+ print(f"{model_name} cached successfully.")
31
+ except Exception as e:
32
+ print(f"Failed to cache {model_name}: {e}")
33
+
34
+ print("\nAll models have been cached.")
35
+
36
+ if __name__ == "__main__":
37
+ pre_cache_models()
grammar_chatbot.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
2
+ import torch
3
+ import random
4
+ from typing import Dict, List
5
+
6
+ class GrammarChatbot:
7
+ def __init__(self):
8
+ """Initialize models for fluent corrections"""
9
+ # Initialize models
10
+ self.grammar_model = pipeline(
11
+ "text2text-generation",
12
+ model="vennify/t5-base-grammar-correction",
13
+ device=0 if torch.cuda.is_available() else -1
14
+ )
15
+
16
+ # Fluent paraphrasing model
17
+ self.paraphrase_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
18
+ self.paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
19
+
20
+ # Enhanced conversation components
21
+ self.compliments = [
22
+ "Great job! Your English is improving!",
23
+ "Nice improvement! Keep it up!",
24
+ "You're doing well with your practice!",
25
+ "Good effort! I can see you're trying hard!",
26
+ "Excellent try! You're getting better!",
27
+ "Well done! That was much better!",
28
+ "Impressive! Your sentence structure is improving!"
29
+ ]
30
+
31
+ # Organized question bank by categories
32
+ self.question_categories = {
33
+ "daily_life": [
34
+ "What did you do this morning?",
35
+ "How do you usually spend your evenings?",
36
+ "What's your morning routine like?",
37
+ "Do you have any plans for this weekend?",
38
+ "What time do you usually wake up?"
39
+ ],
40
+ "hobbies": [
41
+ "What hobbies do you enjoy in your free time?",
42
+ "Have you picked up any new hobbies recently?",
43
+ "Do you prefer indoor or outdoor activities?",
44
+ "What's something you've always wanted to try?",
45
+ "Do you play any musical instruments?"
46
+ ],
47
+ "food": [
48
+ "What's your favorite comfort food?",
49
+ "Do you enjoy cooking? What's your specialty?",
50
+ "What's the most unusual food you've ever tried?",
51
+ "Do you prefer sweet or savory snacks?",
52
+ "What's your go-to breakfast?"
53
+ ],
54
+ "travel": [
55
+ "If you could visit any country, where would you go?",
56
+ "What's the most beautiful place you've ever seen?",
57
+ "Do you prefer beach vacations or city trips?",
58
+ "What's your favorite travel memory?",
59
+ "What's the next place you'd like to visit?"
60
+ ],
61
+ "technology": [
62
+ "How do you use technology in your daily life?",
63
+ "What's your opinion about social media?",
64
+ "Do you think AI will change our lives significantly?",
65
+ "What tech gadget couldn't you live without?",
66
+ "How has technology changed your work/studies?"
67
+ ],
68
+ "future": [
69
+ "Where do you see yourself in five years?",
70
+ "What's one skill you'd like to learn?",
71
+ "Do you have any big goals for this year?",
72
+ "What would your perfect day look like?",
73
+ "What's something you want to achieve?"
74
+ ]
75
+ }
76
+
77
+ self.current_question = None
78
+ self.current_category = None
79
+ self.conversation_history = []
80
+
81
+ def correct_grammar(self, text: str) -> str:
82
+ """Basic grammar correction"""
83
+ result = self.grammar_model(
84
+ text,
85
+ max_length=256,
86
+ num_beams=4,
87
+ early_stopping=True
88
+ )
89
+ return result[0]['generated_text']
90
+
91
+ def fluent_correction(self, text: str) -> str:
92
+ """Create fluent, natural rewrites"""
93
+ input_ids = self.paraphrase_tokenizer(
94
+ f"paraphrase: {text}",
95
+ return_tensors="pt",
96
+ max_length=256,
97
+ truncation=True
98
+ ).input_ids
99
+
100
+ outputs = self.paraphrase_model.generate(
101
+ input_ids,
102
+ temperature=0.7,
103
+ max_length=256,
104
+ num_beams=5,
105
+ early_stopping=True
106
+ )
107
+
108
+ return self.paraphrase_tokenizer.decode(outputs[0], skip_special_tokens=True)
109
+
110
+ def _get_next_question(self) -> str:
111
+ """Select next question based on conversation context"""
112
+ # If we're continuing a category, use follow-up questions
113
+ if self.current_category and random.random() < 0.6: # 60% chance to stay on topic
114
+ return random.choice(self.question_categories[self.current_category])
115
+
116
+ # Otherwise select a new random category
117
+ self.current_category = random.choice(list(self.question_categories.keys()))
118
+ return random.choice(self.question_categories[self.current_category])
119
+
120
+ def generate_response(self, user_input: str) -> Dict:
121
+ """Generate a conversational response"""
122
+ # Store conversation history
123
+ self.conversation_history.append(("user", user_input))
124
+
125
+ # Correct the input
126
+ corrected = self.fluent_correction(user_input)
127
+ is_corrected = corrected.lower() != user_input.lower()
128
+
129
+ # Generate response
130
+ response = {
131
+ "original_text": user_input,
132
+ "corrected_text": corrected,
133
+ "is_corrected": is_corrected,
134
+ "compliment": random.choice(self.compliments) if is_corrected else "",
135
+ "next_question": self._get_next_question(),
136
+ "end_conversation": False
137
+ }
138
+
139
+ # Update state
140
+ self.current_question = response["next_question"]
141
+ self.conversation_history.append(("bot", response["next_question"]))
142
+
143
+ return response
144
+
145
+ def start_conversation(self) -> Dict:
146
+ """Initialize a new conversation"""
147
+ self.conversation_history = []
148
+ self.current_category = random.choice(list(self.question_categories.keys()))
149
+ self.current_question = random.choice(self.question_categories[self.current_category])
150
+
151
+ return {
152
+ "message": "Hello! I'm your English practice partner. Let's chat!",
153
+ "next_question": self.current_question,
154
+ "end_conversation": False
155
+ }
156
+
157
+ def get_conversation_history(self) -> List[tuple]:
158
+ """Get the complete conversation history"""
159
+ return self.conversation_history
paragraph_checker.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import language_tool_python
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
+ import torch
4
+
5
+ class ParagraphCorrector:
6
+ def __init__(self):
7
+ """Initialize correction models with conservative settings"""
8
+ # Grammar tool with increased timeout
9
+ self.grammar_tool = language_tool_python.LanguageTool(
10
+ 'en-US',
11
+ config={'maxTextLength': 100000}
12
+ )
13
+
14
+ # Conservative grammar correction model
15
+ self.grammar_model = pipeline(
16
+ "text2text-generation",
17
+ model="vennify/t5-base-grammar-correction",
18
+ device=0 if torch.cuda.is_available() else -1
19
+ )
20
+
21
+ def correct_sentence(self, sentence: str) -> str:
22
+ """Correct a single sentence conservatively"""
23
+ # Basic grammar/spelling correction
24
+ matches = self.grammar_tool.check(sentence)
25
+ corrected = language_tool_python.utils.correct(sentence, matches)
26
+
27
+ # Light neural correction
28
+ result = self.grammar_model(
29
+ corrected,
30
+ max_length=256,
31
+ num_beams=3,
32
+ temperature=0.3, # Low temperature for minimal changes
33
+ early_stopping=True
34
+ )
35
+ return result[0]['generated_text']
36
+
37
+ def conservative_correction(self, text: str) -> str:
38
+ """Process text while preserving original structure"""
39
+ if not text.strip():
40
+ return text
41
+
42
+ # Split into sentences while preserving delimiters
43
+ sentences = []
44
+ current = ""
45
+ for char in text:
46
+ current += char
47
+ if char in {'.', '!', '?'}:
48
+ sentences.append(current)
49
+ current = ""
50
+ if current:
51
+ sentences.append(current)
52
+
53
+ # Correct each sentence individually
54
+ corrected_sentences = []
55
+ for sentence in sentences:
56
+ if sentence.strip():
57
+ corrected = self.correct_sentence(sentence)
58
+ corrected_sentences.append(corrected)
59
+ else:
60
+ corrected_sentences.append(sentence)
61
+
62
+ return ''.join(corrected_sentences)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Flask==3.0.3
2
+ language-tool-python==2.8
3
+ transformers==4.49.0
4
+ torch==2.6.0