shevadesuyash committed on
Commit
ca7aa50
·
0 Parent(s):

Initial commit of Paragraph_Checker module for Hugging Face

Browse files
Files changed (4) hide show
  1. Dockerfile +21 -0
  2. app.py +42 -0
  3. paragraph_checker.py +67 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Install system dependencies (Java runtime is required by language-tool-python)
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        openjdk-17-jre-headless \
    && rm -rf /var/lib/apt/lists/*

# Set up application
WORKDIR /app

# Copy only the dependency manifest first so the pip-install layer is
# cached independently of application-code changes (the original copied
# all sources before installing, invalidating the cache on every edit).
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy the application code
COPY . .

# Pre-download models during build so container start-up is fast
RUN python -c "from paragraph_checker import initialize_models; initialize_models()"

EXPOSE 5001
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from flask import Flask, request, jsonify
from paragraph_checker import correct_paragraph

app = Flask(__name__)

@app.route('/correct_text', methods=['POST'])
def correct_text():
    """Correct the grammar and tense of a JSON-supplied paragraph.

    Expects a JSON body of the form ``{"paragraph": "..."}``.
    Returns JSON with the original and corrected text, a 400 when no
    text is supplied, or a 500 with details when the pipeline fails.
    """
    # silent=True makes get_json return None for a missing/invalid JSON
    # body instead of raising; without it the original code crashed with
    # AttributeError on data.get(...) whenever no JSON was posted.
    data = request.get_json(silent=True) or {}
    text = data.get("paragraph", "")

    if not text:
        return jsonify({"error": "No text provided"}), 400

    print("Original Text:", text)

    try:
        # Get fully corrected text
        fully_corrected = correct_paragraph(text)

        return jsonify({
            "original_text": text,
            "corrected_text": fully_corrected
        })

    except Exception as e:
        # Surface the failure to the client without leaking a traceback page.
        print(f"Error processing text: {str(e)}")
        return jsonify({
            "error": "An error occurred while processing the text",
            "details": str(e)
        }), 500
31
+
32
if __name__ == '__main__':
    import os

    # Initialize models at startup so the first request is not slow.
    from paragraph_checker import initialize_models
    print("Loading ML models...")
    try:
        initialize_models()
        print("Models loaded successfully")
    except Exception as e:
        # Best-effort: start the server anyway; requests will fail with a
        # clear "not initialized" error from the pipeline functions.
        print(f"Error loading models: {str(e)}")

    # debug=True enables the Werkzeug interactive debugger, which allows
    # arbitrary code execution and must never run in a deployed container.
    # Opt in explicitly with FLASK_DEBUG=1 for local development only.
    debug_mode = os.environ.get("FLASK_DEBUG", "0") == "1"
    app.run(host="0.0.0.0", port=5001, debug=debug_mode)
paragraph_checker.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import language_tool_python
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Global variables for models; populated once by initialize_models() and
# read by grammar_correction() / tense_correction() below.
grammar_tool = None
tense_model = None
tense_tokenizer = None

def initialize_models():
    """Initialize all ML models at startup"""
    global grammar_tool, tense_model, tense_tokenizer

    # LanguageTool is a Java tool — presumably this is why the Dockerfile
    # installs openjdk-17-jre-headless; verify if the base image changes.
    print("Initializing Language Tool...")
    grammar_tool = language_tool_python.LanguageTool('en-US')

    # Downloads the checkpoint on first use; the Dockerfile runs this at
    # build time so the weights are baked into the image.
    print("Initializing T5 model...")
    model_name = "Vamsi/T5_Paraphrase_Paws"
    tense_tokenizer = T5Tokenizer.from_pretrained(model_name)
    tense_model = T5ForConditionalGeneration.from_pretrained(model_name)
20
+
21
def grammar_correction(text):
    """Return *text* with LanguageTool's suggested grammar fixes applied."""
    if not grammar_tool:
        raise Exception("Grammar tool not initialized")

    # Collect every rule match first, then apply all replacements in one pass.
    found_issues = grammar_tool.check(text)
    return language_tool_python.utils.correct(text, found_issues)
29
+
30
def tense_correction(text):
    """Paraphrase *text* with the T5 model to smooth tense/phrasing.

    Raises Exception if initialize_models() has not been called first.
    """
    if not tense_model or not tense_tokenizer:
        raise Exception("Tense correction models not initialized")

    # T5Tokenizer appends the EOS token itself; the original manual " </s>"
    # suffix produced a duplicated end-of-sequence token in the input.
    input_text = "paraphrase: " + text
    encoding = tense_tokenizer.encode_plus(
        input_text,
        padding='max_length',
        return_tensors="pt",
        max_length=256,
        truncation=True
    )
    input_ids = encoding["input_ids"]
    attention_masks = encoding["attention_mask"]

    # NOTE: the original passed temperature=1.5, but temperature only takes
    # effect when do_sample=True; under pure beam search it was ignored (and
    # triggers a transformers warning), so it is dropped here.
    outputs = tense_model.generate(
        input_ids=input_ids,
        attention_mask=attention_masks,
        max_length=256,
        num_return_sequences=1,
        num_beams=5
    )

    return tense_tokenizer.decode(outputs[0], skip_special_tokens=True)
56
+
57
def correct_paragraph(text):
    """Run the full correction pipeline: grammar pass, then tense pass."""
    # Stage 1: rule-based grammar fixes via LanguageTool.
    stage_one = grammar_correction(text)
    print("After Grammar Correction:", stage_one)

    # Stage 2: T5 paraphrase over the grammar-corrected text.
    stage_two = tense_correction(stage_one)
    print("After Grammar + Tense Correction:", stage_two)

    return stage_two
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ flask
2
+ language-tool-python
3
+ transformers
4
+ torch
5
+ sentencepiece
6
+ protobuf