shevadesuyash committed on
Commit
a8382bd
·
verified ·
1 Parent(s): 2ef014c

Delete paragraph_checker.py

Browse files
Files changed (1) hide show
  1. paragraph_checker.py +0 -62
paragraph_checker.py DELETED
@@ -1,62 +0,0 @@
1
- import language_tool_python
2
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
- import torch
4
-
5
class ParagraphCorrector:
    """Conservative grammar corrector combining LanguageTool with a T5 model.

    Text is split into sentence-like segments, each segment is corrected on
    its own, and the segments are stitched back together so that the
    whitespace between sentences (spaces, newlines) is kept from the input.
    """

    # Characters that may end a sentence.
    _TERMINATORS = frozenset(".!?")

    # The usage example on the vennify/t5-base-grammar-correction model card
    # feeds inputs prefixed with this task marker.
    _MODEL_PREFIX = "grammar: "

    def __init__(self):
        """Load the LanguageTool checker and the neural correction pipeline."""
        # 'maxTextLength' is a LanguageTool server setting; it raises the
        # maximum size of text the server accepts (it is not a timeout).
        self.grammar_tool = language_tool_python.LanguageTool(
            'en-US',
            config={'maxTextLength': 100000}
        )

        # Seq2seq grammar model; runs on the first GPU when CUDA is
        # available, otherwise on the CPU (device=-1).
        self.grammar_model = pipeline(
            "text2text-generation",
            model="vennify/t5-base-grammar-correction",
            device=0 if torch.cuda.is_available() else -1
        )

    def correct_sentence(self, sentence: str) -> str:
        """Correct a single sentence.

        The sentence first goes through LanguageTool's rule-based fixes and
        the result is then passed to the neural model.

        Args:
            sentence: The sentence to correct.

        Returns:
            The corrected sentence. Blank input is returned unchanged
            without invoking either tool.
        """
        if not sentence.strip():
            return sentence

        # Rule-based grammar/spelling correction.
        matches = self.grammar_tool.check(sentence)
        rule_fixed = language_tool_python.utils.correct(sentence, matches)

        # Deterministic beam search. No `temperature` is passed: it only
        # applies to sampling (`do_sample=True`), so with beam search it
        # had no effect and only produced a warning.
        result = self.grammar_model(
            self._MODEL_PREFIX + rule_fixed,
            max_length=256,
            num_beams=3,
            early_stopping=True
        )
        return result[0]['generated_text']

    @classmethod
    def _split_sentences(cls, text: str) -> list:
        """Split *text* into segments whose concatenation equals *text*.

        A segment ends after a run of terminators ('.', '!', '?') that is
        followed by whitespace or the end of the text. Terminators inside a
        token ("3.14", "example.com") or inside a run ("...", "?!") do not
        split.
        """
        segments = []
        start = 0
        length = len(text)
        for index, char in enumerate(text):
            if char not in cls._TERMINATORS:
                continue
            following = index + 1
            if following == length or text[following].isspace():
                segments.append(text[start:following])
                start = following
        if start < length:
            # Trailing text without a terminator.
            segments.append(text[start:])
        return segments

    @staticmethod
    def _split_padding(segment: str) -> tuple:
        """Return (leading whitespace, stripped core, trailing whitespace)."""
        core = segment.strip()
        if not core:
            return segment, "", ""
        lead_len = len(segment) - len(segment.lstrip())
        return segment[:lead_len], core, segment[lead_len + len(core):]

    def conservative_correction(self, text: str) -> str:
        """Correct *text* sentence by sentence, preserving its layout.

        Args:
            text: The paragraph (or longer text) to correct.

        Returns:
            The corrected text. Empty or whitespace-only input is returned
            unchanged. Whitespace around each sentence is copied from the
            input, and segments with no letters or digits (e.g. a stray
            "...") are passed through untouched.
        """
        if not text.strip():
            return text

        pieces = []
        for segment in self._split_sentences(text):
            lead, core, trail = self._split_padding(segment)
            # Nothing to correct in pure punctuation/whitespace; sending it
            # to the model would only invite invented text.
            if not any(ch.isalnum() for ch in core):
                pieces.append(segment)
                continue
            # Only the core goes through the tools, which do not reliably
            # keep surrounding whitespace; re-attach the original padding
            # so sentences do not fuse together.
            pieces.append(lead + self.correct_sentence(core).strip() + trail)
        return ''.join(pieces)