shevadesuyash committed on
Commit
0ebbe65
·
verified ·
1 Parent(s): a342713

Update cache_models.py

Browse files
Files changed (1) hide show
  1. cache_models.py +44 -36
cache_models.py CHANGED
@@ -1,37 +1,45 @@
1
- import language_tool_python
2
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
- import torch
4
-
5
def pre_cache_models():
    """
    Fetch every model and dependency the app needs so that it lands in the
    local cache. Intended to be executed while the Docker image is built.
    """
    print("Caching LanguageTool model...")
    try:
        # Constructing the tool is what triggers the LanguageTool download.
        language_tool_python.LanguageTool('en-US')
        print("LanguageTool model cached successfully.")
    except Exception as lt_error:
        print(f"Failed to cache LanguageTool: {lt_error}")

    print("\nCaching Hugging Face models...")
    hf_repos = (
        "vennify/t5-base-grammar-correction",
        "humarin/chatgpt_paraphraser_on_T5_base",
    )

    for repo_id in hf_repos:
        try:
            print(f"Caching {repo_id}...")
            # Tokenizer first, then the weights, so both end up cached.
            AutoTokenizer.from_pretrained(repo_id)
            AutoModelForSeq2SeqLM.from_pretrained(repo_id)
            print(f"{repo_id} cached successfully.")
        except Exception as hf_error:
            # Best effort: report the problem and move on to the next repo.
            print(f"Failed to cache {repo_id}: {hf_error}")

    print("\nAll models have been cached.")


if __name__ == "__main__":
    pre_cache_models()
 
1
+ import os
2
+
3
# Cache locations, all rooted under /app (must match Dockerfile env vars).
# Kept ahead of the third-party imports below, presumably so those libraries
# see these values when they initialise.
# NOTE(review): language_tool_python documents LTP_PATH as its download
# directory variable - confirm that LANGTOOL_HOME is actually consumed.
os.environ.update({
    'HOME': '/app',
    'HF_HOME': '/app/.hf_cache',
    'LANGTOOL_HOME': '/app/.ltool_cache',
    'XDG_CACHE_HOME': '/app/.cache',
})
8
+
9
+ import language_tool_python
10
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
11
+ import torch
12
+
13
def pre_cache_models():
    """
    Downloads and caches all required models and dependencies.
    This script is run during the Docker build process.

    Caching is best effort: a failure in one component is reported on stdout
    and the remaining components are still attempted. Nothing is raised.

    Returns:
        list[str]: Names of the components that could not be cached
        ("LanguageTool" and/or Hugging Face model ids). Empty when
        everything was cached.
    """
    failed = []

    print("Caching LanguageTool model...")
    try:
        # Instantiating the tool downloads and caches the LanguageTool
        # server files.
        tool = language_tool_python.LanguageTool('en-US')
    except Exception as e:
        failed.append("LanguageTool")
        print(f"Failed to cache LanguageTool: {e}")
    else:
        print("LanguageTool model cached successfully.")
        try:
            # The tool runs a background Java server; shut it down explicitly
            # instead of waiting for garbage collection.
            tool.close()
        except Exception as e:
            # The files are already cached, so a shutdown problem is only
            # worth a warning.
            print(f"Warning: could not shut down LanguageTool cleanly: {e}")

    print("\nCaching Hugging Face models...")
    models_to_cache = [
        "vennify/t5-base-grammar-correction",
        "humarin/chatgpt_paraphraser_on_T5_base",
    ]

    for model_name in models_to_cache:
        try:
            print(f"Caching {model_name}...")
            # Cache both tokenizer and model files
            AutoTokenizer.from_pretrained(model_name)
            AutoModelForSeq2SeqLM.from_pretrained(model_name)
            print(f"{model_name} cached successfully.")
        except Exception as e:
            failed.append(model_name)
            print(f"Failed to cache {model_name}: {e}")

    # Only claim full success when every step actually succeeded.
    if failed:
        print(f"\nCaching finished with failures: {', '.join(failed)}")
    else:
        print("\nAll models have been cached.")

    return failed


if __name__ == "__main__":
    pre_cache_models()