Convomate-module / cache_models.py
shevadesuyash's picture
Update cache_models.py
0ebbe65 verified
raw
history blame
1.48 kB
import os
# Point every cache location at a directory under /app. These values are
# expected to mirror the env vars declared in the Dockerfile, and they are
# assigned before the library imports below so those libraries see them.
# NOTE(review): language_tool_python documents LTP_PATH for its download
# directory; confirm that LANGTOOL_HOME is actually consumed somewhere.
os.environ.update({
    'HOME': '/app',
    'HF_HOME': '/app/.hf_cache',
    'LANGTOOL_HOME': '/app/.ltool_cache',
    'XDG_CACHE_HOME': '/app/.cache',
})
import language_tool_python
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch
def pre_cache_models():
    """
    Download and cache all required models and dependencies.

    This script is run during the Docker build process. Caching is
    best-effort: a failure for one item is printed and the remaining items
    are still attempted. The closing message reports either that everything
    was cached or which items failed, so a partially failed run is not
    reported as a full success.
    """
    # Names of the items whose caching raised, in the order they were tried.
    failed = []

    print("Caching LanguageTool model...")
    try:
        # This will download and cache the LanguageTool server files
        tool = language_tool_python.LanguageTool('en-US')
    except Exception as e:
        print(f"Failed to cache LanguageTool: {e}")
        failed.append("LanguageTool")
    else:
        print("LanguageTool model cached successfully.")
        # The instance was only needed to trigger the download; shut it down
        # instead of leaving it alive for the rest of the script. A failure
        # here does not affect the cached files, so it is only reported.
        try:
            tool.close()
        except Exception as e:
            print(f"Warning: could not shut down LanguageTool cleanly: {e}")

    print("\nCaching Hugging Face models...")
    models_to_cache = [
        "vennify/t5-base-grammar-correction",
        "humarin/chatgpt_paraphraser_on_T5_base",
    ]
    for model_name in models_to_cache:
        try:
            print(f"Caching {model_name}...")
            # Cache both tokenizer and model files
            AutoTokenizer.from_pretrained(model_name)
            AutoModelForSeq2SeqLM.from_pretrained(model_name)
            print(f"{model_name} cached successfully.")
        except Exception as e:
            print(f"Failed to cache {model_name}: {e}")
            failed.append(model_name)

    if failed:
        print(f"\nCaching finished with failures: {', '.join(failed)}")
    else:
        print("\nAll models have been cached.")
# Script entry point: perform the caching only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    pre_cache_models()