import os
# Set cache dirs (must match Dockerfile env vars)
os.environ['HOME'] = '/app'
os.environ['HF_HOME'] = '/app/.hf_cache'
os.environ['LANGTOOL_HOME'] = '/app/.ltool_cache'
os.environ['XDG_CACHE_HOME'] = '/app/.cache'
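# Note: these assignments must come before the imports below, since
# transformers/huggingface_hub resolve HF_HOME when first imported
# (hedged: based on the libraries' documented cache-path behavior).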
import language_tool_python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
def pre_cache_models():
"""
Downloads and caches all required models and dependencies.
This script is run during the Docker build process.
"""
print("Caching LanguageTool model...")
try:
# This will download and cache the LanguageTool server files
language_tool_python.LanguageTool('en-US')
print("LanguageTool model cached successfully.")
except Exception as e:
print(f"Failed to cache LanguageTool: {e}")
print("\nCaching Hugging Face models...")
models_to_cache = [
"vennify/t5-base-grammar-correction",
"humarin/chatgpt_paraphraser_on_T5_base"
]
for model_name in models_to_cache:
try:
print(f"Caching {model_name}...")
# Cache both tokenizer and model files
AutoTokenizer.from_pretrained(model_name)
AutoModelForSeq2SeqLM.from_pretrained(model_name)
print(f"{model_name} cached successfully.")
except Exception as e:
print(f"Failed to cache {model_name}: {e}")
print("\nAll models have been cached.")
if __name__ == "__main__":
    pre_cache_models()
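
# A hypothetical sketch of how this script would be invoked during the image
# build (the actual Dockerfile is not part of this file, and the file name
# "pre_cache.py" is assumed):
#
#   ENV HOME=/app \
#       HF_HOME=/app/.hf_cache \
#       LANGTOOL_HOME=/app/.ltool_cache \
#       XDG_CACHE_HOME=/app/.cache
#   COPY pre_cache.py .
#   RUN python pre_cache.py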