RickyGuoTheCrazish
committed on
Commit · 47d83ad
1 Parent(s): 33c14bd
update docker file
- .dockerignore +73 -0
- Dockerfile +18 -2
- README.md +7 -0
- src/sentiment_analyzer.py +27 -3
.dockerignore
ADDED
@@ -0,0 +1,73 @@
+# Python cache files
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+venv/
+env/
+ENV/
+
+# IDE files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Git
+.git/
+.gitignore
+
+# Test files
+test_*.py
+*_test.py
+tests/
+
+# Documentation
+*.md
+USAGE.md
+
+# Local development files
+.env
+.env.local
+*.log
+
+# Jupyter notebooks
+*.ipynb
+.ipynb_checkpoints/
+
+# Coverage reports
+htmlcov/
+.coverage
+.coverage.*
+coverage.xml
+
+# pytest
+.pytest_cache/
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
Dockerfile
CHANGED
@@ -1,7 +1,8 @@
-FROM python:3.
+FROM python:3.11-slim
 
 WORKDIR /app
 
+# Install system dependencies
 RUN apt-get update && apt-get install -y \
     build-essential \
     curl \
@@ -9,10 +10,25 @@ RUN apt-get update && apt-get install -y \
     git \
     && rm -rf /var/lib/apt/lists/*
 
+# Copy requirements first for better caching
 COPY requirements.txt ./
+
+# Install Python dependencies
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Copy source code
 COPY src/ ./src/
 
-
+# Set environment variables for model loading
+ENV TRANSFORMERS_CACHE=/app/.cache/transformers
+ENV HF_HOME=/app/.cache/huggingface
+ENV TORCH_HOME=/app/.cache/torch
+
+# Create cache directories
+RUN mkdir -p /app/.cache/transformers /app/.cache/huggingface /app/.cache/torch
+
+# Pre-download the FinBERT model to avoid runtime download issues
+RUN python3 -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; import torch; print('Pre-downloading FinBERT model...'); tokenizer = AutoTokenizer.from_pretrained('ProsusAI/finbert'); model = AutoModelForSequenceClassification.from_pretrained('ProsusAI/finbert'); print('FinBERT model downloaded successfully!')"
 
 EXPOSE 8501
 
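Because the image now pre-downloads ProsusAI/finbert into /app/.cache at build time, one way to sanity-check a build is to load the model with Hugging Face's offline mode switched on, so the load can only succeed from the baked-in cache. The following is a minimal editorial sketch, not part of this commit; the file name check_offline_load.py and the image tag finbert-market-eval (taken from the README commands added below) are illustrative only.

# check_offline_load.py - editorial sketch, not part of this commit.
# Run inside the built image, e.g.:
#   docker run --rm finbert-market-eval python3 check_offline_load.py
import os

# Force offline mode before importing transformers so no network fallback is possible.
os.environ["HF_HUB_OFFLINE"] = "1"
os.environ["TRANSFORMERS_OFFLINE"] = "1"

from transformers import AutoModelForSequenceClassification, AutoTokenizer

# These loads succeed only if the build-time pre-download put the files in the cache.
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
print("FinBERT loaded from the baked-in cache with", model.config.num_labels, "labels")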
README.md
CHANGED
@@ -59,3 +59,10 @@ streamlit run src/streamlit_app.py
 - 30-second rate limit between requests
 - Needs 1+ day old news (requires market data)
 - Uses Yahoo Finance (free but limited)
+
+
+# Build the Docker image
+docker build -t finbert-market-eval .
+
+# Run locally to test
+docker run -p 8501:8501 finbert-market-eval
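Assuming the commands above are run as-is, the Streamlit app should then be reachable at http://localhost:8501, since the Dockerfile exposes port 8501 and the run command maps it to the same host port.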
src/sentiment_analyzer.py
CHANGED
@@ -39,14 +39,38 @@ class FinBERTAnalyzer:
         """
         try:
             logger.info(f"Loading FinBERT model: {_self.model_name}")
-
-
+
+            # Try to load tokenizer first
+            _self.tokenizer = AutoTokenizer.from_pretrained(
+                _self.model_name,
+                cache_dir=None,  # Use default cache
+                local_files_only=False  # Allow downloading if needed
+            )
+            logger.info("Tokenizer loaded successfully")
+
+            # Load model
+            _self.model = AutoModelForSequenceClassification.from_pretrained(
+                _self.model_name,
+                cache_dir=None,  # Use default cache
+                local_files_only=False  # Allow downloading if needed
+            )
             _self.model.to(_self.device)
             _self.model.eval()
             logger.info("FinBERT model loaded successfully")
             return True
+
         except Exception as e:
-
+            error_msg = f"Error loading FinBERT model: {str(e)}"
+            logger.error(error_msg)
+
+            # Provide helpful error messages
+            if "Connection" in str(e) or "timeout" in str(e).lower():
+                logger.error("Network connection issue. Check internet connectivity.")
+            elif "disk" in str(e).lower() or "space" in str(e).lower():
+                logger.error("Insufficient disk space for model download.")
+            elif "permission" in str(e).lower():
+                logger.error("Permission denied. Check file/directory permissions.")
+
             return False
 
     def analyze_sentiment(self, text: str) -> Dict[str, float]:
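The changed loader above only affects how the tokenizer and model are fetched and how failures are reported; the scoring path behind analyze_sentiment is not shown in this diff. As a reference for what that path typically looks like with the same ProsusAI/finbert checkpoint, here is a standalone sketch using the standard transformers API; it is not the repository's FinBERTAnalyzer code, and the helper name score_text is made up for illustration.

# Standalone sketch of FinBERT sentiment scoring - not code from this repository.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_NAME = "ProsusAI/finbert"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()

def score_text(text: str) -> dict[str, float]:
    """Return a label -> probability mapping (positive/negative/neutral) for one text."""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=-1)[0]
    # id2label comes from the checkpoint's config, so label order is not hard-coded here.
    return {model.config.id2label[i]: float(p) for i, p in enumerate(probs)}

print(score_text("The company beat earnings expectations and raised guidance."))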