Spaces:
Running
Running
Refactor and streamline Cocktail Suggestions project
Browse files
- Removed README.md and SETUP_GUIDE.md files to consolidate documentation.
- Enhanced app.py with error handling for recommender initialization and environment variable settings for Hugging Face Spaces compatibility.
- Deleted data_processor.py, debug.py, demo_setup.py, docker-compose.yml, quickstart.py, setup.sh, test_system.py files to simplify the project structure.
- Added start.sh script to manage environment setup and start the Streamlit app with appropriate configurations.
- Updated recommender.py to improve model loading with fallback options.
- Removed requirements.txt to streamline dependency management.
- .gitignore +60 -0
- Dockerfile +52 -5
- README.md +21 -7
- src/.env.example +0 -9
- src/.gitignore +0 -219
- src/Dockerfile +0 -28
- src/README.md +0 -219
- src/SETUP_GUIDE.md +0 -142
- src/app.py +16 -7
- src/data_processor.py +0 -256
- src/debug.py +0 -203
- src/demo_setup.py +0 -96
- src/docker-compose.yml +0 -37
- src/quickstart.py +0 -116
- src/recommender.py +12 -2
- src/requirements.txt +0 -9
- src/setup.sh +0 -33
- src/test_system.py +0 -165
- start.sh +23 -0
.gitignore
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
*.so
|
6 |
+
.Python
|
7 |
+
build/
|
8 |
+
develop-eggs/
|
9 |
+
dist/
|
10 |
+
downloads/
|
11 |
+
eggs/
|
12 |
+
.eggs/
|
13 |
+
lib/
|
14 |
+
lib64/
|
15 |
+
parts/
|
16 |
+
sdist/
|
17 |
+
var/
|
18 |
+
wheels/
|
19 |
+
share/python-wheels/
|
20 |
+
*.egg-info/
|
21 |
+
.installed.cfg
|
22 |
+
*.egg
|
23 |
+
MANIFEST
|
24 |
+
|
25 |
+
# Virtual environments
|
26 |
+
.env
|
27 |
+
.venv
|
28 |
+
env/
|
29 |
+
venv/
|
30 |
+
ENV/
|
31 |
+
env.bak/
|
32 |
+
venv.bak/
|
33 |
+
|
34 |
+
# IDE
|
35 |
+
.vscode/
|
36 |
+
.idea/
|
37 |
+
*.swp
|
38 |
+
*.swo
|
39 |
+
|
40 |
+
# OS
|
41 |
+
.DS_Store
|
42 |
+
Thumbs.db
|
43 |
+
|
44 |
+
# Logs
|
45 |
+
*.log
|
46 |
+
|
47 |
+
# Database
|
48 |
+
*.db
|
49 |
+
*.sqlite
|
50 |
+
|
51 |
+
# Cache
|
52 |
+
.cache/
|
53 |
+
.streamlit/
|
54 |
+
|
55 |
+
# Hugging Face
|
56 |
+
.huggingface/
|
57 |
+
|
58 |
+
# Model cache
|
59 |
+
models/
|
60 |
+
checkpoints/
|
Dockerfile
CHANGED
@@ -1,7 +1,24 @@
|
|
1 |
FROM python:3.9-slim
|
2 |
|
3 |
-
|
|
|
|
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
RUN apt-get update && apt-get install -y \
|
6 |
build-essential \
|
7 |
curl \
|
@@ -9,13 +26,43 @@ RUN apt-get update && apt-get install -y \
|
|
9 |
git \
|
10 |
&& rm -rf /var/lib/apt/lists/*
|
11 |
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
EXPOSE 8501
|
18 |
|
19 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
20 |
|
21 |
-
ENTRYPOINT ["
|
|
|
1 |
FROM python:3.9-slim
|
2 |
|
3 |
+
# Create a non-root user
|
4 |
+
RUN useradd -m -u 1000 user
|
5 |
+
USER user
|
6 |
|
7 |
+
# Set home directory and working directory
|
8 |
+
ENV HOME=/home/user \
|
9 |
+
PATH=/home/user/.local/bin:$PATH \
|
10 |
+
PYTHONPATH=$HOME/app \
|
11 |
+
PYTHONUNBUFFERED=1 \
|
12 |
+
GRADIO_ALLOW_FLAGGING=never \
|
13 |
+
GRADIO_NUM_PORTS=1 \
|
14 |
+
GRADIO_SERVER_NAME=0.0.0.0 \
|
15 |
+
GRADIO_THEME=huggingface \
|
16 |
+
SYSTEM=spaces
|
17 |
+
|
18 |
+
WORKDIR $HOME/app
|
19 |
+
|
20 |
+
# Install system dependencies
|
21 |
+
USER root
|
22 |
RUN apt-get update && apt-get install -y \
|
23 |
build-essential \
|
24 |
curl \
|
|
|
26 |
git \
|
27 |
&& rm -rf /var/lib/apt/lists/*
|
28 |
|
29 |
+
# Switch back to user
|
30 |
+
USER user
|
31 |
+
|
32 |
+
# Set up cache and config directories with proper permissions
|
33 |
+
RUN mkdir -p $HOME/.cache/huggingface \
|
34 |
+
&& mkdir -p $HOME/.streamlit \
|
35 |
+
&& mkdir -p $HOME/.cache/torch
|
36 |
+
|
37 |
+
# Set environment variables for caching
|
38 |
+
ENV HF_HOME=$HOME/.cache/huggingface \
|
39 |
+
TRANSFORMERS_CACHE=$HOME/.cache/huggingface \
|
40 |
+
SENTENCE_TRANSFORMERS_HOME=$HOME/.cache/huggingface \
|
41 |
+
TORCH_HOME=$HOME/.cache/torch
|
42 |
+
|
43 |
+
# Copy files
|
44 |
+
COPY --chown=user requirements.txt ./
|
45 |
+
COPY --chown=user src/ ./src/
|
46 |
+
COPY --chown=user start.sh ./
|
47 |
+
|
48 |
+
# Make start script executable
|
49 |
+
RUN chmod +x start.sh
|
50 |
+
|
51 |
+
# Install Python dependencies
|
52 |
+
RUN pip3 install --user -r requirements.txt
|
53 |
|
54 |
+
# Create Streamlit config
|
55 |
+
RUN echo "[server]\n\
|
56 |
+
headless = true\n\
|
57 |
+
port = 8501\n\
|
58 |
+
address = \"0.0.0.0\"\n\
|
59 |
+
\n\
|
60 |
+
[browser]\n\
|
61 |
+
gatherUsageStats = false\n\
|
62 |
+
" > $HOME/.streamlit/config.toml
|
63 |
|
64 |
EXPOSE 8501
|
65 |
|
66 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
67 |
|
68 |
+
ENTRYPOINT ["./start.sh"]
|
README.md
CHANGED
@@ -1,19 +1,33 @@
|
|
1 |
---
|
2 |
title: Cocktail Suggestions
|
3 |
-
emoji:
|
4 |
colorFrom: red
|
5 |
-
colorTo:
|
6 |
sdk: docker
|
7 |
app_port: 8501
|
8 |
tags:
|
9 |
- streamlit
|
|
|
|
|
|
|
10 |
pinned: false
|
11 |
-
short_description: AI-Powered Cocktail
|
12 |
---
|
13 |
|
14 |
-
#
|
15 |
|
16 |
-
|
17 |
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
title: Cocktail Suggestions
|
3 |
+
emoji: 🍹
|
4 |
colorFrom: red
|
5 |
+
colorTo: purple
|
6 |
sdk: docker
|
7 |
app_port: 8501
|
8 |
tags:
|
9 |
- streamlit
|
10 |
+
- cocktails
|
11 |
+
- ai
|
12 |
+
- recommendations
|
13 |
pinned: false
|
14 |
+
short_description: AI-Powered Cocktail Recommendation System
|
15 |
---
|
16 |
|
17 |
+
# 🍹 Cocktail Suggestions
|
18 |
|
19 |
+
An AI-powered cocktail recommendation system that helps you discover new drinks based on your preferences.
|
20 |
|
21 |
+
## Features
|
22 |
+
|
23 |
+
- Intelligent cocktail recommendations using semantic search
|
24 |
+
- Browse cocktails by category, glass type, and ingredients
|
25 |
+
- Beautiful, modern UI with responsive design
|
26 |
+
- Powered by sentence transformers and vector similarity search
|
27 |
+
|
28 |
+
## How to Use
|
29 |
+
|
30 |
+
1. Select your preferences from the sidebar
|
31 |
+
2. Choose ingredients you like or want to try
|
32 |
+
3. Get personalized cocktail recommendations
|
33 |
+
4. Explore new drinks and save your favorites!
|
src/.env.example
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
# Environment variables
|
2 |
-
DB_HOST=localhost
|
3 |
-
DB_PORT=5432
|
4 |
-
DB_NAME=cocktails_db
|
5 |
-
DB_USER=postgres
|
6 |
-
DB_PASSWORD=your_password
|
7 |
-
|
8 |
-
# Vector embedding model
|
9 |
-
MODEL_NAME=all-MiniLM-L6-v2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/.gitignore
DELETED
@@ -1,219 +0,0 @@
|
|
1 |
-
# Byte-compiled / optimized / DLL files
|
2 |
-
__pycache__/
|
3 |
-
*.py[codz]
|
4 |
-
*$py.class
|
5 |
-
|
6 |
-
# C extensions
|
7 |
-
*.so
|
8 |
-
|
9 |
-
# Distribution / packaging
|
10 |
-
.Python
|
11 |
-
build/
|
12 |
-
develop-eggs/
|
13 |
-
dist/
|
14 |
-
downloads/
|
15 |
-
eggs/
|
16 |
-
.eggs/
|
17 |
-
lib/
|
18 |
-
lib64/
|
19 |
-
parts/
|
20 |
-
sdist/
|
21 |
-
var/
|
22 |
-
wheels/
|
23 |
-
share/python-wheels/
|
24 |
-
*.egg-info/
|
25 |
-
.installed.cfg
|
26 |
-
*.egg
|
27 |
-
MANIFEST
|
28 |
-
|
29 |
-
# Project-specific files
|
30 |
-
.env
|
31 |
-
data/cocktails.csv
|
32 |
-
data/*.csv
|
33 |
-
logs/
|
34 |
-
*.log
|
35 |
-
|
36 |
-
# PyInstaller
|
37 |
-
# Usually these files are written by a python script from a template
|
38 |
-
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
39 |
-
*.manifest
|
40 |
-
*.spec
|
41 |
-
|
42 |
-
# Installer logs
|
43 |
-
pip-log.txt
|
44 |
-
pip-delete-this-directory.txt
|
45 |
-
|
46 |
-
# Unit test / coverage reports
|
47 |
-
htmlcov/
|
48 |
-
.tox/
|
49 |
-
.nox/
|
50 |
-
.coverage
|
51 |
-
.coverage.*
|
52 |
-
.cache
|
53 |
-
nosetests.xml
|
54 |
-
coverage.xml
|
55 |
-
*.cover
|
56 |
-
*.py.cover
|
57 |
-
.hypothesis/
|
58 |
-
.pytest_cache/
|
59 |
-
cover/
|
60 |
-
|
61 |
-
# Translations
|
62 |
-
*.mo
|
63 |
-
*.pot
|
64 |
-
|
65 |
-
# Django stuff:
|
66 |
-
*.log
|
67 |
-
local_settings.py
|
68 |
-
db.sqlite3
|
69 |
-
db.sqlite3-journal
|
70 |
-
|
71 |
-
# Flask stuff:
|
72 |
-
instance/
|
73 |
-
.webassets-cache
|
74 |
-
|
75 |
-
# Scrapy stuff:
|
76 |
-
.scrapy
|
77 |
-
|
78 |
-
# Sphinx documentation
|
79 |
-
docs/_build/
|
80 |
-
|
81 |
-
# PyBuilder
|
82 |
-
.pybuilder/
|
83 |
-
target/
|
84 |
-
|
85 |
-
# Jupyter Notebook
|
86 |
-
.ipynb_checkpoints
|
87 |
-
|
88 |
-
# IPython
|
89 |
-
profile_default/
|
90 |
-
ipython_config.py
|
91 |
-
|
92 |
-
# pyenv
|
93 |
-
# For a library or package, you might want to ignore these files since the code is
|
94 |
-
# intended to run in multiple environments; otherwise, check them in:
|
95 |
-
# .python-version
|
96 |
-
|
97 |
-
# pipenv
|
98 |
-
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
99 |
-
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
100 |
-
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
101 |
-
# install all needed dependencies.
|
102 |
-
#Pipfile.lock
|
103 |
-
|
104 |
-
# UV
|
105 |
-
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
106 |
-
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
107 |
-
# commonly ignored for libraries.
|
108 |
-
#uv.lock
|
109 |
-
|
110 |
-
# poetry
|
111 |
-
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
112 |
-
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
113 |
-
# commonly ignored for libraries.
|
114 |
-
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
115 |
-
#poetry.lock
|
116 |
-
#poetry.toml
|
117 |
-
|
118 |
-
# pdm
|
119 |
-
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
120 |
-
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
121 |
-
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
122 |
-
#pdm.lock
|
123 |
-
#pdm.toml
|
124 |
-
.pdm-python
|
125 |
-
.pdm-build/
|
126 |
-
|
127 |
-
# pixi
|
128 |
-
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
129 |
-
#pixi.lock
|
130 |
-
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
131 |
-
# in the .venv directory. It is recommended not to include this directory in version control.
|
132 |
-
.pixi
|
133 |
-
|
134 |
-
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
135 |
-
__pypackages__/
|
136 |
-
|
137 |
-
# Celery stuff
|
138 |
-
celerybeat-schedule
|
139 |
-
celerybeat.pid
|
140 |
-
|
141 |
-
# SageMath parsed files
|
142 |
-
*.sage.py
|
143 |
-
|
144 |
-
# Environments
|
145 |
-
.env
|
146 |
-
.envrc
|
147 |
-
.venv
|
148 |
-
env/
|
149 |
-
venv/
|
150 |
-
ENV/
|
151 |
-
env.bak/
|
152 |
-
venv.bak/
|
153 |
-
|
154 |
-
# Spyder project settings
|
155 |
-
.spyderproject
|
156 |
-
.spyproject
|
157 |
-
|
158 |
-
# Rope project settings
|
159 |
-
.ropeproject
|
160 |
-
|
161 |
-
# mkdocs documentation
|
162 |
-
/site
|
163 |
-
|
164 |
-
# mypy
|
165 |
-
.mypy_cache/
|
166 |
-
.dmypy.json
|
167 |
-
dmypy.json
|
168 |
-
|
169 |
-
# Pyre type checker
|
170 |
-
.pyre/
|
171 |
-
|
172 |
-
# pytype static type analyzer
|
173 |
-
.pytype/
|
174 |
-
|
175 |
-
# Cython debug symbols
|
176 |
-
cython_debug/
|
177 |
-
|
178 |
-
# PyCharm
|
179 |
-
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
180 |
-
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
181 |
-
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
182 |
-
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
183 |
-
#.idea/
|
184 |
-
|
185 |
-
# Abstra
|
186 |
-
# Abstra is an AI-powered process automation framework.
|
187 |
-
# Ignore directories containing user credentials, local state, and settings.
|
188 |
-
# Learn more at https://abstra.io/docs
|
189 |
-
.abstra/
|
190 |
-
|
191 |
-
# Visual Studio Code
|
192 |
-
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
193 |
-
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
194 |
-
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
195 |
-
# you could uncomment the following to ignore the entire vscode folder
|
196 |
-
# .vscode/
|
197 |
-
|
198 |
-
# Ruff stuff:
|
199 |
-
.ruff_cache/
|
200 |
-
|
201 |
-
# PyPI configuration file
|
202 |
-
.pypirc
|
203 |
-
|
204 |
-
# Cursor
|
205 |
-
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
206 |
-
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
207 |
-
# refer to https://docs.cursor.com/context/ignore-files
|
208 |
-
.cursorignore
|
209 |
-
.cursorindexingignore
|
210 |
-
|
211 |
-
# Marimo
|
212 |
-
marimo/_static/
|
213 |
-
marimo/_lsp/
|
214 |
-
__marimo__/
|
215 |
-
.vscode
|
216 |
-
*.npy
|
217 |
-
*.csv
|
218 |
-
*.json
|
219 |
-
.env.supabase
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/Dockerfile
DELETED
@@ -1,28 +0,0 @@
|
|
1 |
-
FROM python:3.10-slim
|
2 |
-
|
3 |
-
WORKDIR /app
|
4 |
-
|
5 |
-
# Install system dependencies
|
6 |
-
RUN apt-get update && apt-get install -y \
|
7 |
-
gcc \
|
8 |
-
postgresql-client \
|
9 |
-
&& rm -rf /var/lib/apt/lists/*
|
10 |
-
|
11 |
-
# Copy requirements first for better caching
|
12 |
-
COPY requirements.txt .
|
13 |
-
RUN pip install --no-cache-dir -r requirements.txt
|
14 |
-
|
15 |
-
# Copy application code
|
16 |
-
COPY . .
|
17 |
-
|
18 |
-
# Create data directory
|
19 |
-
RUN mkdir -p data logs
|
20 |
-
|
21 |
-
# Expose Streamlit port
|
22 |
-
EXPOSE 8501
|
23 |
-
|
24 |
-
# Health check
|
25 |
-
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
26 |
-
|
27 |
-
# Run the application
|
28 |
-
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/README.md
DELETED
@@ -1,219 +0,0 @@
|
|
1 |
-
# 🍹 AI-Powered Cocktail Suggestions
|
2 |
-
|
3 |
-
An intelligent cocktail recommendation system using vector databases and AI embeddings to suggest the perfect drinks based on your preferences.
|
4 |
-
|
5 |
-
## 🚀 Live Demos
|
6 |
-
|
7 |
-
[Live demo on Streamlit](https://cocktail-suggestions.streamlit.app/)
|
8 |
-
[Live demo on Hugging Face Spaces](https://huggingface.co/spaces/thuanan/cocktail_suggestions)
|
9 |
-
|
10 |
-
> 🎯 **Try the live demos above to explore cocktail recommendations without any setup!**
|
11 |
-
|
12 |
-
## 🎯 Project Overview
|
13 |
-
|
14 |
-
This project creates a smart cocktail recommendation system that:
|
15 |
-
|
16 |
-
- Stores cocktail recipes in a vector database using pgvector
|
17 |
-
- Uses AI embeddings to understand cocktail characteristics
|
18 |
-
- Provides personalized suggestions based on user preferences
|
19 |
-
- Features a beautiful Streamlit web interface
|
20 |
-
|
21 |
-
## 🏗️ Architecture
|
22 |
-
|
23 |
-
- **Database**: PostgreSQL with pgvector extension for vector similarity search
|
24 |
-
- **AI Model**: SentenceTransformers for generating embeddings
|
25 |
-
- **Web Framework**: Streamlit for the user interface
|
26 |
-
- **Dataset**: Kaggle cocktails dataset with 600+ recipes
|
27 |
-
|
28 |
-
## 📊 Dataset
|
29 |
-
|
30 |
-
**Source**: https://www.kaggle.com/datasets/aadyasingh55/cocktails/data
|
31 |
-
|
32 |
-
## 🛠️ Technology Stack
|
33 |
-
|
34 |
-
- **Vector Database**: [pgvector](https://github.com/pgvector/pgvector)
|
35 |
-
- **Web Framework**: Streamlit
|
36 |
-
- **AI/ML**: SentenceTransformers, scikit-learn
|
37 |
-
- **Database**: PostgreSQL
|
38 |
-
- **Language**: Python 3.8+
|
39 |
-
|
40 |
-
## 🚀 Quick Start
|
41 |
-
|
42 |
-
> **💡 Want to try it first?** Check out our [live demos](#-live-demos) above for instant access without any setup!
|
43 |
-
|
44 |
-
### Option 1: Docker (Recommended)
|
45 |
-
|
46 |
-
```bash
|
47 |
-
# Clone the repository
|
48 |
-
git clone https://github.com/ThuanNaN/aio2025_cocktail_suggestions
|
49 |
-
cd aio2025_cocktail_suggestions
|
50 |
-
|
51 |
-
# Download the dataset
|
52 |
-
# Place cocktails.csv in the data/ directory
|
53 |
-
|
54 |
-
# Start with Docker Compose
|
55 |
-
docker-compose up -d
|
56 |
-
|
57 |
-
# Set up the database (first time only)
|
58 |
-
docker-compose exec cocktail-app python database_setup.py
|
59 |
-
docker-compose exec cocktail-app python data_processor.py
|
60 |
-
|
61 |
-
# Access the app at http://localhost:8501
|
62 |
-
```
|
63 |
-
|
64 |
-
### Option 2: Local Setup
|
65 |
-
|
66 |
-
```bash
|
67 |
-
# Clone the repository
|
68 |
-
git clone <repository-url>
|
69 |
-
cd aio2025_cocktail_suggestions
|
70 |
-
|
71 |
-
# Run the quick setup
|
72 |
-
python quickstart.py
|
73 |
-
|
74 |
-
# Or manual setup:
|
75 |
-
pip install -r requirements.txt
|
76 |
-
cp .env.example .env
|
77 |
-
# Edit .env with your database credentials
|
78 |
-
|
79 |
-
# Set up PostgreSQL with pgvector
|
80 |
-
# Run the database setup
|
81 |
-
python database_setup.py
|
82 |
-
|
83 |
-
# Process and store the cocktail data
|
84 |
-
python data_processor.py
|
85 |
-
|
86 |
-
# Start the Streamlit app
|
87 |
-
streamlit run app.py
|
88 |
-
```
|
89 |
-
|
90 |
-
## 📋 Prerequisites
|
91 |
-
|
92 |
-
### For Local Setup
|
93 |
-
|
94 |
-
- Python 3.8+
|
95 |
-
- PostgreSQL with pgvector extension
|
96 |
-
- Git
|
97 |
-
|
98 |
-
### For Docker Setup
|
99 |
-
|
100 |
-
- Docker and Docker Compose
|
101 |
-
|
102 |
-
## 🔧 Configuration
|
103 |
-
|
104 |
-
1. **Environment Variables** (`.env` file):
|
105 |
-
|
106 |
-
```env
|
107 |
-
DB_HOST=localhost
|
108 |
-
DB_PORT=5432
|
109 |
-
DB_NAME=cocktails_db
|
110 |
-
DB_USER=postgres
|
111 |
-
DB_PASSWORD=your_password
|
112 |
-
MODEL_NAME=all-MiniLM-L6-v2
|
113 |
-
```
|
114 |
-
|
115 |
-
2. **Database Setup**:
|
116 |
-
- Install PostgreSQL
|
117 |
-
- Install pgvector extension
|
118 |
-
- Create database and user
|
119 |
-
|
120 |
-
3. **Dataset**:
|
121 |
-
- Download from Kaggle
|
122 |
-
- Place `cocktails.csv` in `data/` directory
|
123 |
-
|
124 |
-
## 🎮 Features
|
125 |
-
|
126 |
-
### 🔍 Search Options
|
127 |
-
|
128 |
-
- **By Name**: Find specific cocktails
|
129 |
-
- **By Ingredients**: Get suggestions based on available ingredients
|
130 |
-
- **By Style/Mood**: Find drinks matching your mood (sweet, strong, refreshing, etc.)
|
131 |
-
- **By Occasion**: Perfect drinks for parties, date nights, etc.
|
132 |
-
- **Mixed Preferences**: Combine multiple criteria
|
133 |
-
- **By Category**: Browse by drink categories
|
134 |
-
- **Random Discovery**: Let AI surprise you
|
135 |
-
|
136 |
-
### 🎨 User Interface
|
137 |
-
|
138 |
-
- Modern, responsive design
|
139 |
-
- Real-time search and filtering
|
140 |
-
- Similarity scores for recommendations
|
141 |
-
- Detailed recipe information
|
142 |
-
- Ingredient highlighting
|
143 |
-
|
144 |
-
### 🧠 AI Features
|
145 |
-
|
146 |
-
- Vector similarity search
|
147 |
-
- Semantic understanding of preferences
|
148 |
-
- Contextual recommendations
|
149 |
-
- Personalized suggestions
|
150 |
-
|
151 |
-
## 📁 Project Structure
|
152 |
-
|
153 |
-
```text
|
154 |
-
aio2025_cocktail_suggestions/
|
155 |
-
├── app.py # Main Streamlit application
|
156 |
-
├── database_setup.py # Database initialization
|
157 |
-
├── data_processor.py # Data processing and embedding generation
|
158 |
-
├── recommender.py # Recommendation engine
|
159 |
-
├── requirements.txt # Python dependencies
|
160 |
-
├── docker-compose.yml # Docker setup
|
161 |
-
├── Dockerfile # Docker configuration
|
162 |
-
├── quickstart.py # Quick setup script
|
163 |
-
├── setup.sh # Bash setup script
|
164 |
-
├── .env.example # Environment variables template
|
165 |
-
├── data/ # Dataset directory
|
166 |
-
│ ├── README.md
|
167 |
-
│ └── final_cocktails.csv # (Download required)
|
168 |
-
└── README.md # This file
|
169 |
-
```
|
170 |
-
|
171 |
-
## 🔬 How It Works
|
172 |
-
|
173 |
-
1. **Data Processing**: Cocktail recipes are processed and converted into high-dimensional vectors using SentenceTransformers
|
174 |
-
2. **Vector Storage**: Embeddings are stored in PostgreSQL with pgvector for efficient similarity search
|
175 |
-
3. **Recommendation**: User preferences are converted to vectors and matched against the database using cosine similarity
|
176 |
-
4. **Ranking**: Results are ranked by similarity score and presented through the web interface
|
177 |
-
|
178 |
-
## 🎯 Use Cases
|
179 |
-
|
180 |
-
- **Home Bartenders**: Discover new cocktails based on available ingredients
|
181 |
-
- **Cocktail Enthusiasts**: Explore drinks by style and preference
|
182 |
-
- **Event Planning**: Find perfect drinks for specific occasions
|
183 |
-
- **Learning**: Understand cocktail composition and flavor profiles
|
184 |
-
|
185 |
-
## 🔮 Future Enhancements
|
186 |
-
|
187 |
-
- User rating system
|
188 |
-
- Personal cocktail collection
|
189 |
-
- Ingredient substitution suggestions
|
190 |
-
- Nutritional information
|
191 |
-
- Social sharing features
|
192 |
-
- Mobile app version
|
193 |
-
|
194 |
-
## 🤝 Contributing
|
195 |
-
|
196 |
-
1. Fork the repository
|
197 |
-
2. Create a feature branch
|
198 |
-
3. Make your changes
|
199 |
-
4. Add tests if applicable
|
200 |
-
5. Submit a pull request
|
201 |
-
|
202 |
-
## 📄 License
|
203 |
-
|
204 |
-
This project is open source and available under the MIT License.
|
205 |
-
|
206 |
-
## 🆘 Troubleshooting
|
207 |
-
|
208 |
-
**Common Issues:**
|
209 |
-
|
210 |
-
1. **Database Connection Error**: Check your `.env` file and ensure PostgreSQL is running
|
211 |
-
2. **pgvector Extension**: Make sure pgvector is properly installed in PostgreSQL
|
212 |
-
3. **Dataset Not Found**: Download the cocktails.csv file and place it in the data/ directory
|
213 |
-
4. **Memory Issues**: The embedding generation can be memory-intensive; consider processing in batches
|
214 |
-
|
215 |
-
**Support:**
|
216 |
-
|
217 |
-
- Check the logs in the `logs/` directory
|
218 |
-
- Ensure all dependencies are installed correctly
|
219 |
-
- Verify database credentials and connectivity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/SETUP_GUIDE.md
DELETED
@@ -1,142 +0,0 @@
|
|
1 |
-
# 🍹 Cocktail Suggestions - Project Setup Guide
|
2 |
-
|
3 |
-
## 📁 What We Built
|
4 |
-
|
5 |
-
A complete AI-powered cocktail recommendation system with:
|
6 |
-
|
7 |
-
### Core Components
|
8 |
-
|
9 |
-
1. **`database_setup.py`** - PostgreSQL + pgvector setup
|
10 |
-
2. **`data_processor.py`** - Kaggle dataset processing and embedding generation
|
11 |
-
3. **`recommender.py`** - AI-powered recommendation engine
|
12 |
-
4. **`app.py`** - Beautiful Streamlit web interface
|
13 |
-
|
14 |
-
### Supporting Files
|
15 |
-
|
16 |
-
- **`requirements.txt`** - All Python dependencies
|
17 |
-
- **`docker-compose.yml`** - Complete Docker setup
|
18 |
-
- **`quickstart.py`** - Automated setup script
|
19 |
-
- **`test_system.py`** - System verification
|
20 |
-
- **`.env.example`** - Configuration template
|
21 |
-
|
22 |
-
## 🚀 Getting Started (Choose One Method)
|
23 |
-
|
24 |
-
### Method 1: Docker (Easiest) 🐳
|
25 |
-
|
26 |
-
```bash
|
27 |
-
# 1. Download the cocktail dataset
|
28 |
-
# Go to: https://www.kaggle.com/datasets/aadyasingh55/cocktails/data
|
29 |
-
# Download and place cocktails.csv in data/ folder
|
30 |
-
|
31 |
-
# 2. Start everything with Docker
|
32 |
-
docker-compose up -d
|
33 |
-
|
34 |
-
# 3. Initialize the database (one-time setup)
|
35 |
-
docker-compose exec cocktail-app python database_setup.py
|
36 |
-
docker-compose exec cocktail-app python data_processor.py
|
37 |
-
|
38 |
-
# 4. Open http://localhost:8501 in your browser
|
39 |
-
```
|
40 |
-
|
41 |
-
### Method 2: Local Setup 💻
|
42 |
-
|
43 |
-
```bash
|
44 |
-
# 1. Install dependencies
|
45 |
-
pip install -r requirements.txt
|
46 |
-
|
47 |
-
# 2. Set up environment
|
48 |
-
cp .env.example .env
|
49 |
-
# Edit .env with your PostgreSQL credentials
|
50 |
-
|
51 |
-
# 3. Download dataset to data/cocktails.csv
|
52 |
-
|
53 |
-
# 4. Set up database
|
54 |
-
python database_setup.py
|
55 |
-
|
56 |
-
# 5. Process data and generate embeddings
|
57 |
-
python data_processor.py
|
58 |
-
|
59 |
-
# 6. Start the web app
|
60 |
-
streamlit run app.py
|
61 |
-
```
|
62 |
-
|
63 |
-
### Method 3: Quick Setup Script 🔧
|
64 |
-
|
65 |
-
```bash
|
66 |
-
# Run the automated setup
|
67 |
-
python quickstart.py
|
68 |
-
|
69 |
-
# Follow the instructions shown
|
70 |
-
```
|
71 |
-
|
72 |
-
## 📋 Prerequisites
|
73 |
-
|
74 |
-
### For Docker
|
75 |
-
|
76 |
-
- Docker and Docker Compose
|
77 |
-
- The cocktail dataset (cocktails.csv)
|
78 |
-
|
79 |
-
### For Local Setup
|
80 |
-
|
81 |
-
- Python 3.8+
|
82 |
-
- PostgreSQL with pgvector extension
|
83 |
-
- The cocktail dataset (cocktails.csv)
|
84 |
-
|
85 |
-
## 🎯 How to Use the App
|
86 |
-
|
87 |
-
### Search Options
|
88 |
-
|
89 |
-
1. **🔍 By Name** - Search for specific cocktails
|
90 |
-
2. **🥃 By Ingredients** - Get suggestions based on what you have
|
91 |
-
3. **🎭 By Style** - Find drinks by mood (sweet, strong, fruity, etc.)
|
92 |
-
4. **🎉 By Occasion** - Perfect drinks for parties, dates, etc.
|
93 |
-
5. **🎲 Mixed Preferences** - Combine multiple criteria
|
94 |
-
6. **📂 By Category** - Browse drink categories
|
95 |
-
7. **🎰 Random Discovery** - Let AI surprise you
|
96 |
-
|
97 |
-
### Features
|
98 |
-
|
99 |
-
- Real-time similarity matching
|
100 |
-
- Beautiful, responsive interface
|
101 |
-
- Detailed recipes and ingredients
|
102 |
-
- Similarity scores for each recommendation
|
103 |
-
|
104 |
-
## 🔧 Troubleshooting
|
105 |
-
|
106 |
-
### Common Issues
|
107 |
-
|
108 |
-
1. **"Import errors"** - Install requirements: `pip install -r requirements.txt`
|
109 |
-
|
110 |
-
2. **"Database connection failed"** - Check PostgreSQL is running and .env file
|
111 |
-
|
112 |
-
3. **"pgvector extension not found"** - Install pgvector in PostgreSQL
|
113 |
-
|
114 |
-
4. **"Dataset not found"** - Download cocktails.csv to data/ folder
|
115 |
-
|
116 |
-
5. **"Memory issues"** - The AI model needs ~2GB RAM for embeddings
|
117 |
-
|
118 |
-
### Test Your Setup
|
119 |
-
|
120 |
-
```bash
|
121 |
-
python test_system.py
|
122 |
-
```
|
123 |
-
|
124 |
-
## 🎉 What's Next?
|
125 |
-
|
126 |
-
After setup, you can:
|
127 |
-
|
128 |
-
- Explore 600+ cocktail recipes
|
129 |
-
- Get personalized recommendations
|
130 |
-
- Discover new drinks based on your preferences
|
131 |
-
- Learn about cocktail ingredients and preparation
|
132 |
-
|
133 |
-
## 🆘 Need Help?
|
134 |
-
|
135 |
-
1. Check the detailed README.md
|
136 |
-
2. Run the test script: `python test_system.py`
|
137 |
-
3. Check logs in the logs/ directory
|
138 |
-
4. Ensure all dependencies are installed correctly
|
139 |
-
|
140 |
-
---
|
141 |
-
|
142 |
-
**Enjoy discovering your perfect cocktail! 🍹**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/app.py
CHANGED
@@ -1,6 +1,11 @@
|
|
1 |
import streamlit as st
|
|
|
2 |
from recommender import CocktailRecommender
|
3 |
|
|
|
|
|
|
|
|
|
4 |
# Page config
|
5 |
st.set_page_config(
|
6 |
page_title="🍹 Cocktail Suggestions",
|
@@ -52,8 +57,13 @@ st.markdown("""
|
|
52 |
|
53 |
@st.cache_resource
|
54 |
def get_recommender():
|
55 |
-
"""Initialize the cocktail recommender"""
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
def display_cocktail(cocktail):
|
59 |
"""Display a cocktail in a nice card format"""
|
@@ -93,11 +103,10 @@ def main():
|
|
93 |
st.session_state.last_search_type = ""
|
94 |
|
95 |
# Initialize recommender
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
st.
|
100 |
-
st.info("Make sure your database is set up and the environment variables are configured.")
|
101 |
return
|
102 |
|
103 |
# Sidebar for filters and preferences
|
|
|
1 |
import streamlit as st
|
2 |
+
import os
|
3 |
from recommender import CocktailRecommender
|
4 |
|
5 |
+
# Set environment variables for Hugging Face Spaces compatibility
|
6 |
+
os.environ.setdefault('STREAMLIT_SERVER_HEADLESS', 'true')
|
7 |
+
os.environ.setdefault('STREAMLIT_BROWSER_GATHER_USAGE_STATS', 'false')
|
8 |
+
|
9 |
# Page config
|
10 |
st.set_page_config(
|
11 |
page_title="🍹 Cocktail Suggestions",
|
|
|
57 |
|
58 |
@st.cache_resource
|
59 |
def get_recommender():
|
60 |
+
"""Initialize the cocktail recommender with error handling"""
|
61 |
+
try:
|
62 |
+
return CocktailRecommender()
|
63 |
+
except Exception as e:
|
64 |
+
st.error(f"Error initializing cocktail recommender: {str(e)}")
|
65 |
+
st.info("This might be due to model loading issues. Please try refreshing the page.")
|
66 |
+
return None
|
67 |
|
68 |
def display_cocktail(cocktail):
|
69 |
"""Display a cocktail in a nice card format"""
|
|
|
103 |
st.session_state.last_search_type = ""
|
104 |
|
105 |
# Initialize recommender
|
106 |
+
recommender = get_recommender()
|
107 |
+
if recommender is None:
|
108 |
+
st.error("Failed to initialize the cocktail recommender. Please try refreshing the page.")
|
109 |
+
st.info("If the problem persists, there might be an issue with model loading or database connection.")
|
|
|
110 |
return
|
111 |
|
112 |
# Sidebar for filters and preferences
|
src/data_processor.py
DELETED
@@ -1,256 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
from sentence_transformers import SentenceTransformer
|
3 |
-
from database_setup import DatabaseSetup
|
4 |
-
import os
|
5 |
-
from dotenv import load_dotenv
|
6 |
-
|
7 |
-
load_dotenv()
|
8 |
-
|
9 |
-
class CocktailDataProcessor:
|
10 |
-
def __init__(self):
|
11 |
-
self.model_name = os.getenv('MODEL_NAME', 'all-MiniLM-L6-v2')
|
12 |
-
self.model = SentenceTransformer(self.model_name)
|
13 |
-
self.db_setup = DatabaseSetup()
|
14 |
-
|
15 |
-
def load_data(self, csv_path):
|
16 |
-
"""Load cocktail data from CSV file"""
|
17 |
-
try:
|
18 |
-
df = pd.read_csv(csv_path)
|
19 |
-
print(f"Loaded {len(df)} cocktails from {csv_path}")
|
20 |
-
return df
|
21 |
-
except Exception as e:
|
22 |
-
print(f"Error loading data: {e}")
|
23 |
-
return None
|
24 |
-
|
25 |
-
def clean_data(self, df):
|
26 |
-
"""Clean and preprocess the cocktail data"""
|
27 |
-
# Auto-detect column names (handle both old and new formats)
|
28 |
-
name_col = 'name' if 'name' in df.columns else 'strDrink'
|
29 |
-
category_col = 'category' if 'category' in df.columns else 'strCategory'
|
30 |
-
alcoholic_col = 'alcoholic' if 'alcoholic' in df.columns else 'strAlcoholic'
|
31 |
-
glass_col = 'glassType' if 'glassType' in df.columns else 'strGlass'
|
32 |
-
instructions_col = 'instructions' if 'instructions' in df.columns else 'strInstructions'
|
33 |
-
|
34 |
-
print(f"Detected columns: name='{name_col}', category='{category_col}', alcoholic='{alcoholic_col}', glass='{glass_col}'")
|
35 |
-
|
36 |
-
# Remove duplicates based on name
|
37 |
-
if name_col in df.columns:
|
38 |
-
df = df.drop_duplicates(subset=[name_col])
|
39 |
-
print(f"After removing duplicates: {len(df)} cocktails")
|
40 |
-
|
41 |
-
# Fill missing values
|
42 |
-
df = df.fillna('')
|
43 |
-
|
44 |
-
# Create a combined text for embedding
|
45 |
-
df['combined_text'] = ''
|
46 |
-
|
47 |
-
if name_col in df.columns:
|
48 |
-
df['combined_text'] += df[name_col].astype(str) + ' '
|
49 |
-
if category_col in df.columns:
|
50 |
-
df['combined_text'] += df[category_col].astype(str) + ' '
|
51 |
-
if alcoholic_col in df.columns:
|
52 |
-
df['combined_text'] += df[alcoholic_col].astype(str) + ' '
|
53 |
-
if glass_col in df.columns:
|
54 |
-
df['combined_text'] += df[glass_col].astype(str) + ' '
|
55 |
-
|
56 |
-
# Handle ingredients (could be in different formats)
|
57 |
-
if 'ingredients' in df.columns:
|
58 |
-
# New format: ingredients as string/list
|
59 |
-
df['combined_text'] += df['ingredients'].astype(str) + ' '
|
60 |
-
else:
|
61 |
-
# Old format: strIngredient1, strIngredient2, etc.
|
62 |
-
ingredient_cols = [col for col in df.columns if col.startswith('strIngredient')]
|
63 |
-
for col in ingredient_cols:
|
64 |
-
df['combined_text'] += df[col].astype(str) + ' '
|
65 |
-
|
66 |
-
# Add instructions if available
|
67 |
-
if instructions_col in df.columns:
|
68 |
-
df['combined_text'] += df[instructions_col].astype(str) + ' '
|
69 |
-
|
70 |
-
# Clean the combined text
|
71 |
-
df['combined_text'] = df['combined_text'].str.replace(r'\s+', ' ', regex=True).str.strip()
|
72 |
-
|
73 |
-
print(f"Sample combined text: {df['combined_text'].iloc[0][:100]}...")
|
74 |
-
|
75 |
-
return df
|
76 |
-
|
77 |
-
def generate_embeddings(self, texts):
|
78 |
-
"""Generate embeddings for the given texts"""
|
79 |
-
embeddings = self.model.encode(texts, show_progress_bar=True)
|
80 |
-
return embeddings
|
81 |
-
|
82 |
-
def create_recipe_text(self, row):
|
83 |
-
"""Create a readable recipe from the row data"""
|
84 |
-
# Auto-detect column names
|
85 |
-
name_col = 'name' if 'name' in row else 'strDrink'
|
86 |
-
category_col = 'category' if 'category' in row else 'strCategory'
|
87 |
-
alcoholic_col = 'alcoholic' if 'alcoholic' in row else 'strAlcoholic'
|
88 |
-
glass_col = 'glassType' if 'glassType' in row else 'strGlass'
|
89 |
-
instructions_col = 'instructions' if 'instructions' in row else 'strInstructions'
|
90 |
-
|
91 |
-
recipe = f"Drink: {row.get(name_col, '')}\n"
|
92 |
-
recipe += f"Category: {row.get(category_col, '')}\n"
|
93 |
-
recipe += f"Type: {row.get(alcoholic_col, '')}\n"
|
94 |
-
recipe += f"Glass: {row.get(glass_col, '')}\n"
|
95 |
-
|
96 |
-
if row.get(instructions_col):
|
97 |
-
recipe += f"Instructions: {row[instructions_col]}\n"
|
98 |
-
|
99 |
-
recipe += "Ingredients:\n"
|
100 |
-
|
101 |
-
# Handle new format (ingredients as string/list)
|
102 |
-
if 'ingredients' in row and row['ingredients']:
|
103 |
-
try:
|
104 |
-
import ast
|
105 |
-
ingredients_str = row['ingredients']
|
106 |
-
|
107 |
-
# Parse ingredients list
|
108 |
-
if ingredients_str.startswith('['):
|
109 |
-
ingredients = ast.literal_eval(ingredients_str)
|
110 |
-
else:
|
111 |
-
ingredients = [ingredients_str]
|
112 |
-
|
113 |
-
# Parse measures if available
|
114 |
-
measures = []
|
115 |
-
if 'ingredientMeasures' in row and row['ingredientMeasures']:
|
116 |
-
measures_str = row['ingredientMeasures']
|
117 |
-
if measures_str.startswith('['):
|
118 |
-
measures = ast.literal_eval(measures_str)
|
119 |
-
else:
|
120 |
-
measures = [measures_str]
|
121 |
-
|
122 |
-
# Combine ingredients with measures
|
123 |
-
for i, ingredient in enumerate(ingredients):
|
124 |
-
if ingredient and str(ingredient).strip() and str(ingredient).strip() != 'None':
|
125 |
-
if i < len(measures) and measures[i] and str(measures[i]).strip() != 'None':
|
126 |
-
recipe += f"- {measures[i]} {ingredient}\n"
|
127 |
-
else:
|
128 |
-
recipe += f"- {ingredient}\n"
|
129 |
-
|
130 |
-
except Exception as e:
|
131 |
-
# Fallback: treat as simple string
|
132 |
-
recipe += f"- {row['ingredients']}\n"
|
133 |
-
|
134 |
-
else:
|
135 |
-
# Handle old format (strIngredient1, strIngredient2, etc.)
|
136 |
-
for i in range(1, 16): # Assuming max 15 ingredients
|
137 |
-
ingredient = row.get(f'strIngredient{i}')
|
138 |
-
measure = row.get(f'strMeasure{i}')
|
139 |
-
if ingredient and str(ingredient).strip() and str(ingredient).strip() != 'nan':
|
140 |
-
if measure and str(measure).strip() and str(measure).strip() != 'nan':
|
141 |
-
recipe += f"- {measure} {ingredient}\n"
|
142 |
-
else:
|
143 |
-
recipe += f"- {ingredient}\n"
|
144 |
-
|
145 |
-
return recipe
|
146 |
-
|
147 |
-
def get_ingredients_list(self, row):
|
148 |
-
"""Extract ingredients as a comma-separated string"""
|
149 |
-
ingredients = []
|
150 |
-
|
151 |
-
# Handle new format (ingredients as string/list)
|
152 |
-
if 'ingredients' in row and row['ingredients']:
|
153 |
-
try:
|
154 |
-
import ast
|
155 |
-
ingredients_str = row['ingredients']
|
156 |
-
|
157 |
-
# Parse ingredients list
|
158 |
-
if ingredients_str.startswith('['):
|
159 |
-
ingredients_list = ast.literal_eval(ingredients_str)
|
160 |
-
for ingredient in ingredients_list:
|
161 |
-
if ingredient and str(ingredient).strip() and str(ingredient).strip() != 'None':
|
162 |
-
ingredients.append(str(ingredient).strip())
|
163 |
-
else:
|
164 |
-
# Single ingredient as string
|
165 |
-
if ingredients_str.strip():
|
166 |
-
ingredients.append(ingredients_str.strip())
|
167 |
-
|
168 |
-
except Exception as e:
|
169 |
-
# Fallback: treat as simple string
|
170 |
-
if row['ingredients'].strip():
|
171 |
-
ingredients.append(row['ingredients'].strip())
|
172 |
-
|
173 |
-
else:
|
174 |
-
# Handle old format (strIngredient1, strIngredient2, etc.)
|
175 |
-
for i in range(1, 16):
|
176 |
-
ingredient = row.get(f'strIngredient{i}')
|
177 |
-
if ingredient and str(ingredient).strip() and str(ingredient).strip() != 'nan':
|
178 |
-
ingredients.append(str(ingredient).strip())
|
179 |
-
|
180 |
-
return ', '.join(ingredients)
|
181 |
-
|
182 |
-
def store_cocktails(self, df):
|
183 |
-
"""Store cocktails with embeddings in the database"""
|
184 |
-
try:
|
185 |
-
conn = self.db_setup.get_connection()
|
186 |
-
cursor = conn.cursor()
|
187 |
-
|
188 |
-
# Clear existing data
|
189 |
-
cursor.execute("DELETE FROM cocktails")
|
190 |
-
|
191 |
-
print(f"Generating embeddings for {len(df)} cocktails...")
|
192 |
-
# Generate all embeddings at once (much more efficient)
|
193 |
-
all_embeddings = self.generate_embeddings(df['combined_text'].tolist())
|
194 |
-
|
195 |
-
print("Storing cocktails in database...")
|
196 |
-
for idx, (_, row) in enumerate(df.iterrows()):
|
197 |
-
# Get pre-computed embedding
|
198 |
-
embedding = all_embeddings[idx]
|
199 |
-
|
200 |
-
# Prepare data with auto-detected column names
|
201 |
-
name_col = 'name' if 'name' in row else 'strDrink'
|
202 |
-
category_col = 'category' if 'category' in row else 'strCategory'
|
203 |
-
alcoholic_col = 'alcoholic' if 'alcoholic' in row else 'strAlcoholic'
|
204 |
-
glass_col = 'glassType' if 'glassType' in row else 'strGlass'
|
205 |
-
|
206 |
-
name = row.get(name_col, '')
|
207 |
-
ingredients = self.get_ingredients_list(row)
|
208 |
-
recipe = self.create_recipe_text(row)
|
209 |
-
glass = row.get(glass_col, '')
|
210 |
-
category = row.get(category_col, '')
|
211 |
-
iba = row.get('strIBA', '') # This might not exist in new format
|
212 |
-
alcoholic = row.get(alcoholic_col, '')
|
213 |
-
|
214 |
-
# Insert into database
|
215 |
-
cursor.execute("""
|
216 |
-
INSERT INTO cocktails (name, ingredients, recipe, glass, category, iba, alcoholic, embedding)
|
217 |
-
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
|
218 |
-
""", (name, ingredients, recipe, glass, category, iba, alcoholic, embedding.tolist()))
|
219 |
-
|
220 |
-
if (idx + 1) % 100 == 0:
|
221 |
-
print(f"Stored {idx + 1} cocktails...")
|
222 |
-
|
223 |
-
conn.commit()
|
224 |
-
cursor.close()
|
225 |
-
conn.close()
|
226 |
-
|
227 |
-
print(f"Successfully stored {len(df)} cocktails in the database")
|
228 |
-
|
229 |
-
except Exception as e:
|
230 |
-
print(f"Error storing cocktails: {e}")
|
231 |
-
if 'conn' in locals():
|
232 |
-
conn.rollback()
|
233 |
-
conn.close()
|
234 |
-
|
235 |
-
def process_and_store(self, csv_path):
|
236 |
-
"""Complete pipeline to process and store cocktail data"""
|
237 |
-
# Load data
|
238 |
-
df = self.load_data(csv_path)
|
239 |
-
if df is None:
|
240 |
-
return
|
241 |
-
|
242 |
-
# Clean data
|
243 |
-
df = self.clean_data(df)
|
244 |
-
|
245 |
-
# Store in database
|
246 |
-
self.store_cocktails(df)
|
247 |
-
|
248 |
-
if __name__ == "__main__":
|
249 |
-
processor = CocktailDataProcessor()
|
250 |
-
# Assuming the CSV file is in the data directory
|
251 |
-
csv_path = "data/final_cocktails.csv"
|
252 |
-
if os.path.exists(csv_path):
|
253 |
-
processor.process_and_store(csv_path)
|
254 |
-
else:
|
255 |
-
print(f"Please download the cocktails dataset and place it at {csv_path}")
|
256 |
-
print("Dataset URL: https://www.kaggle.com/datasets/aadyasingh55/cocktails/data")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/debug.py
DELETED
@@ -1,203 +0,0 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
"""
|
3 |
-
Debug script to help troubleshoot the cocktail recommendation system
|
4 |
-
"""
|
5 |
-
|
6 |
-
import os
|
7 |
-
import sys
|
8 |
-
from dotenv import load_dotenv
|
9 |
-
|
10 |
-
load_dotenv()
|
11 |
-
|
12 |
-
def check_database():
|
13 |
-
"""Check database connection and contents"""
|
14 |
-
print("🔍 Checking database...")
|
15 |
-
|
16 |
-
try:
|
17 |
-
from database_setup import DatabaseSetup
|
18 |
-
db = DatabaseSetup()
|
19 |
-
|
20 |
-
# Test connection
|
21 |
-
conn = db.get_connection()
|
22 |
-
cursor = conn.cursor()
|
23 |
-
|
24 |
-
# Check if cocktails table exists
|
25 |
-
cursor.execute("""
|
26 |
-
SELECT EXISTS (
|
27 |
-
SELECT FROM information_schema.tables
|
28 |
-
WHERE table_name = 'cocktails'
|
29 |
-
);
|
30 |
-
""")
|
31 |
-
table_exists = cursor.fetchone()[0]
|
32 |
-
|
33 |
-
if not table_exists:
|
34 |
-
print("❌ Cocktails table doesn't exist")
|
35 |
-
print("Run: python database_setup.py")
|
36 |
-
return False
|
37 |
-
|
38 |
-
print("✅ Cocktails table exists")
|
39 |
-
|
40 |
-
# Check number of cocktails
|
41 |
-
cursor.execute("SELECT COUNT(*) FROM cocktails")
|
42 |
-
count = cursor.fetchone()[0]
|
43 |
-
print(f"📊 Found {count} cocktails in database")
|
44 |
-
|
45 |
-
if count == 0:
|
46 |
-
print("❌ No cocktails in database")
|
47 |
-
print("Run: python data_processor.py")
|
48 |
-
return False
|
49 |
-
|
50 |
-
# Check if embeddings exist
|
51 |
-
cursor.execute("SELECT COUNT(*) FROM cocktails WHERE embedding IS NOT NULL")
|
52 |
-
embedding_count = cursor.fetchone()[0]
|
53 |
-
print(f"🧠 {embedding_count} cocktails have embeddings")
|
54 |
-
|
55 |
-
# Test a simple query
|
56 |
-
cursor.execute("SELECT name FROM cocktails LIMIT 3")
|
57 |
-
samples = cursor.fetchall()
|
58 |
-
print("📝 Sample cocktails:")
|
59 |
-
for sample in samples:
|
60 |
-
print(f" - {sample[0]}")
|
61 |
-
|
62 |
-
cursor.close()
|
63 |
-
conn.close()
|
64 |
-
|
65 |
-
return count > 0 and embedding_count > 0
|
66 |
-
|
67 |
-
except Exception as e:
|
68 |
-
print(f"❌ Database error: {e}")
|
69 |
-
return False
|
70 |
-
|
71 |
-
def test_recommender():
|
72 |
-
"""Test the recommendation engine"""
|
73 |
-
print("\n🧠 Testing recommender...")
|
74 |
-
|
75 |
-
try:
|
76 |
-
from recommender import CocktailRecommender
|
77 |
-
recommender = CocktailRecommender()
|
78 |
-
|
79 |
-
# Test random cocktails (simplest query)
|
80 |
-
print("Testing random cocktails...")
|
81 |
-
random_results = recommender.get_random_cocktails(3)
|
82 |
-
|
83 |
-
if random_results:
|
84 |
-
print(f"✅ Random query returned {len(random_results)} results")
|
85 |
-
for result in random_results:
|
86 |
-
cocktail = recommender.format_cocktail_result(result)
|
87 |
-
print(f" - {cocktail['name']}")
|
88 |
-
else:
|
89 |
-
print("❌ Random query returned no results")
|
90 |
-
return False
|
91 |
-
|
92 |
-
# Test ingredient search
|
93 |
-
print("\nTesting ingredient search...")
|
94 |
-
ingredient_results = recommender.recommend_by_ingredients(['vodka'], limit=3)
|
95 |
-
|
96 |
-
if ingredient_results:
|
97 |
-
print(f"✅ Ingredient search returned {len(ingredient_results)} results")
|
98 |
-
for result in ingredient_results:
|
99 |
-
cocktail = recommender.format_cocktail_result(result)
|
100 |
-
print(f" - {cocktail['name']} (Similarity: {cocktail.get('similarity', 'N/A')}%)")
|
101 |
-
else:
|
102 |
-
print("❌ Ingredient search returned no results")
|
103 |
-
|
104 |
-
return True
|
105 |
-
|
106 |
-
except Exception as e:
|
107 |
-
print(f"❌ Recommender error: {e}")
|
108 |
-
import traceback
|
109 |
-
traceback.print_exc()
|
110 |
-
return False
|
111 |
-
|
112 |
-
def check_environment():
|
113 |
-
"""Check environment variables"""
|
114 |
-
print("🔧 Checking environment...")
|
115 |
-
|
116 |
-
required_vars = ['DB_HOST', 'DB_PORT', 'DB_NAME', 'DB_USER', 'DB_PASSWORD']
|
117 |
-
|
118 |
-
for var in required_vars:
|
119 |
-
value = os.getenv(var)
|
120 |
-
if value:
|
121 |
-
# Hide password
|
122 |
-
display_value = "***" if "PASSWORD" in var else value
|
123 |
-
print(f"✅ {var}: {display_value}")
|
124 |
-
else:
|
125 |
-
print(f"❌ {var}: Not set")
|
126 |
-
|
127 |
-
# Check if .env file exists
|
128 |
-
if os.path.exists('.env'):
|
129 |
-
print("✅ .env file exists")
|
130 |
-
else:
|
131 |
-
print("❌ .env file not found")
|
132 |
-
print("Copy .env.example to .env and configure it")
|
133 |
-
|
134 |
-
def check_dataset():
|
135 |
-
"""Check if dataset exists"""
|
136 |
-
print("\n📊 Checking dataset...")
|
137 |
-
|
138 |
-
csv_path = "data/final_cocktails.csv"
|
139 |
-
if os.path.exists(csv_path):
|
140 |
-
print(f"✅ Dataset found at {csv_path}")
|
141 |
-
|
142 |
-
# Check file size
|
143 |
-
size = os.path.getsize(csv_path)
|
144 |
-
print(f"📏 File size: {size / 1024 / 1024:.1f} MB")
|
145 |
-
|
146 |
-
# Try to read first few lines
|
147 |
-
try:
|
148 |
-
import pandas as pd
|
149 |
-
df = pd.read_csv(csv_path, nrows=5)
|
150 |
-
print(f"📋 Columns: {list(df.columns)[:5]}...")
|
151 |
-
print(f"📈 Sample rows: {len(df)}")
|
152 |
-
return True
|
153 |
-
except Exception as e:
|
154 |
-
print(f"❌ Error reading dataset: {e}")
|
155 |
-
return False
|
156 |
-
else:
|
157 |
-
print(f"❌ Dataset not found at {csv_path}")
|
158 |
-
print("Download from: https://www.kaggle.com/datasets/aadyasingh55/cocktails/data")
|
159 |
-
return False
|
160 |
-
|
161 |
-
def main():
|
162 |
-
print("🍹 Cocktail Recommendation System - Debug Tool")
|
163 |
-
print("=" * 50)
|
164 |
-
|
165 |
-
# Check environment
|
166 |
-
check_environment()
|
167 |
-
|
168 |
-
# Check dataset
|
169 |
-
dataset_ok = check_dataset()
|
170 |
-
|
171 |
-
# Check database
|
172 |
-
db_ok = check_database()
|
173 |
-
|
174 |
-
# Test recommender if database is OK
|
175 |
-
if db_ok:
|
176 |
-
recommender_ok = test_recommender()
|
177 |
-
else:
|
178 |
-
recommender_ok = False
|
179 |
-
|
180 |
-
print("\n📋 Summary:")
|
181 |
-
print(f"Dataset: {'✅' if dataset_ok else '❌'}")
|
182 |
-
print(f"Database: {'✅' if db_ok else '❌'}")
|
183 |
-
print(f"Recommender: {'✅' if recommender_ok else '❌'}")
|
184 |
-
|
185 |
-
if not dataset_ok:
|
186 |
-
print("\n💡 Next steps:")
|
187 |
-
print("1. Download the cocktail dataset")
|
188 |
-
print("2. Place it as data/cocktails.csv")
|
189 |
-
elif not db_ok:
|
190 |
-
print("\n💡 Next steps:")
|
191 |
-
print("1. Configure .env file")
|
192 |
-
print("2. Run: python database_setup.py")
|
193 |
-
print("3. Run: python data_processor.py")
|
194 |
-
elif not recommender_ok:
|
195 |
-
print("\n💡 Next steps:")
|
196 |
-
print("1. Check the error messages above")
|
197 |
-
print("2. Verify database connectivity")
|
198 |
-
else:
|
199 |
-
print("\n🎉 Everything looks good!")
|
200 |
-
print("You can now run: streamlit run app.py")
|
201 |
-
|
202 |
-
if __name__ == "__main__":
|
203 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/demo_setup.py
DELETED
@@ -1,96 +0,0 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
"""
|
3 |
-
Quick demo setup using sample data
|
4 |
-
"""
|
5 |
-
|
6 |
-
import os
|
7 |
-
import sys
|
8 |
-
|
9 |
-
def setup_demo():
|
10 |
-
print("🍹 Setting up demo with sample data...")
|
11 |
-
|
12 |
-
# Check if we have the full dataset
|
13 |
-
full_dataset = "data/cocktails.csv"
|
14 |
-
sample_dataset = "data/sample_cocktails.csv"
|
15 |
-
|
16 |
-
dataset_to_use = None
|
17 |
-
|
18 |
-
if os.path.exists(full_dataset):
|
19 |
-
print(f"✅ Found full dataset: {full_dataset}")
|
20 |
-
dataset_to_use = full_dataset
|
21 |
-
elif os.path.exists(sample_dataset):
|
22 |
-
print(f"✅ Using sample dataset: {sample_dataset}")
|
23 |
-
dataset_to_use = sample_dataset
|
24 |
-
else:
|
25 |
-
print("❌ No dataset found")
|
26 |
-
return False
|
27 |
-
|
28 |
-
# Set up database
|
29 |
-
print("🗄️ Setting up database...")
|
30 |
-
try:
|
31 |
-
from database_setup import DatabaseSetup
|
32 |
-
db_setup = DatabaseSetup()
|
33 |
-
db_setup.create_database()
|
34 |
-
db_setup.setup_pgvector()
|
35 |
-
print("✅ Database setup complete")
|
36 |
-
except Exception as e:
|
37 |
-
print(f"❌ Database setup failed: {e}")
|
38 |
-
return False
|
39 |
-
|
40 |
-
# Process data
|
41 |
-
print("🧠 Processing cocktail data...")
|
42 |
-
try:
|
43 |
-
from data_processor import CocktailDataProcessor
|
44 |
-
processor = CocktailDataProcessor()
|
45 |
-
processor.process_and_store(dataset_to_use)
|
46 |
-
print("✅ Data processing complete")
|
47 |
-
except Exception as e:
|
48 |
-
print(f"❌ Data processing failed: {e}")
|
49 |
-
import traceback
|
50 |
-
traceback.print_exc()
|
51 |
-
return False
|
52 |
-
|
53 |
-
# Test the system
|
54 |
-
print("🧪 Testing the system...")
|
55 |
-
try:
|
56 |
-
from recommender import CocktailRecommender
|
57 |
-
recommender = CocktailRecommender()
|
58 |
-
|
59 |
-
# Test random cocktails
|
60 |
-
results = recommender.get_random_cocktails(3)
|
61 |
-
if results:
|
62 |
-
print(f"✅ System test successful - found {len(results)} cocktails")
|
63 |
-
for result in results:
|
64 |
-
cocktail = recommender.format_cocktail_result(result)
|
65 |
-
print(f" - {cocktail['name']}")
|
66 |
-
else:
|
67 |
-
print("❌ System test failed - no cocktails returned")
|
68 |
-
return False
|
69 |
-
|
70 |
-
except Exception as e:
|
71 |
-
print(f"❌ System test failed: {e}")
|
72 |
-
return False
|
73 |
-
|
74 |
-
return True
|
75 |
-
|
76 |
-
def main():
|
77 |
-
print("🚀 Cocktail Demo Setup")
|
78 |
-
print("=" * 30)
|
79 |
-
|
80 |
-
if setup_demo():
|
81 |
-
print("\n🎉 Demo setup complete!")
|
82 |
-
print("\nYou can now run:")
|
83 |
-
print(" streamlit run app.py")
|
84 |
-
print("\nOr test with:")
|
85 |
-
print(" python debug.py")
|
86 |
-
|
87 |
-
print("\n💡 To use the full dataset:")
|
88 |
-
print("1. Download cocktails.csv from Kaggle")
|
89 |
-
print("2. Place it in data/cocktails.csv")
|
90 |
-
print("3. Run this script again")
|
91 |
-
else:
|
92 |
-
print("\n❌ Demo setup failed!")
|
93 |
-
print("Please check the error messages above")
|
94 |
-
|
95 |
-
if __name__ == "__main__":
|
96 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/docker-compose.yml
DELETED
@@ -1,37 +0,0 @@
|
|
1 |
-
services:
|
2 |
-
postgres:
|
3 |
-
image: pgvector/pgvector:pg15
|
4 |
-
environment:
|
5 |
-
POSTGRES_DB: cocktails_db
|
6 |
-
POSTGRES_USER: postgres
|
7 |
-
POSTGRES_PASSWORD: your_password
|
8 |
-
ports:
|
9 |
-
- "5432:5432"
|
10 |
-
volumes:
|
11 |
-
- postgres_data:/var/lib/postgresql/data
|
12 |
-
healthcheck:
|
13 |
-
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
14 |
-
interval: 30s
|
15 |
-
timeout: 10s
|
16 |
-
retries: 3
|
17 |
-
|
18 |
-
cocktail-app:
|
19 |
-
build: .
|
20 |
-
ports:
|
21 |
-
- "8501:8501"
|
22 |
-
environment:
|
23 |
-
- DB_HOST=postgres
|
24 |
-
- DB_PORT=5432
|
25 |
-
- DB_NAME=cocktails_db
|
26 |
-
- DB_USER=postgres
|
27 |
-
- DB_PASSWORD=your_password
|
28 |
-
- MODEL_NAME=all-MiniLM-L6-v2
|
29 |
-
depends_on:
|
30 |
-
postgres:
|
31 |
-
condition: service_healthy
|
32 |
-
volumes:
|
33 |
-
- ./data:/app/data
|
34 |
-
- ./logs:/app/logs
|
35 |
-
|
36 |
-
volumes:
|
37 |
-
postgres_data:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/quickstart.py
DELETED
@@ -1,116 +0,0 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
"""
|
3 |
-
Quick start script for the Cocktail Suggestions project
|
4 |
-
"""
|
5 |
-
|
6 |
-
import os
|
7 |
-
import sys
|
8 |
-
import subprocess
|
9 |
-
|
10 |
-
def check_python_version():
|
11 |
-
"""Check if Python version is compatible"""
|
12 |
-
if sys.version_info < (3, 8):
|
13 |
-
print("❌ Python 3.8 or higher is required")
|
14 |
-
return False
|
15 |
-
print(f"✅ Python {sys.version_info.major}.{sys.version_info.minor} detected")
|
16 |
-
return True
|
17 |
-
|
18 |
-
def install_dependencies():
|
19 |
-
"""Install required dependencies"""
|
20 |
-
print("📦 Installing dependencies...")
|
21 |
-
try:
|
22 |
-
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])
|
23 |
-
print("✅ Dependencies installed successfully")
|
24 |
-
return True
|
25 |
-
except subprocess.CalledProcessError:
|
26 |
-
print("❌ Failed to install dependencies")
|
27 |
-
return False
|
28 |
-
|
29 |
-
def check_env_file():
|
30 |
-
"""Check if .env file exists"""
|
31 |
-
if not os.path.exists('.env'):
|
32 |
-
print("📝 Creating .env file from template...")
|
33 |
-
if os.path.exists('.env.example'):
|
34 |
-
import shutil
|
35 |
-
shutil.copy('.env.example', '.env')
|
36 |
-
print("⚠️ Please edit .env file with your database credentials!")
|
37 |
-
else:
|
38 |
-
print("❌ .env.example not found")
|
39 |
-
return False
|
40 |
-
print("✅ .env file exists")
|
41 |
-
return True
|
42 |
-
|
43 |
-
def create_directories():
|
44 |
-
"""Create necessary directories"""
|
45 |
-
dirs = ['data', 'logs']
|
46 |
-
for dir_name in dirs:
|
47 |
-
os.makedirs(dir_name, exist_ok=True)
|
48 |
-
print(f"📁 Created directory: {dir_name}")
|
49 |
-
|
50 |
-
def check_dataset():
|
51 |
-
"""Check if dataset exists"""
|
52 |
-
csv_path = "data/cocktails.csv"
|
53 |
-
if os.path.exists(csv_path):
|
54 |
-
print(f"✅ Dataset found at {csv_path}")
|
55 |
-
return True
|
56 |
-
else:
|
57 |
-
print(f"⚠️ Dataset not found at {csv_path}")
|
58 |
-
print("Please download from: https://www.kaggle.com/datasets/aadyasingh55/cocktails/data")
|
59 |
-
return False
|
60 |
-
|
61 |
-
def main():
|
62 |
-
print("🍹 Cocktail Suggestions - Quick Start")
|
63 |
-
print("=" * 40)
|
64 |
-
|
65 |
-
# Check Python version
|
66 |
-
if not check_python_version():
|
67 |
-
return
|
68 |
-
|
69 |
-
# Create directories
|
70 |
-
create_directories()
|
71 |
-
|
72 |
-
# Check/create .env file
|
73 |
-
if not check_env_file():
|
74 |
-
return
|
75 |
-
|
76 |
-
# Install dependencies
|
77 |
-
if not install_dependencies():
|
78 |
-
return
|
79 |
-
|
80 |
-
# Check dataset
|
81 |
-
dataset_exists = check_dataset()
|
82 |
-
|
83 |
-
print("\n🎉 Setup completed!")
|
84 |
-
print("\nNext steps:")
|
85 |
-
print("1. Configure your database credentials in .env")
|
86 |
-
if not dataset_exists:
|
87 |
-
print("2. Download and place the cocktail dataset in data/cocktails.csv")
|
88 |
-
print("3. Run: python database_setup.py")
|
89 |
-
print("4. Run: python data_processor.py")
|
90 |
-
else:
|
91 |
-
print("2. Run: python database_setup.py")
|
92 |
-
print("3. Run: python data_processor.py")
|
93 |
-
print("4. Run: streamlit run app.py")
|
94 |
-
|
95 |
-
# Try to import key dependencies to verify installation
|
96 |
-
print("\n🔍 Verifying installations...")
|
97 |
-
try:
|
98 |
-
import streamlit
|
99 |
-
print("✅ Streamlit installed")
|
100 |
-
except ImportError:
|
101 |
-
print("❌ Streamlit not installed")
|
102 |
-
|
103 |
-
try:
|
104 |
-
import psycopg2
|
105 |
-
print("✅ psycopg2 installed")
|
106 |
-
except ImportError:
|
107 |
-
print("❌ psycopg2 not installed")
|
108 |
-
|
109 |
-
try:
|
110 |
-
import sentence_transformers
|
111 |
-
print("✅ sentence-transformers installed")
|
112 |
-
except ImportError:
|
113 |
-
print("❌ sentence-transformers not installed")
|
114 |
-
|
115 |
-
if __name__ == "__main__":
|
116 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/recommender.py
CHANGED
@@ -7,8 +7,18 @@ load_dotenv()
|
|
7 |
|
8 |
class CocktailRecommender:
|
9 |
def __init__(self):
|
10 |
-
self.model_name = os.getenv('MODEL_NAME', 'all-MiniLM-L6-v2')
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
self.db_setup = DatabaseSetup()
|
13 |
|
14 |
def get_user_preferences_embedding(self, preferences):
|
|
|
7 |
|
8 |
class CocktailRecommender:
|
9 |
def __init__(self):
|
10 |
+
self.model_name = os.getenv('MODEL_NAME', 'sentence-transformers/all-MiniLM-L6-v2')
|
11 |
+
|
12 |
+
# Initialize model with proper cache handling
|
13 |
+
try:
|
14 |
+
self.model = SentenceTransformer(self.model_name)
|
15 |
+
except Exception as e:
|
16 |
+
print(f"Warning: Could not load model {self.model_name}: {e}")
|
17 |
+
# Fallback to a simpler model name format
|
18 |
+
fallback_model = 'all-MiniLM-L6-v2'
|
19 |
+
print(f"Trying fallback model: {fallback_model}")
|
20 |
+
self.model = SentenceTransformer(fallback_model)
|
21 |
+
|
22 |
self.db_setup = DatabaseSetup()
|
23 |
|
24 |
def get_user_preferences_embedding(self, preferences):
|
src/requirements.txt
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
streamlit
|
2 |
-
pandas
|
3 |
-
numpy
|
4 |
-
psycopg2-binary
|
5 |
-
pgvector
|
6 |
-
sentence-transformers
|
7 |
-
scikit-learn
|
8 |
-
python-dotenv
|
9 |
-
requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/setup.sh
DELETED
@@ -1,33 +0,0 @@
|
|
1 |
-
#!/bin/bash
|
2 |
-
|
3 |
-
# Setup script for the Cocktail Suggestions project
|
4 |
-
|
5 |
-
echo "🍹 Setting up Cocktail Suggestions Project..."
|
6 |
-
|
7 |
-
# Create necessary directories
|
8 |
-
echo "📁 Creating directories..."
|
9 |
-
mkdir -p data
|
10 |
-
mkdir -p logs
|
11 |
-
|
12 |
-
# Create .env file if it doesn't exist
|
13 |
-
if [ ! -f .env ]; then
|
14 |
-
echo "📝 Creating .env file..."
|
15 |
-
cp .env.example .env
|
16 |
-
echo "⚠️ Please edit .env file with your database credentials!"
|
17 |
-
fi
|
18 |
-
|
19 |
-
# Install Python dependencies
|
20 |
-
echo "📦 Installing Python dependencies..."
|
21 |
-
pip install -r requirements.txt
|
22 |
-
|
23 |
-
echo "✅ Setup complete!"
|
24 |
-
echo ""
|
25 |
-
echo "Next steps:"
|
26 |
-
echo "1. Edit .env file with your database credentials"
|
27 |
-
echo "2. Set up PostgreSQL with pgvector extension"
|
28 |
-
echo "3. Download the cocktail dataset from:"
|
29 |
-
echo " https://www.kaggle.com/datasets/aadyasingh55/cocktails/data"
|
30 |
-
echo "4. Place the CSV file in the data/ directory"
|
31 |
-
echo "5. Run: python database_setup.py"
|
32 |
-
echo "6. Run: python data_processor.py"
|
33 |
-
echo "7. Run: streamlit run app.py"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/test_system.py
DELETED
@@ -1,165 +0,0 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
"""
|
3 |
-
Simple test script to verify the system components
|
4 |
-
"""
|
5 |
-
|
6 |
-
import sys
|
7 |
-
import os
|
8 |
-
|
9 |
-
def test_imports():
|
10 |
-
"""Test if all required packages can be imported"""
|
11 |
-
print("Testing imports...")
|
12 |
-
|
13 |
-
try:
|
14 |
-
import pandas as pd
|
15 |
-
print("✅ pandas imported successfully")
|
16 |
-
except ImportError as e:
|
17 |
-
print(f"❌ pandas import failed: {e}")
|
18 |
-
return False
|
19 |
-
|
20 |
-
try:
|
21 |
-
import numpy as np
|
22 |
-
print("✅ numpy imported successfully")
|
23 |
-
except ImportError as e:
|
24 |
-
print(f"❌ numpy import failed: {e}")
|
25 |
-
return False
|
26 |
-
|
27 |
-
try:
|
28 |
-
import streamlit as st
|
29 |
-
print("✅ streamlit imported successfully")
|
30 |
-
except ImportError as e:
|
31 |
-
print(f"❌ streamlit import failed: {e}")
|
32 |
-
return False
|
33 |
-
|
34 |
-
try:
|
35 |
-
import psycopg2
|
36 |
-
print("✅ psycopg2 imported successfully")
|
37 |
-
except ImportError as e:
|
38 |
-
print(f"❌ psycopg2 import failed: {e}")
|
39 |
-
return False
|
40 |
-
|
41 |
-
try:
|
42 |
-
from sentence_transformers import SentenceTransformer
|
43 |
-
print("✅ sentence-transformers imported successfully")
|
44 |
-
except ImportError as e:
|
45 |
-
print(f"❌ sentence-transformers import failed: {e}")
|
46 |
-
return False
|
47 |
-
|
48 |
-
try:
|
49 |
-
from dotenv import load_dotenv
|
50 |
-
print("✅ python-dotenv imported successfully")
|
51 |
-
except ImportError as e:
|
52 |
-
print(f"❌ python-dotenv import failed: {e}")
|
53 |
-
return False
|
54 |
-
|
55 |
-
return True
|
56 |
-
|
57 |
-
def test_files():
|
58 |
-
"""Test if required files exist"""
|
59 |
-
print("\nTesting file structure...")
|
60 |
-
|
61 |
-
required_files = [
|
62 |
-
'app.py',
|
63 |
-
'database_setup.py',
|
64 |
-
'data_processor.py',
|
65 |
-
'recommender.py',
|
66 |
-
'requirements.txt',
|
67 |
-
'.env.example'
|
68 |
-
]
|
69 |
-
|
70 |
-
all_good = True
|
71 |
-
for file in required_files:
|
72 |
-
if os.path.exists(file):
|
73 |
-
print(f"✅ {file} exists")
|
74 |
-
else:
|
75 |
-
print(f"❌ {file} missing")
|
76 |
-
all_good = False
|
77 |
-
|
78 |
-
return all_good
|
79 |
-
|
80 |
-
def test_database_connection():
|
81 |
-
"""Test database connection"""
|
82 |
-
print("\nTesting database connection...")
|
83 |
-
|
84 |
-
try:
|
85 |
-
from dotenv import load_dotenv
|
86 |
-
load_dotenv()
|
87 |
-
|
88 |
-
import psycopg2
|
89 |
-
|
90 |
-
# Connect to the default 'postgres' database to verify the server is reachable and the credentials work
|
91 |
-
host = os.getenv('DB_HOST', 'localhost')
|
92 |
-
port = os.getenv('DB_PORT', '5432')
|
93 |
-
user = os.getenv('DB_USER', 'postgres')
|
94 |
-
password = os.getenv('DB_PASSWORD', 'your_password')
|
95 |
-
|
96 |
-
conn = psycopg2.connect(
|
97 |
-
host=host,
|
98 |
-
port=port,
|
99 |
-
user=user,
|
100 |
-
password=password,
|
101 |
-
database='postgres'
|
102 |
-
)
|
103 |
-
conn.close()
|
104 |
-
print("✅ Database connection successful")
|
105 |
-
return True
|
106 |
-
|
107 |
-
except Exception as e:
|
108 |
-
print(f"❌ Database connection failed: {e}")
|
109 |
-
print("Make sure PostgreSQL is running and credentials are correct in .env")
|
110 |
-
return False
|
111 |
-
|
112 |
-
def test_model_loading():
|
113 |
-
"""Test if the AI model can be loaded"""
|
114 |
-
print("\nTesting AI model loading...")
|
115 |
-
|
116 |
-
try:
|
117 |
-
from sentence_transformers import SentenceTransformer
|
118 |
-
model = SentenceTransformer('all-MiniLM-L6-v2')
|
119 |
-
print("✅ AI model loaded successfully")
|
120 |
-
|
121 |
-
# Test embedding generation
|
122 |
-
test_text = "vodka cranberry cocktail"
|
123 |
-
embedding = model.encode([test_text])
|
124 |
-
print(f"✅ Embedding generated successfully (shape: {embedding.shape})")
|
125 |
-
return True
|
126 |
-
|
127 |
-
except Exception as e:
|
128 |
-
print(f"❌ Model loading failed: {e}")
|
129 |
-
return False
|
130 |
-
|
131 |
-
def main():
|
132 |
-
print("🧪 Running System Tests")
|
133 |
-
print("=" * 40)
|
134 |
-
|
135 |
-
# Test imports
|
136 |
-
imports_ok = test_imports()
|
137 |
-
|
138 |
-
# Test files
|
139 |
-
files_ok = test_files()
|
140 |
-
|
141 |
-
# Test database (only if .env exists)
|
142 |
-
db_ok = True
|
143 |
-
if os.path.exists('.env'):
|
144 |
-
db_ok = test_database_connection()
|
145 |
-
else:
|
146 |
-
print("\n⚠️ Skipping database test (.env file not found)")
|
147 |
-
|
148 |
-
# Test model loading
|
149 |
-
model_ok = test_model_loading()
|
150 |
-
|
151 |
-
print("\n📊 Test Summary:")
|
152 |
-
print(f"Imports: {'✅' if imports_ok else '❌'}")
|
153 |
-
print(f"Files: {'✅' if files_ok else '❌'}")
|
154 |
-
print(f"Database: {'✅' if db_ok else '❌'}")
|
155 |
-
print(f"AI Model: {'✅' if model_ok else '❌'}")
|
156 |
-
|
157 |
-
if all([imports_ok, files_ok, db_ok, model_ok]):
|
158 |
-
print("\n🎉 All tests passed! System is ready.")
|
159 |
-
return 0
|
160 |
-
else:
|
161 |
-
print("\n❌ Some tests failed. Please check the issues above.")
|
162 |
-
return 1
|
163 |
-
|
164 |
-
if __name__ == "__main__":
|
165 |
-
sys.exit(main())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
start.sh
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
|
3 |
+
# Ensure proper permissions and directories for Hugging Face Spaces
|
4 |
+
mkdir -p $HOME/.cache/huggingface
|
5 |
+
mkdir -p $HOME/.streamlit
|
6 |
+
mkdir -p $HOME/.cache/torch
|
7 |
+
|
8 |
+
# Set proper permissions
|
9 |
+
chmod -R 755 $HOME/.cache
|
10 |
+
chmod -R 755 $HOME/.streamlit
|
11 |
+
|
12 |
+
# Export environment variables
|
13 |
+
export HF_HOME=$HOME/.cache/huggingface
|
14 |
+
export TRANSFORMERS_CACHE=$HOME/.cache/huggingface
|
15 |
+
export SENTENCE_TRANSFORMERS_HOME=$HOME/.cache/huggingface
|
16 |
+
export TORCH_HOME=$HOME/.cache/torch
|
17 |
+
|
18 |
+
# Run Streamlit in headless mode and disable usage-stats collection
|
19 |
+
export STREAMLIT_SERVER_HEADLESS=true
|
20 |
+
export STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
|
21 |
+
|
22 |
+
echo "Starting Streamlit app..."
|
23 |
+
exec streamlit run src/app.py --server.port=8501 --server.address=0.0.0.0 --server.headless=true --browser.gatherUsageStats=false
|