Update app.py
app.py
CHANGED
@@ -4,14 +4,14 @@ import os
 import requests
 import time
 
-# Local model path -
-MODEL_PATH = "
-MODEL_URL = "https://huggingface.co/
+# Local model path - Qwen2.5-0.5B-Instruct FAST
+MODEL_PATH = "qwen2.5-0.5b-instruct-q4_k_m.gguf"
+MODEL_URL = "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_k_m.gguf"
 
 def download_model():
     """Download the model if it does not already exist"""
     if not os.path.exists(MODEL_PATH):
-        print("📥 Downloading
+        print("📥 Downloading Qwen2.5-0.5B-Instruct model...")
         try:
             response = requests.get(MODEL_URL, stream=True, timeout=300)
             response.raise_for_status()
@@ -29,7 +29,7 @@ def download_model():
                     print(f"📥 Download progress: {progress:.1f}%")
 
             # Check that the file is complete
-            if os.path.getsize(MODEL_PATH) <
+            if os.path.getsize(MODEL_PATH) < 100000:  # At least 100KB
                 print("❌ Downloaded file seems corrupted")
                 os.remove(MODEL_PATH)
                 return False
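The diff elides the body of the streaming download (file lines 18-28, between the first two hunks). A minimal sketch of what such a chunked download with progress reporting typically looks like — the `download_file` name, the 8192-byte chunk size, and the Content-Length-based progress are assumptions, not the committed code:

```python
import requests

def download_file(url: str, path: str, chunk_size: int = 8192) -> None:
    """Stream a large file to disk, printing download progress.

    Hypothetical reconstruction of the elided loop, not the committed code.
    """
    response = requests.get(url, stream=True, timeout=300)
    response.raise_for_status()
    total = int(response.headers.get("content-length", 0))
    done = 0
    with open(path, "wb") as f:
        for chunk in response.iter_content(chunk_size=chunk_size):
            f.write(chunk)
            done += len(chunk)
            if total:
                print(f"📥 Download progress: {done / total * 100:.1f}%")
```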
@@ -45,7 +45,7 @@ def download_model():
     else:
         print("✅ Model already exists!")
         # Check that the existing file is valid
-        if os.path.getsize(MODEL_PATH) <
+        if os.path.getsize(MODEL_PATH) < 100000:
             print("❌ Existing file seems corrupted, re-downloading...")
             os.remove(MODEL_PATH)
             return download_model()  # Retry
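Both branches use the same 100000-byte floor, which only catches empty files or HTML error pages; the q4_k_m GGUF itself is on the order of a few hundred megabytes. A stricter variant (an assumption, not part of this commit) would compare the on-disk size with the server-reported Content-Length:

```python
import os
import requests

def looks_complete(url: str, path: str) -> bool:
    """Hypothetical check: compare file size against Content-Length."""
    head = requests.head(url, allow_redirects=True, timeout=30)
    expected = int(head.headers.get("content-length", 0))
    return expected > 0 and os.path.getsize(path) == expected
```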
@@ -56,28 +56,32 @@ model_loaded = download_model()
 llm = None  # Initialize to None
 
 if model_loaded:
-    # Initialize the OPTIMIZED model
+    # Initialize the SUPER OPTIMIZED model with Qwen2.5-0.5B
     try:
         llm = Llama(
             model_path=MODEL_PATH,
-            n_ctx=
-            n_threads=
-            n_batch=
+            n_ctx=2048,       # Increased thanks to the smaller model
+            n_threads=4,      # More threads possible with the small model
+            n_batch=256,      # Optimized batch size
             use_mlock=False,  # Disabled for HF Free
             verbose=False,
             n_gpu_layers=0,
             use_mmap=True,    # Use memory mapping for efficiency
-            low_vram=True
+            low_vram=True,        # Low-memory mode
+            rope_scaling_type=1,  # RoPE optimization
+            rope_freq_base=10000.0
         )
-        print("✅ Model loaded successfully!")
+        print("✅ Qwen2.5-0.5B Model loaded successfully!")
     except Exception as e:
         print(f"❌ Error loading model: {e}")
         llm = None
 else:
     print("❌ Model not available, using fallback responses")
 
-# System prompt
-system_prompt = """
+# System prompt OPTIMIZED for Qwen2.5
+system_prompt = """<|im_start|>system
+You are an expert D&D Dungeon Master. Create immersive, engaging adventures with vivid descriptions. Always end your responses with a question or choice for the player. Keep responses concise but atmospheric.
+<|im_end|>"""
 
 def generate_random_opening():
     """Generate a random opening for the adventure using the AI"""
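Whether `low_vram`, `rope_scaling_type`, and `rope_freq_base` are accepted depends on the installed llama-cpp-python version; if the constructor rejects one, the `except` above leaves `llm` as `None`. A defensive sketch (the `make_llama` helper is an assumption, not the committed code) that drops keyword arguments the installed `Llama.__init__` does not declare:

```python
import inspect
from llama_cpp import Llama

def make_llama(model_path: str, **wanted):
    """Instantiate Llama, dropping kwargs this version does not accept."""
    params = inspect.signature(Llama.__init__).parameters
    has_var_kw = any(p.kind is inspect.Parameter.VAR_KEYWORD
                     for p in params.values())
    kwargs = wanted if has_var_kw else {k: v for k, v in wanted.items()
                                        if k in params}
    return Llama(model_path=model_path, **kwargs)
```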
@@ -92,23 +96,24 @@ def generate_random_opening():
         return f"🌟 **New Adventure!** 🌟\n\n{random.choice(openings)}"
 
     try:
-        # Prompt for
-        opening_prompt = f"{system_prompt}
+        # Prompt optimized for Qwen2.5
+        opening_prompt = f"""{system_prompt}
+<|im_start|>user
+Generate a creative D&D adventure opening in 2-3 sentences. Set an intriguing scene and end with a question for the player.
+<|im_end|>
+<|im_start|>assistant"""
 
         output = llm(
             opening_prompt,
-            max_tokens=
+            max_tokens=80,  # Slightly higher for quality
             temperature=0.8,
             top_p=0.9,
             repeat_penalty=1.1,
-            stop=["
+            stop=["<|im_end|>", "<|im_start|>", "User:", "Player:"]
         )
 
         opening = output["choices"][0]["text"].strip()
 
-        if opening.startswith("DM:"):
-            opening = opening[3:].strip()
-
         # Make sure it ends with a question
         if not opening.endswith('?'):
             opening += " What do you do?"
@@ -121,8 +126,8 @@ def generate_random_opening():
 
 chat_history = []
 
-def generate_dm_response_with_timeout(message, timeout=45):
-    """Generate a response with a timeout for
+def generate_dm_response_with_timeout(message, timeout=30):
+    """Generate a response with a reduced timeout, for speed"""
     if llm is None:
         # Fallback responses if the model is not available
         import random
@@ -136,42 +141,43 @@ def generate_dm_response_with_timeout(message, timeout=45):
         return random.choice(fallbacks)
 
     try:
-        # Prompt
-        prompt = f"{system_prompt}\n
+        # Prompt optimized for Qwen2.5 with chat template
+        prompt = f"{system_prompt}\n"
 
-        #
-
-
-        prompt += f"
+        # Keep more context thanks to the efficient model
+        context_turns = min(len(chat_history), 3)  # Last 3 turns
+        for turn in chat_history[-context_turns:]:
+            prompt += f"<|im_start|>user\n{turn['user']}\n<|im_end|>\n"
+            prompt += f"<|im_start|>assistant\n{turn['ai']}\n<|im_end|>\n"
 
-        prompt += f"
+        prompt += f"<|im_start|>user\n{message}\n<|im_end|>\n<|im_start|>assistant\n"
 
-        # Parameters
+        # Parameters optimized for Qwen2.5-0.5B
         start_time = time.time()
         output = llm(
             prompt,
-            max_tokens=
-            stop=["
-            temperature=0.
-            top_p=0.
-            repeat_penalty=1.
-            top_k=
+            max_tokens=100,  # Increased for better quality
+            stop=["<|im_end|>", "<|im_start|>", "User:", "Player:"],
+            temperature=0.7,
+            top_p=0.8,
+            repeat_penalty=1.2,
+            top_k=40,
+            min_p=0.1  # Better quality control
        )
 
         # Check whether it took too long
-
-
+        elapsed_time = time.time() - start_time
+        if elapsed_time > timeout:
+            print(f"Response took {elapsed_time:.1f}s (timeout: {timeout}s)")
             return "Time passes quickly. What do you do next?"
 
         text = output["choices"][0]["text"].strip()
 
-        if text.startswith("DM:"):
-            text = text[3:].strip()
-
         # Make sure there is always a question
         if not text.endswith(('?', '!', '.')):
             text += "?"
 
+        print(f"✅ Response generated in {elapsed_time:.1f}s")
         return text
 
     except Exception as e:
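Two things are worth noting about this hunk. First, generation is a single blocking call, so the `timeout` is only checked after the fact: a slow response is discarded, not interrupted. Second, the loop assembles a ChatML transcript; with one stored turn, `prompt` would look roughly like this (illustrative values, system text abbreviated):

```python
# Illustrative only - the shape of `prompt` after one stored turn.
example_prompt = (
    "<|im_start|>system\n"
    "You are an expert D&D Dungeon Master. ...\n"
    "<|im_end|>\n"
    "<|im_start|>user\nI search the room\n<|im_end|>\n"
    "<|im_start|>assistant\nYou find a rusty key. What now?\n<|im_end|>\n"
    "<|im_start|>user\nI take the key\n<|im_end|>\n"
    "<|im_start|>assistant\n"
)
```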
@@ -184,13 +190,13 @@ def chat(message, history):
     if not message.strip():
         return "You stand there, unsure. What would you like to do?"
 
-    # Generate the DM response with a timeout
+    # Generate the DM response with a reduced timeout
     dm_response = generate_dm_response_with_timeout(message)
 
-    # Update history (keep
+    # Update history (keep more turns thanks to the efficient model)
     chat_history.append({"user": message, "ai": dm_response})
-    if len(chat_history) > 2:
-        chat_history = chat_history[-2:]
+    if len(chat_history) > 5:  # Keep 5 turns instead of 2
+        chat_history = chat_history[-5:]
 
     return dm_response
@@ -199,22 +205,22 @@ def reset():
     chat_history = []
     return generate_random_opening()
 
-# Create the OPTIMIZED interface
-with gr.Blocks(title="Infinite Dungeon", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("#
-    gr.Markdown("*
-    gr.Markdown("
+# Create the SUPER OPTIMIZED interface
+with gr.Blocks(title="Infinite Dungeon - Lightning Fast", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# ⚡ Infinite Dungeon - Lightning Fast")
+    gr.Markdown("*Powered by Qwen2.5-0.5B - Optimized for 5-15 second responses*")
+    gr.Markdown("🚀 **Super fast AI D&D with perfect memory retention**")
 
-    # Initialize the chat
+    # Initialize the chat
     chatbot = gr.Chatbot(
-        value=[(None, "
+        value=[(None, "⚡ **Lightning Fast Adventure Ready!** ⚡\n\nPress 'New Adventure' to begin your quest!")],
         height=400,
         show_label=False
     )
 
     msg = gr.Textbox(
         label="Your action",
-        placeholder="
+        placeholder="What do you do? (e.g., 'I search the room', 'I attack the orc', 'I cast a spell')",
         max_lines=2
     )
@@ -222,7 +228,7 @@ with gr.Blocks(title="Infinite Dungeon", theme=gr.themes.Soft()) as demo:
     submit = gr.Button("⚔️ Act", variant="primary", size="lg")
     reset_btn = gr.Button("🔄 New Adventure", variant="secondary")
 
-    gr.Markdown("
+    gr.Markdown("⚡ **Ultra-fast responses**: 5-15 seconds | 🧠 **Perfect memory**: Never forgets your adventure!")
 
     # Function to handle the chat
     def respond(message, chat_history_ui):
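The diff ends inside `respond`, so the event wiring is not shown. For a tuple-style `gr.Chatbot` like the one above, the remainder of the file plausibly continues as in the sketch below; `respond`'s body, the handler wiring, and the `launch()` call are assumptions, not part of this diff:

```python
    # Hypothetical completion - the rest of app.py is not shown in the diff.
    def respond(message, chat_history_ui):
        dm_response = chat(message, chat_history_ui)
        chat_history_ui.append((message, dm_response))
        return "", chat_history_ui

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit.click(respond, [msg, chatbot], [msg, chatbot])
    reset_btn.click(lambda: [(None, reset())], None, chatbot)

demo.launch()
```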