Update app.py
Browse files
app.py
CHANGED
@@ -4,36 +4,36 @@ import torch
|
|
4 |
import os
|
5 |
from huggingface_hub import login
|
6 |
|
7 |
-
# Login
|
8 |
hf_token = os.environ["HF_TOKEN"]
|
9 |
login(token=hf_token)
|
10 |
|
11 |
-
#
|
12 |
device = 0 if torch.cuda.is_available() else -1
|
13 |
|
14 |
-
#
|
15 |
pipe = pipeline(
|
16 |
"text-generation",
|
17 |
-
model="
|
18 |
device=device
|
19 |
)
|
20 |
|
21 |
-
# Formato chat
|
22 |
def responder(prompt):
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
max_new_tokens=
|
27 |
do_sample=True,
|
28 |
temperature=0.7,
|
29 |
-
top_k=50,
|
30 |
top_p=0.9
|
31 |
-
)[0][
|
32 |
-
|
|
|
33 |
|
34 |
-
# Interfaz
|
35 |
with gr.Blocks() as demo:
|
36 |
-
gr.Markdown("##
|
37 |
entrada = gr.Textbox(label="Escribe tu mensaje")
|
38 |
salida = gr.Textbox(label="Respuesta")
|
39 |
entrada.submit(fn=responder, inputs=entrada, outputs=salida)
|
|
|
4 |
import os
|
5 |
from huggingface_hub import login
|
6 |
|
7 |
+
# Log in to the Hugging Face Hub only when a token is configured.
# Indexing os.environ directly would abort startup with a bare KeyError
# whenever the HF_TOKEN secret is missing.
# NOTE(review): the checkpoint loaded below is presumably public, so an
# anonymous download should work without a token - confirm.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)

# Use the first CUDA device when one is available; -1 tells the pipeline
# to run on CPU.
device = 0 if torch.cuda.is_available() else -1

# Load the TinyLlama chat checkpoint as a text-generation pipeline.
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    device=device,
)
|
20 |
|
21 |
+
# Formato tipo chat
|
22 |
def responder(prompt):
    """Generate the assistant's reply to a single user message.

    Args:
        prompt: Text typed by the user.

    Returns:
        The generated reply with surrounding whitespace removed, or an
        empty string when ``prompt`` is empty or only whitespace.
    """
    # Nothing to answer: skip the (slow) generation call entirely.
    if not prompt or not prompt.strip():
        return ""

    # Zephyr-style chat layout as shown on the TinyLlama chat model card:
    # each role tag sits on its own line and every finished turn is closed
    # with the </s> end-of-sequence marker.
    prompt_chat = (
        "<|system|>\nEres un asistente útil.</s>\n"
        f"<|user|>\n{prompt}</s>\n"
        "<|assistant|>\n"
    )

    # return_full_text=False makes the pipeline return only the newly
    # generated text, so the prompt does not have to be stripped afterwards
    # with str.replace (which depends on the decoded text matching the
    # prompt exactly and would also delete repeated occurrences).
    output = pipe(
        prompt_chat,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        return_full_text=False,
    )[0]["generated_text"]

    return output.strip()
|
33 |
|
34 |
+
# Interfaz
|
35 |
with gr.Blocks() as demo:
|
36 |
+
gr.Markdown("## 🤖 AmInside 1.0 – Asistente rápido y conversacional")
|
37 |
entrada = gr.Textbox(label="Escribe tu mensaje")
|
38 |
salida = gr.Textbox(label="Respuesta")
|
39 |
entrada.submit(fn=responder, inputs=entrada, outputs=salida)
|