# app.py
import os
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL = "speakleash/Bielik-1.5B-v3.0-Instruct"
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError(
        "HF_TOKEN is missing. Add an 'HF_TOKEN' secret in the Space settings (Settings → Secrets)."
    )

# Load explicitly with the token so the auth token is actually passed through.
# Note: `use_auth_token` is deprecated in recent transformers; use `token` instead.
token_kwargs = {"token": HF_TOKEN}
tokenizer = AutoTokenizer.from_pretrained(MODEL, **token_kwargs)
model = AutoModelForCausalLM.from_pretrained(MODEL, device_map="auto", **token_kwargs)
chat_pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def respond(message, history):
    # Simple prompt built by concatenating the history (optional; can be extended).
    # With Gradio's default tuple format, history is a list of (user, assistant) pairs.
    prompt = ""
    if history:
        for u, b in history:
            prompt += f"User: {u}\nAssistant: {b}\n"
    prompt += f"User: {message}\nAssistant:"
    out = chat_pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    gen = out[0]["generated_text"]
    # Keep only the newly generated part (strip the prompt if the model echoed it).
    reply = gen[len(prompt):] if gen.startswith(prompt) else gen
    # gr.ChatInterface manages the conversation history itself, so the handler
    # must return only the reply string, not a (reply, history) tuple.
    return reply.strip()

gr.ChatInterface(respond).launch()
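
Because Bielik is an instruct-tuned model, the manual "User:/Assistant:" concatenation above is only an approximation of the prompt format it was trained on. Below is a minimal sketch of an alternative handler that builds the prompt with the model's own chat template, assuming the tokenizer ships one (instruct releases typically do) and reusing the tokenizer and chat_pipe defined above; respond_with_template is a hypothetical name, not part of the original app.

# Alternative handler: format the conversation with the model's chat template
# instead of manual string concatenation. A sketch, assuming the tokenizer
# provides a chat template.
def respond_with_template(message, history):
    messages = []
    for u, b in history or []:
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": b})
    messages.append({"role": "user", "content": message})
    # add_generation_prompt appends the assistant header so the model
    # continues the conversation as the assistant.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    out = chat_pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        return_full_text=False,  # return only the continuation, not the prompt
    )
    return out[0]["generated_text"].strip()

To try it, pass respond_with_template to gr.ChatInterface instead of respond. With return_full_text=False the pipeline omits the echoed prompt, so no manual prompt stripping is needed.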