MarcinSmolnik committed
Commit 074e5f4 · verified · 1 Parent(s): 6fee24b

Update app.py

Files changed (1)
  1. app.py +35 -14
app.py CHANGED
@@ -1,25 +1,46 @@
+ # app.py
+ import os
  import gradio as gr
- from transformers import pipeline
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

- MODEL_NAME = "speakleash/Bielik-1.5B-v3.0-Instruct"
+ MODEL = "speakleash/Bielik-1.5B-v3.0-Instruct"
+ HF_TOKEN = os.environ.get("HF_TOKEN")

- # Create the pipeline
- chat = pipeline(
-     "text-generation",
-     model=MODEL_NAME,
-     device_map="auto"  # automatically picks CPU or GPU
- )
+ if not HF_TOKEN:
+     raise RuntimeError(
+         "HF_TOKEN is missing. Add an 'HF_TOKEN' secret in the Space settings (Settings → Secrets)."
+     )
+
+ # load explicitly with the token (make sure the auth token gets passed through)
+ token_kwargs = {"token": HF_TOKEN}
+
+ tokenizer = AutoTokenizer.from_pretrained(MODEL, **token_kwargs)
+ model = AutoModelForCausalLM.from_pretrained(MODEL, device_map="auto", **token_kwargs)
+
+ chat_pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

- # Chat handler function
  def respond(message, history):
-     output = chat(
-         message,
-         max_length=256,
+     # simple context stitching from the history (optional; can be extended)
+     prompt = ""
+     if history:
+         for u, b in history:
+             prompt += f"User: {u}\nAssistant: {b}\n"
+     prompt += f"User: {message}\nAssistant:"
+
+     out = chat_pipe(
+         prompt,
+         max_new_tokens=256,
          do_sample=True,
          temperature=0.7,
          top_p=0.9
      )
-     return output[0]['generated_text']
+     gen = out[0]["generated_text"]
+
+     # keep only the newly generated part (strip the prompt if the model echoed it)
+     reply = gen[len(prompt):] if gen.startswith(prompt) else gen
+
+     # gr.ChatInterface keeps track of the conversation history itself,
+     # so return only the reply string
+     return reply

- # Chat interface
  gr.ChatInterface(respond).launch()
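
The comment in respond flags the manual User:/Assistant: prompt stitching as optional and extensible. A minimal sketch of one such extension, not part of the commit: tokenizer.apply_chat_template (a standard transformers API) renders the history in the format the model was actually trained on, instead of a hand-rolled transcript. This assumes Bielik's tokenizer defines a chat template, which is usual for instruct models but worth verifying; tokenizer and chat_pipe are the objects created above.

def respond(message, history):
    # Convert Gradio's (user, assistant) history pairs into chat messages.
    messages = []
    for u, b in history or []:
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": b})
    messages.append({"role": "user", "content": message})

    # Render the model's own chat template to a string and append the
    # assistant-turn marker so generation starts at the reply.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    out = chat_pipe(prompt, max_new_tokens=256, do_sample=True,
                    temperature=0.7, top_p=0.9)
    gen = out[0]["generated_text"]
    return gen[len(prompt):] if gen.startswith(prompt) else gen

Used as a drop-in replacement for the respond above, the gr.ChatInterface wiring stays the same.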