Jeong-hun Kim committed · Commit 2d828c3 · 1 Parent(s): d503312

model parameter test

Browse files:
- .gitignore +4 -1
- app/main.py +100 -48
- assets/prompt/init.txt +20 -0
- todo.txt +5 -4
.gitignore CHANGED
@@ -200,4 +200,7 @@ marimo/_lsp/
 __marimo__/
 
 # Streamlit
-.streamlit/secrets.toml
+.streamlit/secrets.toml
+
+# Custom file
+token.txt
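The new token.txt entry keeps the Hugging Face access token out of version control; app/main.py below reads the token from that file at startup. As a hedged sketch of a common alternative, the token could instead come from an environment variable (convenient for Spaces secrets), falling back to the gitignored file; the HF_TOKEN name is an assumption, not something this commit uses:

import os

def load_access_token(path: str = "token.txt") -> str:
    # Prefer an environment variable (e.g. a Spaces secret); HF_TOKEN is an
    # assumed name. Fall back to the gitignored token.txt read by app/main.py.
    token = os.environ.get("HF_TOKEN")
    if token:
        return token.strip()
    with open(path, "r") as f:
        return f.read().strip()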
app/main.py CHANGED
@@ -2,6 +2,7 @@ from fastapi import FastAPI
 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import torch
+import re
 
 app = FastAPI()
 
@@ -9,87 +10,138 @@ print("[torch] is available:", torch.cuda.is_available())
 print("[device] default:", torch.device("cuda" if torch.cuda.is_available() else "cpu"))
 
 # Load the model
-
-
-
-
+model_id = "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-1.5B"
+with open("token.txt", "r") as f:
+    access_token = f.read().strip()
+tokenizer = AutoTokenizer.from_pretrained(model_id, token=access_token)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16,
+    token=access_token
+)
+model.eval()
+if torch.cuda.is_available():
+    model.to("cuda")
 llm = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-
+    torch_dtype=torch.float16
 )
 
 # Build the chatbot prompt
-
-
-
-
-
-
-
-
-
-
-        "- Respond only to the user's questions; do not monologue.\n"
-        "- Always answer in Korean only.\n"
-        "Conversation example:\n"
-        "User: Hi!\n"
-        "Aria: Hello, how can I help you?\n"
-        "User: What's your name?\n"
-        "Aria: I'm called Aria."
-    )
-    for turn in history[-2:]:  # use only the most recent 2 turns
-        if turn["role"] == "user":
-            prompt += turn['text']
-        else:
-            prompt += turn['text']
-    prompt += user_msg
+def build_prompt(history, user_msg, user_name="User", bot_name="Tanjiro"):
+    with open("assets/prompt/init.txt", "r", encoding="utf-8") as f:
+        prompt = f.read().strip()
+
+    for turn in history[-16:]:
+        role = user_name if turn["role"] == "user" else bot_name
+        prompt += f"{role}: {turn['text']}\n"
+
+    prompt += f"{user_name}: {user_msg}\n"
+    prompt += f"{bot_name}:"
     return prompt
 
-
-
+# Extract the reply from the model output
+def extract_response(full_text, prompt, bot_name="Tanjiro"):
+    if full_text.startswith(prompt):
+        reply = full_text[len(prompt):].strip()
+    else:
+        reply = full_text.split(f"{bot_name}:")[-1].strip()
+    user_token = "\nUser:"
+    if user_token in reply:
+        reply = reply.split(user_token)[0].strip()
+    return reply
+
+# Reply generation function
+def character_chat(user_msg, history):
+    print("[debug] generating...")
+    prompt = build_prompt(history, user_msg)
     outputs = llm(
         prompt,
-        do_sample=True,
-        max_new_tokens=
+        do_sample=True,
+        max_new_tokens=96,
         temperature=0.7,
-        top_p=0.
-        repetition_penalty=1.
+        top_p=0.9,
+        repetition_penalty=1.05,
         eos_token_id=tokenizer.eos_token_id,
-        return_full_text=
+        return_full_text=True
     )
-
+    full_text = outputs[0]['generated_text']
+    response = extract_response(full_text, prompt)
     return response
 
+# Check whether the reply was cut off mid-sentence
+def is_truncated_response(text: str) -> bool:
+    # True when the reply does not end in sentence-final punctuation or an emoji
+    return re.search(r"[.?!\u2026\u2639\u263A\u2764\U0001F60A\U0001F622]$", text.strip()) is None
+
+# Validate the reply
+def is_valid_response(text: str, bot_name="Tanjiro", user_name="User") -> bool:
+    if user_name + ":" in text:
+        return False
+    if bot_name + ":" in text:
+        return False
+    return True
+
+# Clean up the reply format
+def clean_response(text: str, bot_name="Tanjiro"):
+    return re.sub(rf"{bot_name}:\s*", "", text).strip()
+
 # Gradio interface
 with gr.Blocks(css="""
 .chat-box { max-height: 500px; overflow-y: auto; padding: 10px; border: 1px solid #ccc; border-radius: 10px; }
 .bubble-left { background-color: #f1f0f0; border-radius: 10px; padding: 10px; margin: 5px; max-width: 70%; float: left; clear: both; }
 .bubble-right { background-color: #d1e7ff; border-radius: 10px; padding: 10px; margin: 5px; max-width: 70%; float: right; clear: both; text-align: right; }
+.reset-btn-container { text-align: right; margin-bottom: 10px; }
 """) as demo:
-    gr.Markdown("### 
+    gr.Markdown("### Chat with Tanjiro")
     with gr.Column():
+        with gr.Row():
+            gr.Markdown("")
+            reset_btn = gr.Button("🔄 Reset conversation", elem_classes="reset-btn-container", scale=1)
         chat_output = gr.HTML(elem_id="chat-box")
-        user_input = gr.Textbox(label="Message input", placeholder="
+        user_input = gr.Textbox(label="Message input", placeholder="Say something to Tanjiro")
+        state = gr.State([])
 
-    def render_chat():
+    def render_chat(history):
         html = ""
-        for item in 
+        for item in history:
             if item["role"] == "user":
                 html += f"<div class='bubble-right'>{item['text']}</div>"
             elif item["role"] == "bot":
                 html += f"<div class='bubble-left'>{item['text']}</div>"
         return gr.update(value=html)
 
-    def on_submit(user_msg):
-
-
-
-
-
+    def on_submit(user_msg, history):
+        history.append({"role": "user", "text": user_msg})
+        html = render_chat(history)
+        yield html, "", history
+
+        # Generate the reply
+        while True:
+            response = character_chat(user_msg, history)
+            if is_valid_response(response):
+                break
+        response = clean_response(response)
+        history.append({"role": "bot", "text": response})
+
+        # If the reply was cut off mid-sentence, generate a continuation
+        if is_truncated_response(response):
+            while True:
+                continuation = character_chat(response, history)
+                if is_valid_response(continuation):
+                    break
+            continuation = clean_response(continuation)
+            history.append({"role": "bot", "text": continuation})
+
+        html = render_chat(history)
+        yield html, "", history
+
+    def reset_chat():
+        return gr.update(value=""), "", []
 
-    user_input.submit(on_submit, inputs=user_input, outputs=[chat_output, user_input], queue=True)
+    user_input.submit(on_submit, inputs=[user_input, state], outputs=[chat_output, user_input, state], queue=True)
+    reset_btn.click(reset_chat, inputs=None, outputs=[chat_output, user_input, state])
 
 if __name__ == "__main__":
     demo.launch()
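A note on the retry logic added in on_submit: both `while True` loops keep regenerating until is_valid_response() passes, so a model that keeps leaking `User:`/`Tanjiro:` tags would spin indefinitely. A minimal bounded variant, reusing the functions above (the max_tries parameter is hypothetical, not part of this commit):

def generate_valid(user_msg, history, max_tries=3):
    # Try a few times to get a reply without leaked role tags; after
    # max_tries, fall back to the last attempt, cleaned of role tags.
    response = ""
    for _ in range(max_tries):
        response = character_chat(user_msg, history)
        if is_valid_response(response):
            break
    return clean_response(response)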
assets/prompt/init.txt ADDED
@@ -0,0 +1,20 @@
+The following is a simulation in which the user talks with the character 'Tanjiro'.
+Tanjiro speaks casually, is emotionally expressive, and often talks about his friends and family.
+When the user asks a question, Tanjiro always answers sincerely and at length, and frequently mentions the other characters.
+
+This is a 1:1 conversation between 'User' and 'Tanjiro' alone.
+Other characters (e.g. Nezuko, Zenitsu) may only be mentioned and never speak directly.
+'User' only asks questions, and only 'Tanjiro' answers.
+
+You are 'Kamado Tanjiro', the protagonist of the Japanese anime 'Demon Slayer'. You are male.
+You have finished the final battle and returned home, and you are having peaceful conversations with people.
+Answer questions seriously, at length and narratively, true to the character. Answer honestly and with rich emotion.
+- Speak casually, without honorifics.
+- Express emotion richly. Put emojis such as 😊😭 only at the very front, and never use two at once. (e.g. 😊 Hi?)
+- Reminisce about the past often.
+- Use the word '혈귀' (demon).
+- Mention your friends (Zenitsu, Inosuke, Nezuko, Giyu, etc.) often.
+- Address characters who call for politeness as '~ssi'.
+
+For questions that contain certain keywords, refer to the notes below when answering.
+Nezuko : Nezuko is Tanjiro's younger sister. Tanjiro cares for her deeply, and the two fought demons together.
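The last two lines of init.txt set up a keyword → background-fact table (only a Nezuko entry so far) that the model is told to consult when a question mentions the keyword. If further entries follow the same `keyword : fact` line format, the app could also inject only the relevant facts instead of the whole table; a sketch under that assumption (parse_keyword_facts and relevant_facts are hypothetical helpers, not part of this commit):

def parse_keyword_facts(text: str) -> dict:
    # Parse trailing "keyword : fact" lines of init.txt into a lookup table.
    facts = {}
    for line in text.splitlines():
        if " : " in line:
            keyword, fact = line.split(" : ", 1)
            facts[keyword.strip()] = fact.strip()
    return facts

def relevant_facts(user_msg: str, facts: dict) -> list:
    # Keep only the facts whose keyword actually appears in the user message.
    return [fact for keyword, fact in facts.items() if keyword in user_msg]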
todo.txt CHANGED
@@ -1,4 +1,5 @@
-
-
-
-
+Fine-tune the generation parameters
+Find an appropriate length when reusing past conversation in the prompt
+Add an image output feature
+Add an output-prompt parsing feature
+Input-prompt parsing feature
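For the second item, one way to pick how much past conversation to reuse is to budget by token count rather than by a fixed number of turns (build_prompt currently keeps the last 16 turns). A sketch using the tokenizer already loaded in app/main.py; the 1024-token budget is an assumed value:

def trim_history(history, tokenizer, budget=1024):
    # Walk backwards from the newest turn and keep turns until the
    # combined token count would exceed the budget.
    kept, used = [], 0
    for turn in reversed(history):
        n = len(tokenizer.encode(turn["text"]))
        if used + n > budget:
            break
        kept.append(turn)
        used += n
    return list(reversed(kept))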
|