FM-1976 committed
Commit 0721572 · 1 Parent(s): e1fd389

Create app.py


change the image link to a URL address
change model_path to the TheBloke file link
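
Note: llama_cpp's Llama(model_path=...) expects a local file path, and a Hub /blob/ URL points at an HTML page rather than the raw GGUF. A minimal sketch of resolving the TheBloke file to a local path, assuming the huggingface_hub package is installed:

from huggingface_hub import hf_hub_download

# download (and cache) the quantized GGUF, returning its local path
modelfile = hf_hub_download(
    repo_id="TheBloke/Starling-LM-7B-alpha-GGUF",
    filename="starling-lm-7b-alpha.Q4_K_M.gguf"
)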

Files changed (1)
  1. app.py +148 -0
app.py ADDED
@@ -0,0 +1,148 @@
+ import gradio as gr
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download
+ import datetime
+
+ # MODEL SETTINGS, also used for DISPLAY
+ convHistory = ''
+ # llama_cpp needs a local file, so fetch the GGUF from the TheBloke repo first
+ modelfile = hf_hub_download(
+     repo_id="TheBloke/Starling-LM-7B-alpha-GGUF",
+     filename="starling-lm-7b-alpha.Q4_K_M.gguf"
+ )
+ repetitionpenalty = 1.15
+ contextlength = 8192
+ logfile = 'StarlingLM7B_logs.txt'
+ print("loading model...")
+ stt = datetime.datetime.now()
+ # Set n_gpu_layers to the number of layers to offload to GPU; omit it (or use 0) if no GPU acceleration is available on your system.
+ llm = Llama(
+     model_path=modelfile,  # local path returned by hf_hub_download
+     n_ctx=contextlength,   # the max sequence length to use - longer sequences require much more memory
+     #n_threads=2,          # the number of CPU threads to use, tailor to your system and the resulting performance
+ )
+ dt = datetime.datetime.now() - stt
+ print(f"Model loaded in {dt}")
+
+ def writehistory(text):
+     # append one log entry; the with-block closes the file automatically
+     with open(logfile, 'a') as f:
+         f.write(text)
+         f.write('\n')
+
+ """
+ Available built-in Gradio themes, kept here for reference:
+ gr.themes.Base()
+ gr.themes.Default()
+ gr.themes.Glass()
+ gr.themes.Monochrome()
+ gr.themes.Soft()
+ """
+ def combine(a, b, c, d, e, f):
+     # a=system prompt, b=user prompt, c=temperature, d=max new tokens, e=top_p, f=repeat penalty
+     global convHistory
+     # the OpenChat template below has no system-prompt slot, so SYSTEM_PROMPT is display-only
+     SYSTEM_PROMPT = f"""{a}
+
+
+     """
+     # parameters here: https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.__call__
+     temperature = c
+     max_new_tokens = d
+     repeat_penalty = f
+     top_p = e
+     prompt = f"GPT4 User: {b}<|end_of_turn|>GPT4 Assistant:"
+     start = datetime.datetime.now()
+     generation = ""
+     delta = ""
+     prompt_tokens = f"Prompt Tokens: {len(llm.tokenize(bytes(prompt, encoding='utf-8')))}"
+     answer_tokens = ''
+     total_tokens = ''
+     for character in llm(prompt,
+                          max_tokens=max_new_tokens,
+                          stop=["<|end_of_turn|>"],  # end-of-turn marker of the OpenChat/Starling template
+                          temperature=temperature,
+                          repeat_penalty=repeat_penalty,
+                          top_p=top_p,
+                          echo=False,
+                          stream=True):
+         generation += character["choices"][0]["text"]
+
+         answer_tokens = f"Out Tkns: {len(llm.tokenize(bytes(generation, encoding='utf-8')))}"
+         total_tokens = f"Total Tkns: {len(llm.tokenize(bytes(prompt, encoding='utf-8'))) + len(llm.tokenize(bytes(generation, encoding='utf-8')))}"
+         delta = datetime.datetime.now() - start
+         yield generation, delta, prompt_tokens, answer_tokens, total_tokens
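+     # once the stream ends, persist the full exchange to the log file and history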
+     timestamp = datetime.datetime.now()
+     logger = f"""time: {timestamp}\n Temp: {temperature} - MaxNewTokens: {max_new_tokens} - RepPenalty: {repeat_penalty} \nPROMPT: \n{prompt}\nStarlingLM-7B: {generation}\nGenerated in {delta}\nPromptTokens: {prompt_tokens} Output Tokens: {answer_tokens} Total Tokens: {total_tokens}\n\n---\n\n"""
+     writehistory(logger)
+     convHistory = convHistory + prompt + "\n" + generation + "\n"
+     print(convHistory)
+     return generation, delta, prompt_tokens, answer_tokens, total_tokens
+     #return generation, delta
+
+
+ # MAIN GRADIO INTERFACE
+ with gr.Blocks(theme='WeixuanYuan/Soft_dark') as demo:  #theme=gr.themes.Glass() #theme='remilia/Ghostly'
+     # TITLE SECTION
+     with gr.Row(variant='compact'):
+         with gr.Column(scale=12):
+             gr.HTML("<center>"
+                     + "<h3>Prompt Engineering Playground!</h3>"
+                     + "<h1>🐦 StarlingLM-7b 8K context window</h1></center>")
+             gr.Image(value='https://starling.cs.berkeley.edu/starling.png', height=95, show_label=False,
+                      show_download_button=False, container=False)
+     # INTERACTIVE INFOGRAPHIC SECTION
+     with gr.Row():
+         with gr.Column(min_width=80):
+             gentime = gr.Textbox(value="", placeholder="Generation Time:", min_width=50, show_label=False)
+         with gr.Column(min_width=80):
+             prompttokens = gr.Textbox(value="", placeholder="Prompt Tkn:", min_width=50, show_label=False)
+         with gr.Column(min_width=80):
+             outputokens = gr.Textbox(value="", placeholder="Output Tkn:", min_width=50, show_label=False)
+         with gr.Column(min_width=80):
+             totaltokens = gr.Textbox(value="", placeholder="Total Tokens:", min_width=50, show_label=False)
+
+     # PLAYGROUND INTERFACE SECTION
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown(
+                 """
+                 ### Tuning Parameters""")
+             temp = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.01, value=0.42)
+             top_p = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, step=0.01, value=0.8)
+             repPen = gr.Slider(label="Repetition Penalty", minimum=0.0, maximum=4.0, step=0.01, value=1.2)
+             max_len = gr.Slider(label="Maximum output length", minimum=10, maximum=7000, step=2, value=900)
+             gr.Markdown(
+                 """
+                 Fill in the System Prompt and the User Prompt,
+                 then click the Button below.
+                 """)
+             btn = gr.Button(value="🐦 Generate", variant='primary')
+             gr.Markdown(
+                 f"""
+                 - **Prompt Template**: OpenChat 🐦
+                 - **Repetition Penalty**: {repetitionpenalty}
+                 - **Context Length**: {contextlength} tokens
+                 - **LLM Engine**: llama-cpp-python
+                 - **Model**: 🐦 StarlingLM-7b
+                 - **Log File**: {logfile}
+                 """)
+
+
+         with gr.Column(scale=4):
+             txt = gr.Textbox(label="System Prompt", value="", placeholder="This model does not have any System prompt...", lines=1, interactive=False)
+             txt_2 = gr.Textbox(label="User Prompt", lines=6)
+             txt_3 = gr.Textbox(value="", label="Output", lines=13, show_copy_button=True)
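+             # combine is a generator, so Gradio streams each yield into these
+             # outputs: partial text plus live generation-time and token counters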
+             btn.click(combine,
+                       inputs=[txt, txt_2, temp, max_len, top_p, repPen],
+                       outputs=[txt_3, gentime, prompttokens, outputokens, totaltokens])
+
+
+ if __name__ == "__main__":
+     demo.launch(inbrowser=True)
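
To try the app locally, a minimal setup sketch (package names inferred from the imports above; versions unpinned):

pip install gradio llama-cpp-python huggingface_hub
python app.py

The quantized model file is downloaded and cached on the first run.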