Omnibus committed (verified)
Commit 6e0c63c · 1 parent: cb18d46

Update app.py

Files changed (1): app.py (+16 −9)
app.py CHANGED
@@ -51,10 +51,10 @@ def format_prompt(message, history):
     prompt += f"<start_of_turn>user{message}<end_of_turn><start_of_turn>model"
     #print(prompt)
     return prompt
+result = []
 
 
-
-def chat_inf(system_prompt,prompt,history,client_choice,seed,temp,tokens,top_p,rep_p):
+def chat_inf(system_prompt,prompt,history,memory,client_choice,seed,temp,tokens,top_p,rep_p,chat_mem):
     #token max=8192
     hist_len=0
     client=clients[int(client_choice)-1]
@@ -62,17 +62,19 @@ def chat_inf(system_prompt,prompt,history,client_choice,seed,temp,tokens,top_p,rep_p):
         history = []
         hist_len=0
     if history:
-        for ea in history:
+        for ea in history[0-chat_mem:]:
             hist_len+=len(str(ea))
         print(hist_len)
     in_len=len(system_prompt+prompt)+hist_len
+
+
     print("\n######### HIST "+str(in_len))
     print("\n######### TOKENS "+str(tokens))
     if (in_len+tokens) > 8000:
         yield [(prompt,"Wait. I need to compress our Chat history...")]
-        history=compress_history(history,client_choice,seed,temp,tokens,top_p,rep_p)
+        hist=compress_history(history[-5:],client_choice,seed,temp,tokens,top_p,rep_p)
         yield [(prompt,"History has been compressed, processing request...")]
-
+        history = [(prompt,hist)]
     generate_kwargs = dict(
         temperature=temp,
         max_new_tokens=tokens,
@@ -82,7 +84,7 @@ def chat_inf(system_prompt,prompt,history,client_choice,seed,temp,tokens,top_p,rep_p):
         seed=seed,
     )
     #formatted_prompt=prompt
-    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
+    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history[0-chat_mem:])
     print("\n######### PROMPT "+str(len(formatted_prompt)))
 
 
@@ -95,7 +97,8 @@ def chat_inf(system_prompt,prompt,history,client_choice,seed,temp,tokens,top_p,rep_p):
         output += response.token.text
         yield [(prompt,output)]
     history.append((prompt,output))
-    yield history
+    memory=history
+    yield history,memory
 
 def get_screenshot(chat: list,height=5000,width=600,chatblock=[],theme="light",wait=3000,header=True):
     print(chatblock)
@@ -121,6 +124,7 @@ def check_rand(inp,val):
 
 
 with gr.Blocks() as app:
+    memory=gr.State()
     gr.HTML("""<center><h1 style='font-size:xx-large;'>Google Gemma Models</h1><br><h3>running on Huggingface Inference Client</h3><br><h7>EXPERIMENTAL""")
     chat_b = gr.Chatbot(height=500)
     with gr.Group():
@@ -145,6 +149,7 @@ with gr.Blocks() as app:
                 temp=gr.Slider(label="Temperature",step=0.01, minimum=0.01, maximum=1.0, value=0.9)
                 top_p=gr.Slider(label="Top-P",step=0.01, minimum=0.01, maximum=1.0, value=0.9)
                 rep_p=gr.Slider(label="Repetition Penalty",step=0.1, minimum=0.1, maximum=2.0, value=1.0)
+                chat_mem=gr.Number(label="Chat Memory", info="Number of previous chats to retain",value=5)
             with gr.Accordion(label="Screenshot",open=False):
                 with gr.Row():
                     with gr.Column(scale=3):
@@ -161,8 +166,10 @@ with gr.Blocks() as app:
 
 
     im_go=im_btn.click(get_screenshot,[chat_b,im_height,im_width,chatblock,theme,wait_time],img)
-    chat_sub=inp.submit(check_rand,[rand,seed],seed).then(chat_inf,[sys_inp,inp,chat_b,client_choice,seed,temp,tokens,top_p,rep_p],chat_b)
-    go=btn.click(check_rand,[rand,seed],seed).then(chat_inf,[sys_inp,inp,chat_b,client_choice,seed,temp,tokens,top_p,rep_p],chat_b)
+    chat_sub=inp.submit(check_rand,[rand,seed],seed).then(chat_inf,[sys_inp,inp,chat_b,client_choice,seed,temp,tokens,top_p,rep_p,chat_mem],chat_b)
+
+    go=btn.click(check_rand,[rand,seed],seed).then(chat_inf,[sys_inp,inp,chat_b,memory,client_choice,seed,temp,tokens,top_p,rep_p,chat_mem],[chat_b,memory])
+
     stop_btn.click(None,None,None,cancels=[go,im_go,chat_sub])
     clear_btn.click(clear_fn,None,[inp,sys_inp,chat_b])
    app.queue(default_concurrency_limit=10).launch()
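
The new chat_mem window is plain negative-index slicing: history[0-chat_mem:] is history[-chat_mem:], the last chat_mem (prompt, response) pairs. A minimal sketch of that behavior with illustrative values; the coercion caveat is an assumption about Gradio defaults, since gr.Number emits a float unless precision=0 is set and list slices reject floats:

# The window the new chat_mem parameter applies: the last N pairs.
history = [("q1", "a1"), ("q2", "a2"), ("q3", "a3"), ("q4", "a4")]
chat_mem = 2
print(history[0-chat_mem:])                # [('q3', 'a3'), ('q4', 'a4')]

# Assumed caveat: a float index raises TypeError, so coerce first.
chat_mem_from_ui = 2.0                     # what a default gr.Number may deliver
print(history[-int(chat_mem_from_ui):])    # same window after int()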
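
The compression guard measures characters, not tokens: in_len is len(system_prompt+prompt) plus len(str(pair)) summed over the retained history, and compression fires when in_len plus the requested max_new_tokens exceeds 8000, a margin under the 8192 cap noted in the code. A self-contained rerun of that arithmetic with toy values:

# Reproduces the diff's budget check.
system_prompt, prompt = "You are terse.", "Why is the sky blue?"
history = [("hi", "hello"), ("2+2?", "4")]
chat_mem, tokens = 5, 6000

hist_len = sum(len(str(ea)) for ea in history[-chat_mem:])
in_len = len(system_prompt + prompt) + hist_len
print(in_len, (in_len + tokens) > 8000)    # character count vs. 8000 budget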
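
compress_history is called on the last five turns but its body is not in this diff; the caller relies only on it returning a single string, which then reseeds the history as [(prompt, hist)]. One plausible shape, assuming it reuses the same InferenceClient list as chat_inf; the prompt wording, model ID, and text_generation usage below are all conjecture, not the actual helper:

from huggingface_hub import InferenceClient

# Stand-in for the module-level clients list app.py builds; model ID assumed.
clients = [InferenceClient("google/gemma-7b-it")]

def compress_history(history, client_choice, seed, temp, tokens, top_p, rep_p):
    # Flatten the retained turns into one summarization request.
    client = clients[int(client_choice) - 1]
    req = "Compress this chat history into a short summary:\n"
    for user_msg, bot_msg in history:
        req += f"user: {user_msg}\nmodel: {bot_msg}\n"
    # Return one summary string; the caller wraps it as [(prompt, hist)].
    return client.text_generation(
        req,
        max_new_tokens=tokens,
        temperature=temp,
        top_p=top_p,
        repetition_penalty=rep_p,
        seed=seed,
    )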
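
The memory thread works through Gradio's State: gr.State() has no visible widget, is passed in with the inputs, and whatever the handler emits for it is what the next call receives, which is why go now lists [chat_b, memory] as outputs while chat_inf ends with yield history, memory. (In this diff only the button path is rewired; chat_sub still omits memory from its inputs and writes only to chat_b.) A minimal self-contained sketch of the pattern, with all names (respond, msg, mem) illustrative rather than taken from app.py:

import gradio as gr

def respond(message, chat, mem):
    # mem persists across calls even if the visible chat is later trimmed.
    mem = (mem or []) + [message]
    chat = (chat or []) + [(message, f"echo: {message} ({len(mem)} turns seen)")]
    return chat, mem

with gr.Blocks() as demo:
    mem = gr.State([])             # per-session value, no UI widget
    chat = gr.Chatbot()
    msg = gr.Textbox()
    msg.submit(respond, [msg, chat, mem], [chat, mem])

demo.launch()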