Update app.py
app.py CHANGED
@@ -6,8 +6,6 @@ from huggingface_hub import AsyncInferenceClient
 
 HF_TOKEN = os.getenv('HF_TOKEN')
 api_url = os.getenv('API_URL')
-#api_url_nostream = os.getenv('API_URL_NOSTREAM')
-#headers = {'Content-Type': 'application/json',}
 headers = {"Authorization": f"Bearer {HF_TOKEN}"}
 client = AsyncInferenceClient(api_url)
 
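This hunk only drops two stale commented-out lines, but what remains is the entire configuration surface of the Space. A minimal sketch of that block, with a fail-fast guard added for illustration (the guard is an assumption, not part of app.py):

import os
from huggingface_hub import AsyncInferenceClient

HF_TOKEN = os.getenv('HF_TOKEN')
api_url = os.getenv('API_URL')
if not HF_TOKEN or not api_url:
    # fail fast instead of sending unauthenticated requests later (illustrative addition)
    raise RuntimeError("HF_TOKEN and API_URL must be set as Space secrets")

headers = {"Authorization": f"Bearer {HF_TOKEN}"}  # used by the batch path (requests.post)
client = AsyncInferenceClient(api_url)             # used by the streaming path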
@@ -41,7 +39,7 @@ examples=[
 # <s>[INST] {{ user_msg_1 }} [/INST] {{ model_answer_1 }} </s><s>[INST] {{ user_msg_2 }} [/INST]
 
 
-# Stream text
+# Stream text - stream tokens with InferenceClient from TGI
 async def predict(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
 
     if system_prompt != "":
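The body of predict is not shown in this hunk, but its shape follows from the signature above and the yield in the next hunk. A hedged sketch of the token-streaming loop, assuming the standard huggingface_hub text_generation streaming API and omitting the conversation history from `chatbot` for brevity:

async def predict(message, chatbot, system_prompt="", temperature=0.9,
                  max_new_tokens=256, top_p=0.6, repetition_penalty=1.0):
    # Llama-2 style prompt (see the template comment at line 39)
    if system_prompt != "":
        input_prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{message} [/INST]"
    else:
        input_prompt = f"<s>[INST] {message} [/INST]"

    partial_message = ""
    # stream=True makes text_generation return an async iterator of tokens
    async for token in await client.text_generation(
        input_prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        stream=True,
    ):
        partial_message += token
        yield partial_message  # Gradio redraws the chatbot on every yield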
@@ -72,10 +70,9 @@ async def predict(message, chatbot, system_prompt="", temperature=0.9, max_new_t
         yield partial_message
 
 
-# No Stream
+# No Stream - batch produce tokens using TGI inference endpoint
 def predict_batch(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
-
-    print(f"chatbot - {chatbot}")
+
     if system_prompt != "":
         input_prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n "
     else:
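The `<<SYS>>` fragment above is the opening of the Llama-2 chat template quoted at old line 41. A sketch of one way to assemble the full prompt from the chat history; `build_llama2_prompt` is a hypothetical helper, not a function in app.py:

def build_llama2_prompt(message, history, system_prompt=""):
    # <s>[INST] <<SYS>>\n{sys}\n<</SYS>>\n\n{user_1} [/INST] {answer_1} </s><s>[INST] {user_2} [/INST]
    if system_prompt != "":
        prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
    else:
        prompt = "<s>[INST] "
    for user_msg, answer in history:
        # close each past turn and open the next [INST] block
        prompt += f"{user_msg} [/INST] {answer} </s><s>[INST] "
    prompt += f"{message} [/INST]"
    return prompt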
@@ -104,16 +101,10 @@ def predict_batch(message, chatbot, system_prompt="", temperature=0.9, max_new_t
     }
 
     response = requests.post(api_url, headers=headers, json=data )  #auth=('hf', hf_token)) data=json.dumps(data),
-    print(f"response - {response}")
-    print(f"response.status_code - {response.status_code}")
-    print(f"response.text - {response.text}")
-    print(f"type(response.text) - {type(response.text)}")
-
     if response.status_code == 200:  # check if the request was successful
         try:
             json_obj = response.json()
-            print(f"type(response.json) - {type(json_obj)}")
-            print(f"response.json - {json_obj}")
             if 'generated_text' in json_obj[0] and len(json_obj[0]['generated_text']) > 0:
                 return json_obj[0]['generated_text']
             elif 'error' in json_obj[0]:
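The batch path posts the standard Inference API payload and unwraps `[0]['generated_text']`; with the debug prints gone, the error handling carries the full weight. A condensed sketch of the same request/parse flow, reusing `api_url` and `headers` from the config block; `generate_batch` and the timeout are assumptions for illustration:

import requests

def generate_batch(input_prompt, temperature=0.9, max_new_tokens=256,
                   top_p=0.6, repetition_penalty=1.0):
    data = {
        "inputs": input_prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "repetition_penalty": repetition_penalty,
        },
    }
    response = requests.post(api_url, headers=headers, json=data, timeout=60)
    if response.status_code != 200:
        return f"request failed: HTTP {response.status_code}"
    try:
        json_obj = response.json()  # the endpoint returns a list of results
    except ValueError:
        return "response was not valid JSON"
    first = json_obj[0]
    if first.get("generated_text"):
        return first["generated_text"]
    return first.get("error", "empty generation")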
@@ -199,12 +190,12 @@ chat_interface_batch=gr.ChatInterface(predict_batch,
 with gr.Blocks() as demo:
 
     with gr.Tab("Streaming"):
-        #
+        # streaming chatbot
         chatbot_stream.like(vote, None, None)
         chat_interface_stream.render()
 
     with gr.Tab("Batch"):
-        #
+        # non-streaming chatbot
         chatbot_batch.like(vote, None, None)
         chat_interface_batch.render()
 
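The tab layout mirrors the two code paths: one ChatInterface per function, each with its own Chatbot carrying a like/dislike listener. A minimal reconstruction of the surrounding Gradio wiring under those assumptions; the body of `vote` is assumed, and `predict`/`predict_batch` are the functions defined above:

import gradio as gr

def vote(data: gr.LikeData):
    # assumed callback: record which response was liked or disliked
    print("upvote" if data.liked else "downvote", data.value)

chatbot_stream = gr.Chatbot()
chatbot_batch = gr.Chatbot()
chat_interface_stream = gr.ChatInterface(predict, chatbot=chatbot_stream)
chat_interface_batch = gr.ChatInterface(predict_batch, chatbot=chatbot_batch)

with gr.Blocks() as demo:
    with gr.Tab("Streaming"):
        # streaming chatbot
        chatbot_stream.like(vote, None, None)
        chat_interface_stream.render()
    with gr.Tab("Batch"):
        # non-streaming chatbot
        chatbot_batch.like(vote, None, None)
        chat_interface_batch.render()

demo.queue().launch()  # queue() is required for the streaming generator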