Commit 67e309d
Parent(s): 0e3ede0

resetting env var and trying to display tokens used
app.py CHANGED

@@ -19,9 +19,6 @@ This Space demonstrates model [Llama-2-13b-chat](https://huggingface.co/meta-lla
 
 🔎 For more details about the Llama 2 family of models and how to use them with `transformers`, take a look [at our blog post](https://huggingface.co/blog/llama2).
 
-🔨 Looking for an even more powerful model? Check out the large [**70B** model demo](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI).
-🐇 For a smaller model that you can run on many GPUs, check our [7B model demo](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat).
-
 """
 
 LICENSE = """

@@ -89,7 +86,11 @@ def check_input_token_length(message: str, chat_history: list[tuple[str, str]],
     input_token_length = get_input_token_length(message, chat_history, system_prompt)
     if input_token_length > MAX_INPUT_TOKEN_LENGTH:
         raise gr.Error(f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.')
+    DESCRIPTION += f'\ntokens used: {input_token_length}, max available {MAX_INPUT_TOKEN_LENGTH}'
 
+def get_tokens_used(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> (int, int):
+    input_token_length = get_input_token_length(message, chat_history, system_prompt)
+    return input_token_length, MAX_INPUT_TOKEN_LENGTH
 
 with gr.Blocks(css='style.css') as demo:
     gr.Markdown(DESCRIPTION)
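Note on the added lines: `DESCRIPTION` is a module-level string, so `DESCRIPTION +=` inside `check_input_token_length` makes the name local and raises `UnboundLocalError` without a `global DESCRIPTION` declaration, and even with one, `gr.Markdown(DESCRIPTION)` reads the string once when the Blocks UI is built, so later appends never reach the rendered page. A minimal standalone sketch of one way the count could be surfaced instead, wiring `get_tokens_used` into its own `gr.Markdown` output; the component layout and the whitespace-count stub are illustrative assumptions, not the Space's actual code:

```python
# Sketch only: show the token counter as a live Gradio component
# instead of mutating DESCRIPTION. Assumes gradio 3.x tuple-style
# chat history; the counting stub stands in for the Space's real
# tokenizer-based helper.
import gradio as gr

MAX_INPUT_TOKEN_LENGTH = 4000  # assumed to mirror the constant in app.py


def get_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> int:
    # Stand-in for the real helper: crude whitespace split, not the tokenizer.
    parts = [system_prompt, message] + [u + ' ' + a for u, a in (chat_history or [])]
    return len(' '.join(parts).split())


def get_tokens_used(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> str:
    # Return a string so it can feed a Markdown component directly.
    n = get_input_token_length(message, chat_history, system_prompt)
    return f'tokens used: {n}, max available {MAX_INPUT_TOKEN_LENGTH}'


with gr.Blocks() as demo:
    system_prompt = gr.Textbox(label='System prompt')
    chatbot = gr.Chatbot()
    message = gr.Textbox(label='Message')
    token_info = gr.Markdown()  # refreshed on every submit
    message.submit(get_tokens_used, inputs=[message, chatbot, system_prompt], outputs=token_info)

demo.launch()
```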
model.py CHANGED

@@ -9,14 +9,12 @@ from huggingface_hub import login
 model_id = 'meta-llama/Llama-2-13b-chat-hf'
 
 if torch.cuda.is_available():
-
+
     tok = os.environ['HF_TOKEN']
-
+
     login(new_session=True,
-          write_permission=False
-
-
-          #, token="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"
+          write_permission=False,
+          token=tok
           )
 
 config = AutoConfig.from_pretrained(model_id,
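The deleted comment carried a hard-coded `hf_` token in source; the "resetting env var" part of the commit replaces it with the `HF_TOKEN` environment variable, which the `+token=tok` line now passes into `huggingface_hub.login` explicitly. A minimal standalone sketch of the resulting flow, assuming `HF_TOKEN` is set in the environment (e.g. as a Space secret):

```python
# Sketch of the post-commit login flow in model.py, assuming the
# HF_TOKEN environment variable holds a valid Hugging Face token.
import os

from huggingface_hub import login

tok = os.environ['HF_TOKEN']   # KeyError here means the secret is missing
login(new_session=True,        # start a fresh session instead of reusing a cached one
      write_permission=False,  # read access is enough to download gated weights
      token=tok)               # token comes from the environment, never from source
```

Passing the token explicitly also avoids the interactive prompt that `login()` falls back to when no token is supplied and none is cached, which would hang a headless Space.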