jaymojnidar committed
Commit 67e309d · 1 Parent(s): 0e3ede0

resetting env var and trying to display tokens used

Files changed (2)
  1. app.py +4 -3
  2. model.py +4 -6
app.py CHANGED
@@ -19,9 +19,6 @@ This Space demonstrates model [Llama-2-13b-chat](https://huggingface.co/meta-lla
 
 🔎 For more details about the Llama 2 family of models and how to use them with `transformers`, take a look [at our blog post](https://huggingface.co/blog/llama2).
 
-🔨 Looking for an even more powerful model? Check out the large [**70B** model demo](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI).
-🐇 For a smaller model that you can run on many GPUs, check our [7B model demo](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat).
-
 """
 
 LICENSE = """
@@ -89,7 +86,11 @@ def check_input_token_length(message: str, chat_history: list[tuple[str, str]],
     input_token_length = get_input_token_length(message, chat_history, system_prompt)
     if input_token_length > MAX_INPUT_TOKEN_LENGTH:
         raise gr.Error(f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.')
+    DESCRIPTION += f'\ntokens used: {input_token_length}, max available {MAX_INPUT_TOKEN_LENGTH}'
 
+def get_tokens_used(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> (int, int):
+    input_token_length = get_input_token_length(message, chat_history, system_prompt)
+    return input_token_length, MAX_INPUT_TOKEN_LENGTH
 
 with gr.Blocks(css='style.css') as demo:
     gr.Markdown(DESCRIPTION)
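
Note on the new display logic: the added `DESCRIPTION += …` runs inside `check_input_token_length` after `gr.Markdown(DESCRIPTION)` has already rendered, so the appended text will not show up in the UI (and without a `global DESCRIPTION` declaration the augmented assignment would raise an `UnboundLocalError`). A minimal, self-contained sketch of one way the `get_tokens_used` values could be surfaced live; the component names, the event wiring, and the whitespace-based token count are illustrative assumptions, not part of this commit:

import gradio as gr

MAX_INPUT_TOKEN_LENGTH = 4096  # the app's input limit


def get_tokens_used(message: str) -> tuple[int, int]:
    # Stand-in for the committed helper, which calls get_input_token_length()
    # on the message, chat history, and system prompt; a whitespace split is
    # only a placeholder for a real tokenizer count.
    return len(message.split()), MAX_INPUT_TOKEN_LENGTH


with gr.Blocks() as demo:
    token_info = gr.Markdown(f'tokens used: 0, max available {MAX_INPUT_TOKEN_LENGTH}')
    message_box = gr.Textbox(label='Message')

    def show_usage(message: str) -> str:
        used, available = get_tokens_used(message)
        return f'tokens used: {used}, max available {available}'

    # Refresh the rendered component through an event instead of mutating
    # the already-rendered DESCRIPTION string.
    message_box.change(show_usage, inputs=message_box, outputs=token_info)

demo.launch()

Routing the counter through an event handler keeps it in sync with user input without touching module-level strings after render.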
model.py CHANGED
@@ -9,14 +9,12 @@ from huggingface_hub import login
 model_id = 'meta-llama/Llama-2-13b-chat-hf'
 
 if torch.cuda.is_available():
-    '''
+
     tok = os.environ['HF_TOKEN']
-    '''
+
     login(new_session=True,
-          write_permission=False
-          # token=tok
-
-          #, token="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"
+          write_permission=False,
+          token=tok
           )
 
     config = AutoConfig.from_pretrained(model_id,
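
This change drops the commented-out hardcoded token in favour of the `HF_TOKEN` environment variable. A minimal sketch of that pattern, assuming `HF_TOKEN` is configured as a Space secret; the guarded lookup and error message are illustrative additions, not from this commit:

import os

from huggingface_hub import login

# HF_TOKEN is assumed to be set as a Space secret (Settings -> Variables
# and secrets). A guarded lookup fails with a clearer message than the
# raw KeyError that os.environ['HF_TOKEN'] raises when it is missing.
tok = os.environ.get('HF_TOKEN')
if tok is None:
    raise RuntimeError('HF_TOKEN is not set; add it as a Space secret.')

# Read-only login in a fresh session, matching the committed call:
# write_permission=False requests read access, which is enough to
# download gated model weights such as Llama-2.
login(token=tok, new_session=True, write_permission=False)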