jaymojnidar committed
Commit 67e309d · 1 Parent(s): 0e3ede0

resetting env var and trying to display tokens used

Files changed (2)
  1. app.py +4 -3
  2. model.py +4 -6
app.py CHANGED
@@ -19,9 +19,6 @@ This Space demonstrates model [Llama-2-13b-chat](https://huggingface.co/meta-lla
 
 🔎 For more details about the Llama 2 family of models and how to use them with `transformers`, take a look [at our blog post](https://huggingface.co/blog/llama2).
 
-🔨 Looking for an even more powerful model? Check out the large [**70B** model demo](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI).
-🐇 For a smaller model that you can run on many GPUs, check our [7B model demo](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat).
-
 """
 
 LICENSE = """
@@ -89,7 +86,11 @@ def check_input_token_length(message: str, chat_history: list[tuple[str, str]],
     input_token_length = get_input_token_length(message, chat_history, system_prompt)
     if input_token_length > MAX_INPUT_TOKEN_LENGTH:
         raise gr.Error(f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.')
+    DESCRIPTION += f'\ntokens used: {input_token_length}, max available {MAX_INPUT_TOKEN_LENGTH}'
 
+def get_tokens_used(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> (int, int):
+    input_token_length = get_input_token_length(message, chat_history, system_prompt)
+    return input_token_length, MAX_INPUT_TOKEN_LENGTH
 
 with gr.Blocks(css='style.css') as demo:
     gr.Markdown(DESCRIPTION)
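
Note on the new display logic: the added `DESCRIPTION += …` runs inside `check_input_token_length` after `gr.Markdown(DESCRIPTION)` has already rendered, so the appended text will not show up in the UI (and without a `global DESCRIPTION` declaration the augmented assignment would raise an `UnboundLocalError`). A minimal, self-contained sketch of one way the `get_tokens_used` values could be surfaced live; the component names, the event wiring, and the whitespace-based token count are illustrative assumptions, not part of this commit:

import gradio as gr

MAX_INPUT_TOKEN_LENGTH = 4096  # the app's input limit


def get_tokens_used(message: str) -> tuple[int, int]:
    # Stand-in for the committed helper, which calls get_input_token_length()
    # on the message, chat history, and system prompt; a whitespace split is
    # only a placeholder for a real tokenizer count.
    return len(message.split()), MAX_INPUT_TOKEN_LENGTH


with gr.Blocks() as demo:
    token_info = gr.Markdown(f'tokens used: 0, max available {MAX_INPUT_TOKEN_LENGTH}')
    message_box = gr.Textbox(label='Message')

    def show_usage(message: str) -> str:
        used, available = get_tokens_used(message)
        return f'tokens used: {used}, max available {available}'

    # Refresh the rendered component through an event instead of mutating
    # the already-rendered DESCRIPTION string.
    message_box.change(show_usage, inputs=message_box, outputs=token_info)

demo.launch()

Routing the counter through an event handler keeps it in sync with user input without touching module-level strings after render.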
model.py CHANGED
@@ -9,14 +9,12 @@ from huggingface_hub import login
 model_id = 'meta-llama/Llama-2-13b-chat-hf'
 
 if torch.cuda.is_available():
-    '''
+
     tok = os.environ['HF_TOKEN']
-    '''
+
     login(new_session=True,
-          write_permission=False
-          # token=tok
-
-          #, token="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"
+          write_permission=False,
+          token=tok
           )
 
     config = AutoConfig.from_pretrained(model_id,
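
This change drops the commented-out hardcoded token in favour of the `HF_TOKEN` environment variable. A minimal sketch of that pattern, assuming `HF_TOKEN` is configured as a Space secret; the guarded lookup and error message are illustrative additions, not from this commit:

import os

from huggingface_hub import login

# HF_TOKEN is assumed to be set as a Space secret (Settings -> Variables
# and secrets). A guarded lookup fails with a clearer message than the
# raw KeyError that os.environ['HF_TOKEN'] raises when it is missing.
tok = os.environ.get('HF_TOKEN')
if tok is None:
    raise RuntimeError('HF_TOKEN is not set; add it as a Space secret.')

# Read-only login in a fresh session, matching the committed call:
# write_permission=False requests read access, which is enough to
# download gated model weights such as Llama-2.
login(token=tok, new_session=True, write_permission=False)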