Spaces:

ppaihack
/

zLlamaskClear

Sleeping

theostos committed on Sep 28, 2024

Commit

4289215

1 Parent(s): 6c3fbea

add llamask

Files changed (4) hide show

app.py CHANGED Viewed

@@ -1,21 +1,41 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
     message,
     history: list[tuple[str, str]],
-    system_message,
     max_tokens,
     temperature,
-    top_p,
 ):
-    # Placeholder: ignores every argument and returns the fixed reply "test" plus an empty list.
-    return "test", []
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface

 import gradio as gr
 from huggingface_hub import InferenceClient
+import torch
+from transformers import AutoTokenizer
+# NOTE(review): this is a relative import, the next import pulls tokenizer_utils
+# from the `masked_llm.models` package, and this same commit renames the local
+# `models/` directory to `model/` — confirm both import paths actually resolve
+# when app.py is launched as the entry point.
+from .models.modeling_llamask import LlamaskForCausalLM
+from masked_llm.models.tokenizer_utils import generate_custom_mask, prepare_tokenizer
+# Checkpoint identifier shared by the model load and the tokenizer load below.
+model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+# Target device: the model is moved here, and respond() passes it to generate_custom_mask.
+device = 'cpu'
+# The model is loaded at import time, requesting bfloat16 weights.
+model = LlamaskForCausalLM.from_pretrained(model_id, torch_dtype= torch.bfloat16)
+model = model.to(device)
+# The tokenizer is configured to pad on the left.
+tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
+# prepare_tokenizer comes from tokenizer_utils (not visible here); its return value is
+# discarded, so it presumably adjusts the tokenizer in place — TODO confirm.
+prepare_tokenizer(tokenizer)
 def respond(
     message,
     history: list[tuple[str, str]],
     max_tokens,
     temperature,
 ):
+    prompt = """<|start_header_id|>system<|end_header_id|>
+    You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
+    {message}
+    <|eot_id|><|start_header_id|>assistant<|end_header_id|>
+    """
+    model_inputs = generate_custom_mask(tokenizer, [prompt], device)
+    outputs = model.generate(temperature=0.7, max_tokens=64, **model_inputs)
+    outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
+    result = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    return result, []
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface

{models → model}/modeling_llamask.py RENAMED Viewed

File without changes

{models → model}/tokenizer_utils.py RENAMED Viewed

File without changes

requirements.txt CHANGED Viewed

+huggingface_hub==0.22.2
+pyyaml
+transformers
+torch