Spaces:

xuxw98
/

TAPA

Runtime error

App Files Files Community

xuxw98 committed on Jul 1, 2023

Commit

ec09e34

1 Parent(s): 3e4df34

Upload app.py

Browse files

Files changed (1) hide show

app.py +64 -3

app.py CHANGED Viewed

@@ -2,7 +2,20 @@ import sys
 import time
 import warnings
 from pathlib import Path
 # Configure the Hugging Face environment
 from huggingface_hub import hf_hub_download
@@ -12,8 +25,34 @@ import glob
 import json
 # os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-# torch.set_float32_matmul_precision("high")
 def instruct_generate(
@@ -42,17 +81,39 @@ def instruct_generate(
         top_k: The number of top most probable tokens to consider in the sampling process.
         temperature: A value controlling the randomness of the sampling process. Higher values result in more random
     """
-    output = [prompt, input, max_new_tokens, top_k, temperature]
     print(output)
     return output
 # Concrete parameter configuration
 example_path = "example.json"  # JSON file read and parsed below
 # If 1024 is not enough, adjust to 512 (NOTE(review): presumably means "does not fit" — confirm)
 max_seq_len = 1024
 max_batch_size = 1
 with open(example_path, 'r') as f:
     content = f.read()
     example_dict = json.loads(content)  # parsed contents of example_path

 import time
 import warnings
 from pathlib import Path
+from typing import Optional
+import lightning as L
+import torch
+# support running without installing as a package
+wd = Path(__file__).parent.parent.resolve()
+sys.path.append(str(wd))
+from generate import generate
+from lit_llama import Tokenizer
+from lit_llama.adapter import LLaMA
+from lit_llama.utils import EmptyInitOnDevice, lazy_load, llama_model_lookup
+from scripts.prepare_alpaca import generate_prompt
 # Configure the Hugging Face environment
 from huggingface_hub import hf_hub_download
 import json
 # os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
+torch.set_float32_matmul_precision("high")
+def model_load(  # build the adapter LLaMA model, load both checkpoints into it, return it in eval mode
+    adapter_path: Path = Path("out/adapter/alpaca/lit-llama-adapter-finetuned_15k.pth"),
+    pretrained_path: Path = Path("checkpoints/lit-llama/7B/lit-llama.pth"),
+    quantize: Optional[str] = "llm.int8",  # passed through as EmptyInitOnDevice's quantization_mode
+):
+    fabric = L.Fabric(devices=1)  # single-device Fabric; its device drives the dtype choice and model init
+    dtype = torch.bfloat16 if fabric.device.type == "cuda" and torch.cuda.is_bf16_supported() else torch.float32
+    with lazy_load(pretrained_path) as pretrained_checkpoint, lazy_load(adapter_path) as adapter_checkpoint:
+        name = llama_model_lookup(pretrained_checkpoint)  # model name looked up from the pretrained checkpoint
+        with EmptyInitOnDevice(  # construct the model under the chosen device / dtype / quantization mode
+                device=fabric.device, dtype=dtype, quantization_mode=quantize
+        ):
+            model = LLaMA.from_name(name)
+        # 1. Load the pretrained weights (strict=False; presumably this checkpoint lacks the adapter keys — confirm)
+        model.load_state_dict(pretrained_checkpoint, strict=False)
+        # 2. Load the fine-tuned adapter weights (strict=False; presumably only adapter keys are present — confirm)
+        model.load_state_dict(adapter_checkpoint, strict=False)
+    model.eval()  # switch to evaluation mode before handing the model out
+    model = fabric.setup_module(model)  # let Fabric set the module up
+    return model  # the module as returned by fabric.setup_module
 def instruct_generate(
         top_k: The number of top most probable tokens to consider in the sampling process.
         temperature: A value controlling the randomness of the sampling process. Higher values result in more random
     """
+    sample = {"instruction": prompt, "input": input}
+    prompt = generate_prompt(sample)
+    encoded = tokenizer.encode(prompt, bos=True, eos=False, device=model.device)
+    # prompt_length = encoded.size(0)
+    y = generate(
+        model,
+        idx=encoded,
+        max_seq_length=max_new_tokens,
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
+        top_k=top_k,
+        eos_id=tokenizer.eos_id
+    )
+    output = tokenizer.decode(y)
+    output = output.split("### Response:")[1].strip()
     print(output)
     return output
 # Concrete parameter configuration
+pretrained_path = hf_hub_download(  # pretrained checkpoint obtained via huggingface_hub
+    repo_id="xxw/tapa_model", filename="lit-llama.pth")
+tokenizer_path = hf_hub_download(  # tokenizer model obtained from the same repo_id
+    repo_id="xxw/tapa_model", filename="tokenizer.model")
+adapter_path = "lit-llama-adapter-finetuned_15k.pth"  # adapter checkpoint, given as a relative path
 example_path = "example.json"  # JSON file read and parsed below
 # If 1024 is not enough, adjust to 512 (NOTE(review): presumably means "does not fit" — confirm)
 max_seq_len = 1024
 max_batch_size = 1
+model = model_load(adapter_path, pretrained_path)  # quantize is left at model_load's default
+tokenizer = Tokenizer(tokenizer_path)
 with open(example_path, 'r') as f:
     content = f.read()
     example_dict = json.loads(content)  # parsed contents of example_path