|
# Import the RKLLM conversion API. It is distributed as a wheel from the
# rknn-llm repository and is not available on PyPI.
try:
    from rkllm.api import RKLLM
except ImportError:
    print("RKLLM not installed. Please install from wheel 'https://github.com/airockchip/rknn-llm'.")
    # Bug fix: previously the script kept running after this message and
    # crashed with a NameError on the RKLLM() call below. Exit cleanly instead.
    exit(1)


# Single converter instance used by the whole script.
llm = RKLLM()
|
from getpass import getpass |
|
from huggingface_hub import snapshot_download, hf_hub_download |
|
|
|
def DownloadLoraModel(token):
    """Fetch the LoRA adapter and the base model snapshots from the Hub.

    Parameters
    ----------
    token : str
        Hugging Face access token used for the authenticated downloads.

    Returns
    -------
    tuple[str, str]
        Local directory names as (main model dir, LoRA adapter dir).
    """
    lora_repo = "Prince-1/orpheus-3b-0.1-ft_4_25"
    main_repo = "unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit"
    lora_dir = "OrpheusLora"
    main_dir = "OrpheusMain"

    print("Downloading Lora model from Hugging Face Hub...")
    snapshot_download(repo_id=lora_repo, local_dir=lora_dir, token=token)
    print("Lora model downloaded successfully.")

    print("Downloading main model from Hugging Face Hub...")
    snapshot_download(repo_id=main_repo, local_dir=main_dir, token=token)
    print("Main model downloaded successfully.")

    return (main_dir, lora_dir)
|
|
|
def DownloadGGUF(token):
    """Download the F16 GGUF build of the model from the Hugging Face Hub.

    Parameters
    ----------
    token : str
        Hugging Face access token used for the authenticated download.

    Returns
    -------
    str
        Local filesystem path of the downloaded GGUF file.
    """
    print("Downloading GGUF model from Hugging Face Hub...")
    gguf_path = hf_hub_download(
        repo_id="Prince-1/orpheus_3b_0.1_GGUF",
        filename="unsloth.F16.gguf",
        token=token,
        local_dir="GGUF",
    )
    print("GGUF model downloaded successfully.")
    return gguf_path
|
|
|
|
|
def UsingHf(llm, modelpath, modelLora):
    """Load a Hugging Face model plus LoRA adapter into the RKLLM instance.

    Exits the process with the loader's status code when loading fails;
    otherwise returns the same ``llm`` object for chaining.
    """
    print("Loading model...")
    print(modelpath, modelLora)

    status = llm.load_huggingface(model=modelpath, model_lora=modelLora, device='cpu')
    if status != 0:
        # Non-zero means the loader rejected the model; nothing to recover.
        print('Load model failed!')
        exit(status)

    return llm
|
|
|
def UsingGGUF(llm, modelpath):
    """Load a GGUF model file into the RKLLM instance.

    Exits the process with the loader's status code when loading fails;
    otherwise returns the same ``llm`` object for chaining.
    """
    print("Loading model...")

    status = llm.load_gguf(model=modelpath)
    if status != 0:
        # Non-zero means the loader rejected the file; nothing to recover.
        print('Load model failed!')
        exit(status)

    return llm
|
|
|
|
|
# Prompt for the Hugging Face token without echoing it to the terminal;
# an empty token cannot authenticate, so bail out immediately.
password = getpass("Please Enter your Hugging Face Token: ")

if not password:
    print("No token provided.")
    exit(1)
|
|
|
|
|
|
|
# Interactive menu: keep asking until the user picks a supported format,
# then download it and load it into the converter.
while True:
    print("Do you want to download Lora model or GGUF model ?")
    print("1. Lora")
    print("2. GGUF")
    choice = input()

    if choice == "1":
        main_dir, lora_dir = DownloadLoraModel(password)
        UsingHf(llm, main_dir, lora_dir)
        break

    if choice == "2":
        UsingGGUF(llm, DownloadGGUF(password))
        break

    print("Invalid input. Please enter 1 or 2.")
|
|
|
|
|
|
|
|
|
# Conversion settings for the RKLLM build/export step. Quantization is
# disabled in the build() call below, but the quantization knobs are still
# passed through (and the dtype names the output file).
dataset = None                    # no calibration dataset needed without quantization
qparams = None                    # no extra quantization parameters
target_platform = "RK3588"        # target Rockchip SoC
optimization_level = 1
quantized_dtype = "w8a8"
quantized_algorithm = "normal"
num_npu_core = 3                  # NPU cores to use on the target

print("Building model...")
ret = llm.build(
    do_quantization=False,
    optimization_level=optimization_level,
    quantized_dtype=quantized_dtype,
    quantized_algorithm=quantized_algorithm,
    target_platform=target_platform,
    num_npu_core=num_npu_core,
    extra_qparams=qparams,
    dataset=dataset,
)

if ret != 0:
    print('Build model failed!')
    exit(ret)

print("Model Build successfully.")


# Output name e.g. "orpheus_3b_0.1_ft_w8a8_3588.rkllm" — the "RK" prefix of
# the platform string is stripped via the [2:] slice.
ret = llm.export_rkllm(f"orpheus_3b_0.1_ft_{quantized_dtype}_{target_platform[2:]}.rkllm")

if ret != 0:
    print('Export model failed!')
    exit(ret)

print("Model Export successfully.")