Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,8 +2,11 @@ import gradio as gr
|
|
| 2 |
import os
|
| 3 |
from datetime import datetime
|
| 4 |
from huggingface_hub import hf_hub_download
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
title = "RWKV-4 14B fp16
|
| 7 |
desc = '''Links:
|
| 8 |
<a href='https://github.com/BlinkDL/ChatRWKV' target="_blank" style="margin:0 1em">ChatRWKV</a>
|
| 9 |
<a href='https://github.com/BlinkDL/RWKV-LM' target="_blank" style="margin:0 1em">RWKV-LM</a>
|
|
@@ -14,8 +17,10 @@ os.environ["RWKV_JIT_ON"] = '1'
|
|
| 14 |
os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
|
| 15 |
|
| 16 |
from rwkv.model import RWKV
|
| 17 |
-
model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-14b", filename="RWKV-4-Pile-14B-
|
| 18 |
-
model = RWKV(model=model_path, strategy='cuda fp16 *
|
|
|
|
|
|
|
| 19 |
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
| 20 |
pipeline = PIPELINE(model, "20B_tokenizer.json")
|
| 21 |
|
|
@@ -39,13 +44,16 @@ def infer(
|
|
| 39 |
else:
|
| 40 |
ctx = f'\n{ctx.strip()}'
|
| 41 |
|
|
|
|
|
|
|
|
|
|
| 42 |
all_tokens = []
|
| 43 |
out_last = 0
|
| 44 |
out_str = ''
|
| 45 |
occurrence = {}
|
| 46 |
state = None
|
| 47 |
for i in range(int(token_count)):
|
| 48 |
-
out, state = model.forward(pipeline.encode(ctx)[:
|
| 49 |
for n in args.token_ban:
|
| 50 |
out[n] = -float('inf')
|
| 51 |
for n in occurrence:
|
|
|
|
| 2 |
import os
|
| 3 |
from datetime import datetime
|
| 4 |
from huggingface_hub import hf_hub_download
|
| 5 |
+
from pynvml import *
|
| 6 |
+
nvmlInit()
|
| 7 |
+
gpu_h = nvmlDeviceGetHandleByIndex(0)
|
| 8 |
|
| 9 |
+
title = "RWKV-4 14B fp16 ctx1024"
|
| 10 |
desc = '''Links:
|
| 11 |
<a href='https://github.com/BlinkDL/ChatRWKV' target="_blank" style="margin:0 1em">ChatRWKV</a>
|
| 12 |
<a href='https://github.com/BlinkDL/RWKV-LM' target="_blank" style="margin:0 1em">RWKV-LM</a>
|
|
|
|
| 17 |
os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
|
| 18 |
|
| 19 |
from rwkv.model import RWKV
|
| 20 |
+
# model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-14b", filename="RWKV-4-Pile-14B-20230213-8019.pth")
|
| 21 |
+
# model = RWKV(model=model_path, strategy='cuda fp16 *34 -> cpu fp32')
|
| 22 |
+
model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-169m", filename="RWKV-4-Pile-169M-20220807-8023.pth")
|
| 23 |
+
model = RWKV(model=model_path, strategy='cuda fp16')
|
| 24 |
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
| 25 |
pipeline = PIPELINE(model, "20B_tokenizer.json")
|
| 26 |
|
|
|
|
| 44 |
else:
|
| 45 |
ctx = f'\n{ctx.strip()}'
|
| 46 |
|
| 47 |
+
gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
|
| 48 |
+
print(f'vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')
|
| 49 |
+
|
| 50 |
all_tokens = []
|
| 51 |
out_last = 0
|
| 52 |
out_str = ''
|
| 53 |
occurrence = {}
|
| 54 |
state = None
|
| 55 |
for i in range(int(token_count)):
|
| 56 |
+
out, state = model.forward(pipeline.encode(ctx)[:1024] if i == 0 else [token], state)
|
| 57 |
for n in args.token_ban:
|
| 58 |
out[n] = -float('inf')
|
| 59 |
for n in occurrence:
|