Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -19,57 +19,67 @@ Original file is located at
# --- app.py top-of-file initialization ---------------------------------------
# Reconstructed from a diff-view paste: the paste interleaved the old and new
# revisions of this script with diff-UI line numbers; this is the resolved
# "new" (post-change) side only, formatted as real Python.

# !pip install Ninja

import gradio as gr
import os
import gc
import copy
import torch  # Keep torch here for the CUDA_HOME fix
from datetime import datetime

from huggingface_hub import hf_hub_download
# Explicit names instead of `from pynvml import *` so the NVML surface this
# script depends on is visible at a glance.
from pynvml import (
    NVMLError,
    nvmlDeviceGetCount,
    nvmlDeviceGetHandleByIndex,
    nvmlDeviceGetMemoryInfo,
    nvmlDeviceGetName,
    nvmlInit,
)

# Set CUDA_HOME explicitly for custom CUDA kernel compilation
os.environ["CUDA_HOME"] = "/usr/local/cuda"

# Flag to check if GPU is present; the pynvml probe below may flip it.
HAS_GPU = False
GPU_COUNT = 0

# Model title and context size limit
ctx_limit = 2000
title = "RWKV-5-H-World-3B"
model_file = "rwkv-5-h-world-3B"

# The 7B variant caused OOM on this Space; kept commented out deliberately.
#title = "RWKV-5-H-World-7B"
#model_file = "rwkv-5-h-world-7B"

# Probe for NVIDIA GPUs. Any failure (missing driver, NVML init error) is
# deliberately non-fatal: the script falls back to the CPU strategy.
try:
    nvmlInit()
    GPU_COUNT = nvmlDeviceGetCount()
    if GPU_COUNT > 0:
        HAS_GPU = True
        gpu_h = nvmlDeviceGetHandleByIndex(0)
        # No .decode() on nvmlDeviceGetName — newer pynvml returns str.
        print(f"GPU detected: {nvmlDeviceGetName(gpu_h)} with {nvmlDeviceGetMemoryInfo(gpu_h).total / (1024**3):.2f} GB VRAM")
    else:
        print("No NVIDIA GPU detected. Will use CPU strategy.")
except NVMLError as error:
    print(f"NVIDIA driver not found or error: {error}. Will use CPU strategy.")
except Exception as e:  # best-effort: never let GPU detection kill startup
    print(f"An unexpected error occurred during GPU detection: {e}. Will use CPU strategy.")


os.environ["RWKV_JIT_ON"] = '1'

# Model strat to use: default to CPU, switch to CUDA when a GPU was found.
MODEL_STRAT = "cpu bf16"
os.environ["RWKV_CUDA_ON"] = '0'

# Switch to GPU mode
if HAS_GPU:
    os.environ["RWKV_CUDA_ON"] = '1'
    MODEL_STRAT = "cuda bf16"  # bf16 fits the 3B model
    # If you were to try 7B again, THIS is where you'd change to "cuda fp16i8"

print(f"MODEL_STRAT: {MODEL_STRAT}")


# Load the model accordingly.
# NOTE(review): import kept here, after the env-var setup above, per the
# original working code structure — rwkv presumably reads RWKV_JIT_ON /
# RWKV_CUDA_ON at import time; confirm before hoisting to the top.
from rwkv.model import RWKV
model_path = hf_hub_download(repo_id="a686d380/rwkv-5-h-world", filename=f"{model_file}.pth")
model = RWKV(model=model_path, strategy=MODEL_STRAT)

from rwkv.utils import PIPELINE, PIPELINE_ARGS
pipeline = PIPELINE(model, "rwkv_vocab_v20230424")

print("RWKV model and pipeline loaded successfully!")
def generate_prompt(instruction, input=None, history=None):
|
85 |
if instruction:
|