Remostart committed on
Commit
edc81e3
·
verified ·
1 Parent(s): a685b90

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -1,16 +1,20 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
4
  from spaces import GPU
5
 
6
- # Load model & tokenizer
7
- MODEL_NAME = "ubiodee/plutus_llm"
8
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
9
- model = AutoModelForCausalLM.from_pretrained(
10
- MODEL_NAME,
11
- torch_dtype=torch.float16, # Use FP16 to reduce memory usage
12
- device_map="auto" # Automatically distribute across CPU/GPU
 
13
  )
 
 
14
 
15
  # Set padding token
16
  if tokenizer.pad_token is None:
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ from peft import PeftModel
5
  from spaces import GPU
6
 
7
+ # Load base model and tokenizer
8
+ BASE_MODEL_NAME = "NousResearch/Meta-Llama-3-8B"
9
+ LORA_MODEL_NAME = "ubiodee/plutus_llm"
10
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, use_fast=False)
11
+ base_model = AutoModelForCausalLM.from_pretrained(
12
+ BASE_MODEL_NAME,
13
+ torch_dtype=torch.float16,
14
+ device_map="auto"
15
  )
16
+ # Apply LoRA weights
17
+ model = PeftModel.from_pretrained(base_model, LORA_MODEL_NAME)
18
 
19
  # Set padding token
20
  if tokenizer.pad_token is None: