Update app.py
app.py CHANGED
@@ -21,49 +21,3 @@ base_model_name = "meta-llama/Llama-3.1-8B"
 # Your fine-tuned LoRA adapter (uploaded to Hugging Face)
 lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned"
 
-# Login because LLaMA 3.1 8B is a gated model
-login(token=os.getenv("HuggingFaceFineGrainedReadToken"))
-print("After login")
-# Enable 4-bit quantization with BitsAndBytes
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,  # ✅ Enables 4-bit quantization for memory efficiency
-    bnb_4bit_compute_dtype=torch.float16,  # ✅ Uses float16 for performance
-    bnb_4bit_use_double_quant=True,  # ✅ Optimizes quantization
-    bnb_4bit_quant_type="nf4"  # ✅ Normalized Float-4 for better accuracy
-)
-
-base_model = AutoModelForCausalLM.from_pretrained(
-    base_model_name,
-    quantization_config=quantization_config,
-    device_map="auto"
-)
-print("After basemodel")
-# Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-print("After tokenizer")
-# Load LoRA adapter
-print("Loading LoRA adapter...")
-model = PeftModel.from_pretrained(base_model, lora_model_name)
-print("After peft model")
-# Function to generate responses
-def chatbot_response(user_input):
-    try:
-        inputs = tokenizer(user_input, return_tensors="pt").to("cuda")
-        outputs = model.generate(**inputs, max_length=200)
-        return tokenizer.decode(outputs[0], skip_special_tokens=True)
-    except Exception as e:
error_message = f"AssertionError: {str(e)}\n{traceback.format_exc()}"
-        print(error_message)  # ✅ Logs detailed error messages
-        return "An error occurred. Check the logs for details."
-
-
-# Launch the Gradio chatbot
-interface = gr.Interface(
-    fn=chatbot_response,
-    inputs=gr.Textbox(lines=2, placeholder="Ask me about the Christian Church Fathers..."),
-    outputs="text",
-    title="Early Christian Church Fathers Fine-Tuned LLaMA 3.1 8B with LoRA",
-    description="A chatbot using a fine-tuned LoRA adapter on LLaMA 3.1 8B, tuned on thousands of writings of the early Christian Church Fathers.",
-)
-
-interface.launch()
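
The removed hunk calls names that are defined above line 21 and therefore never appear in this diff. For reference, a minimal sketch of the header the script assumes; the import locations follow standard huggingface_hub/transformers/peft/gradio usage, and only base_model_name is confirmed by the hunk header itself:

import os
import traceback

import gradio as gr
import torch
from huggingface_hub import login
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Confirmed by the hunk header: the gated base model on line 21.
base_model_name = "meta-llama/Llama-3.1-8B"

Back-of-envelope, this is why the 4-bit config is there: 8B parameters at fp16 are about 16 GB of weights, while at 4 bits (NF4) they drop to roughly 4-5 GB, small enough for device_map="auto" to place the model on a single mid-range GPU.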
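One note on the removed chatbot_response: generate(..., max_length=200) caps prompt and completion together, so a long question leaves little room for the answer. A sketch of the same function bounded by completion length instead, using the standard max_new_tokens argument to generate (it reuses the tokenizer and model built above):

# Sketch: cap only the generated continuation, not prompt + completion.
def chatbot_response(user_input):
    inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

Using model.device also avoids hard-coding "cuda", so the same code runs wherever device_map="auto" placed the weights.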