Vijayendra committed on
Commit 3c47b25 · verified · 1 Parent(s): e1d3122

Update README.md

Files changed (1)
  1. README.md +39 -27
README.md CHANGED
@@ -4,13 +4,9 @@ library_name: peft
 ---
 How to use :
 ```python
-
-import torch
-from transformers import TextStreamer
-from unsloth import FastLanguageModel
-
-# Hugging Face repository details
-model_name = "Vijayendra/Phi3-LoRA-GSM8k"
+!pip install peft accelerate bitsandbytes
+from peft import PeftModel, PeftConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Function to generate and solve problems using the fine-tuned model
 def generate_and_solve_problems(model, tokenizer, num_problems=5):
@@ -30,6 +26,7 @@ def generate_and_solve_problems(model, tokenizer, num_problems=5):
 
 ### Solution:"""
 
+    # Sample test problems
     test_problems = [
         "A car travels at 40 mph for 2 hours, then at 60 mph for another 3 hours. How far does it travel in total?",
         "If the sum of three consecutive integers is 72, what are the integers?",
@@ -38,30 +35,45 @@ def generate_and_solve_problems(model, tokenizer, num_problems=5):
         "If a person invests $1000 in a savings account that earns 5% annual interest compounded yearly, how much money will be in the account after 10 years?"
     ]
 
-    # Adjust for the requested number of problems
+    # Use only the specified number of problems
    test_problems = test_problems[:num_problems]
 
-    # Generate solutions
-    model.eval()  # Set model to evaluation mode
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    model.to(device)  # Move model to the appropriate device
+    for problem in test_problems:
+        # Create the prompt
+        prompt = test_prompt.format(problem)
+
+        # Tokenize and generate response
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to("cuda")
+        outputs = model.generate(
+            input_ids=inputs["input_ids"],
+            attention_mask=inputs["attention_mask"],
+            max_length=512,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True,
+        )
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        # Print the problem and the solution
+        print("### Problem:")
+        print(problem)
+        print("### Solution:")
+        print(response)
+        print("\n" + "="*50 + "\n")
+
+# Example usage with model and tokenizer
 
-    for idx, problem in enumerate(test_problems, 1):
-        # Format the problem into the prompt
-        input_text = test_prompt.format(problem)
-        inputs = tokenizer(input_text, return_tensors="pt").to(device)
+base_model_name = "unsloth/phi-3-mini-4k-instruct-bnb-4bit"
+lora_model_name = "Vijayendra/Phi3-LoRA-GSM8k"
 
-        # Generate a response
-        print(f"\nProblem {idx}: {problem}\nSolution:")
-        streamer = TextStreamer(tokenizer)
-        _ = model.generate(**inputs, streamer=streamer, max_new_tokens=512)
-        print("\n" + "=" * 80 + "\n")
+# Load base model and tokenizer
+base_model = AutoModelForCausalLM.from_pretrained(base_model_name, device_map="auto", torch_dtype="auto")
+tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 
-# Load the fine-tuned model and tokenizer from Hugging Face
-model, tokenizer = FastLanguageModel.from_pretrained(model_name, max_seq_length=2048)
+# Load the fine-tuned LoRA model
+model = PeftModel.from_pretrained(base_model, lora_model_name)
+model.eval()
 
-# Prepare the model for inference
-FastLanguageModel.for_inference(model)
+# Call the function to solve problems
+generate_and_solve_problems(model, tokenizer)
 
-# Test the model by generating and solving problems
-generate_and_solve_problems(model, tokenizer, num_problems=5)
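For reference, a minimal end-to-end sketch of the updated loading path is below. It is not the README verbatim: the base and adapter checkpoint names and the sampling parameters come from the diff, but the prompt string is only an approximation of the `test_prompt` template (the full template is not visible in this hunk), `max_new_tokens=256` is an arbitrary choice, and the device is resolved from the loaded model instead of hardcoding `"cuda"` as the added snippet does.

```python
# Minimal sketch (assumptions noted above): load the 4-bit base model, attach the
# Vijayendra/Phi3-LoRA-GSM8k LoRA adapter with peft, and generate one solution.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model_name = "unsloth/phi-3-mini-4k-instruct-bnb-4bit"
lora_model_name = "Vijayendra/Phi3-LoRA-GSM8k"

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name, device_map="auto", torch_dtype="auto"
)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
model = PeftModel.from_pretrained(base_model, lora_model_name)
model.eval()

# Approximation of the README's prompt format; the real test_prompt is defined
# inside generate_and_solve_problems and is not fully shown in the diff.
prompt = "### Problem:\nIf the sum of three consecutive integers is 72, what are the integers?\n\n### Solution:"

# Resolve the device from the loaded model rather than hardcoding "cuda".
device = next(model.parameters()).device
inputs = tokenizer(prompt, return_tensors="pt").to(device)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,  # cap newly generated tokens rather than total length
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

Using `max_new_tokens` instead of `max_length` keeps the generation budget independent of prompt length; otherwise the behaviour mirrors the snippet added in this commit.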