Spaces:
OpeoluwaAdekoya committed
Commit c9c4d75 • 1 Parent(s): 4714732
Update app.py
app.py
CHANGED
@@ -1,3 +1,193 @@
 import gradio as gr
 
-
+from huggingface_hub import login
+
+# pip install accelerate peft bitsandbytes git+https://github.com/huggingface/transformers trl py7zr auto-gptq optimum
+
+import torch
+from datasets import Dataset
+from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
+from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments
+from trl import SFTTrainer
+import pandas as pd
+
+import json
+import pandas as pd
+
+# def load_data_to_dataframe(json_file_path):
+#     """
+#     Load data from a JSON file and create a DataFrame with questions and answers.
+#
+#     Args:
+#         json_file_path (str): Path to the JSON file.
+#
+#     Returns:
+#         pd.DataFrame: DataFrame containing the questions and answers.
+#     """
+#     questions = []
+#     answers = []
+#
+#     with open(json_file_path, 'r') as f:
+#         data = json.load(f)
+#
+#     for entry in data:
+#         for message in entry["messages"]:
+#             if message["role"] == "user":
+#                 questions.append(message["content"])
+#             elif message["role"] == "assistant":
+#                 answers.append(message["content"])
+#
+#     # Create DataFrame
+#     df = pd.DataFrame({
+#         'question': questions,
+#         'answer': answers
+#     })
+#
+#     return df
+
+# def finetune_mistral_7b():
+#     # Replace 'your_token' with your actual Hugging Face token
+#     json_file_path = 'Dataset for finetuning Viv.json'
+#     df = load_data_to_dataframe(json_file_path)
+#     df["text"] = df[["question", "answer"]].apply(lambda x: "###Human: Answer this question: " + x["question"] + "\n###Assistant: " + x["answer"], axis=1)
+#     print(df.iloc[0])
+#     data = Dataset.from_pandas(df)
+#     tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ")
+#     tokenizer.pad_token = tokenizer.eos_token
+#     quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True, tokenizer=tokenizer)
+#     model = AutoModelForCausalLM.from_pretrained(
+#         "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ",
+#         quantization_config=quantization_config_loading,
+#         device_map="auto"
+#     )
+#
+#     print(model)
+#     model.config.use_cache = False
+#     model.config.pretraining_tp = 1
+#     model.gradient_checkpointing_enable()
+#     model = prepare_model_for_kbit_training(model)
+#
+#     peft_config = LoraConfig(
+#         r=16, lora_alpha=16, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"]
+#     )
+#     model = get_peft_model(model, peft_config)
+#
+#     training_arguments = TrainingArguments(
+#         output_dir="mistral-finetuned-Viv",
+#         per_device_train_batch_size=8,
+#         gradient_accumulation_steps=1,
+#         optim="paged_adamw_32bit",
+#         learning_rate=2e-4,
+#         lr_scheduler_type="cosine",
+#         save_strategy="epoch",
+#         logging_steps=100,
+#         num_train_epochs=1,
+#         max_steps=100,
+#         fp16=True,
+#         push_to_hub=True,
+#         hub_model_id="Dumele/viv-updated2",  # Specify the repository name
+#         hub_strategy="every_save"
+#     )
+#
+#     trainer = SFTTrainer(
+#         model=model,
+#         train_dataset=data,
+#         peft_config=peft_config,
+#         dataset_text_field="text",
+#         args=training_arguments,
+#         tokenizer=tokenizer,
+#         packing=False,
+#         max_seq_length=512
+#     )
+#
+#     trainer.train()
+#     trainer.push_to_hub()
+
+# if __name__ == "__main__":
+#     finetune_mistral_7b()
+
+
+from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
+import torch
+
+# Define the repository where your model is saved
+model_repo = "Dumele/viv-updated2"  # Replace with your actual repository
+
+# Load the tokenizer from the repository
+tokenizer = AutoTokenizer.from_pretrained(model_repo)
+
+# Define the configuration with `disable_exllama` set to True
+quantization_config = GPTQConfig(bits=4, disable_exllama=True)
+
+# Load the model with the custom configuration
+model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)
+
+# Move the model to GPU if available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+
+from transformers import pipeline
+
+# Create a text generation pipeline
+text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
+
+# Define a prompt using the same template the model was fine-tuned on
+prompt = "###Human: Answer this question: What exactly does Viv do?\n###Assistant:"
+
+# Generate text
+generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)
+
+# Print the generated text
+print(generated_text[0]['generated_text'])
+
+# pip install gradio
+
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, pipeline
+import gradio as gr
+
+# Define the repository where your model is saved
+model_repo = "Dumele/viv-updated2"  # Replace with your actual repository name
+
+# Load the tokenizer from the repository
+tokenizer = AutoTokenizer.from_pretrained(model_repo)
+
+# Define the configuration with `disable_exllama` set to True
+quantization_config = GPTQConfig(bits=4, disable_exllama=True)
+
+# Load the model with the custom configuration
+model = AutoModelForCausalLM.from_pretrained(model_repo, quantization_config=quantization_config)
+
+# Move the model to GPU if available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+# Create a text generation pipeline
+text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
+
+def generate_response(prompt):
+    generated_text = text_generator(prompt, max_length=100, num_return_sequences=1)
+    return generated_text[0]['generated_text']
+
+# Create a Gradio interface
+iface = gr.Interface(
+    fn=generate_response,
+    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
+    outputs="text",
+    title="Chat with VivBeta",
+    description="Enter a prompt to interact with the fine-tuned model."
+)
+
+iface.launch()
+
+# Commented out IPython magic to ensure Python compatibility.
+# %%bash
+#
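Note: the commented-out load_data_to_dataframe helper in the diff implies a particular layout for "Dataset for finetuning Viv.json": a list of conversation entries, each holding a "messages" array of role/content pairs. The dataset file itself is not part of this commit, so the record below is only an inferred sketch with placeholder strings.

# Inferred record shape for "Dataset for finetuning Viv.json" (placeholders, not actual data)
example_data = [
    {
        "messages": [
            {"role": "user", "content": "What exactly does Viv do?"},
            {"role": "assistant", "content": "<assistant answer text>"}
        ]
    }
]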
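Note: app.py imports login from huggingface_hub but never calls it, and the commented-out trainer sets push_to_hub=True, which requires an authenticated session. A minimal sketch of how the token could be wired in, assuming it is exposed to the Space as a secret named HF_TOKEN (that name is an assumption, not part of this commit):

import os
from huggingface_hub import login

# Authenticate only if a token is configured (HF_TOKEN is a hypothetical secret name)
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)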
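Note: generate_response passes the raw textbox input straight to the pipeline, while the model was fine-tuned on the "###Human: ... ###Assistant:" template used elsewhere in this file. A possible refinement, not part of the commit, that wraps the input in that template and bounds only the newly generated tokens:

# Sketch of a templated wrapper; assumes the text_generator pipeline defined in app.py is in scope.
def generate_response_templated(user_input):
    prompt = f"###Human: Answer this question: {user_input}\n###Assistant:"
    # max_new_tokens limits the completion length without counting the prompt tokens
    outputs = text_generator(prompt, max_new_tokens=100, num_return_sequences=1)
    return outputs[0]["generated_text"]

If this wrapper were adopted, the Gradio interface would simply take fn=generate_response_templated instead of fn=generate_response.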