# Hugging Face Space: Dumele/viv-final-autotrain (status when captured: Sleeping)
# File size: 6,264 Bytes
# Revisions shown in the page's blame gutter: abaea54, c9c4d75, 9c2ae81

import gradio as gr

from huggingface_hub import login

# ! pip install accelerate peft bitsandbytes git+https://github.com/huggingface/transformers trl py7zr auto-gptq optimum

import torch
# from datasets import Dataset
# from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments
# from trl import SFTTrainer
# import pandas as pd

# import json
# import pandas as pd

# def load_data_to_dataframe(json_file_path):
#     """
#     Load data from a JSON file and create a DataFrame with questions and answers.

#     Args:
#     json_file_path (str): Path to the JSON file.

#     Returns:
#     pd.DataFrame: DataFrame containing the questions and answers.
#     """
#     questions = []
#     answers = []

#     with open(json_file_path, 'r') as f:
#         data = json.load(f)

#         for entry in data:
#             for message in entry["messages"]:
#                 if message["role"] == "user":
#                     questions.append(message["content"])
#                 elif message["role"] == "assistant":
#                     answers.append(message["content"])

#     # Create DataFrame
#     df = pd.DataFrame({
#         'question': questions,
#         'answer': answers
#     })

#     return df

# def finetune_mistral_7b():
# # Replace 'your_token' with your actual Hugging Face token
#     json_file_path = 'Dataset for finetuning Viv.json'
#     df = load_data_to_dataframe(json_file_path)
#     df["text"] = df[["question", "answer"]].apply(lambda x: "###Human: Answer this question: " + x["question"] + "\n###Assistant: " +x["answer"], axis=1)
#     print(df.iloc[0])
#     data = Dataset.from_pandas(df)
#     tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ")
#     tokenizer.pad_token = tokenizer.eos_token
#     quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True, tokenizer=tokenizer)
#     model = AutoModelForCausalLM.from_pretrained(
#                               "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ",
#                               quantization_config=quantization_config_loading,
#                               device_map="auto"
#                         )

#     print(model)
#     model.config.use_cache = False
#     model.config.pretraining_tp = 1
#     model.gradient_checkpointing_enable()
#     model = prepare_model_for_kbit_training(model)

#     peft_config = LoraConfig(
#         r=16, lora_alpha=16, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"]
#     )
#     model = get_peft_model(model, peft_config)

#     training_arguments = TrainingArguments(
#         output_dir="mistral-finetuned-Viv",
#         per_device_train_batch_size=8,
#         gradient_accumulation_steps=1,
#         optim="paged_adamw_32bit",
#         learning_rate=2e-4,
#         lr_scheduler_type="cosine",
#         save_strategy="epoch",
#         logging_steps=100,
#         num_train_epochs=1,
#         max_steps=100,
#         fp16=True,
#         push_to_hub=True,
#         hub_model_id="Dumele/viv-updated2",  # Specify the repository name
#         hub_strategy="every_save"
#     )

#     trainer = SFTTrainer(
#         model=model,
#         train_dataset=data,
#         peft_config=peft_config,
#         dataset_text_field="text",
#         args=training_arguments,
#         tokenizer=tokenizer,
#         packing=False,
#         max_seq_length=512
#     )

#     trainer.train()
#     trainer.push_to_hub()

# if __name__ == "__main__":
#     finetune_mistral_7b()







from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
import torch

# Repository where the fine-tuned model (and its tokenizer) is saved.
model_repo = "Dumele/viv-updated2"  # Replace with your actual repository

# Choose the execution device first: CUDA when it is available, CPU otherwise.
device = (
    torch.device("cuda")
    if torch.cuda.is_available()
    else torch.device("cpu")
)

# 4-bit GPTQ settings with `disable_exllama` switched on.
quantization_config = GPTQConfig(disable_exllama=True, bits=4)

# Fetch the tokenizer, then the model, from the same repository; the model is
# loaded with the quantization settings defined above.
tokenizer = AutoTokenizer.from_pretrained(model_repo)
model = AutoModelForCausalLM.from_pretrained(
    model_repo,
    quantization_config=quantization_config,
)

# Place the loaded model on the chosen device.
model.to(device)




from transformers import pipeline

# Wrap the loaded model and tokenizer in a text-generation pipeline. The
# device index is 0 when CUDA is available and -1 otherwise.
text_generator = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
    device=-1 if not torch.cuda.is_available() else 0,
)

# Sample question written in the "###Human / ###Assistant" prompt layout.
prompt = "###Human: Answer this question: What exactly does Viv do?\n###Assistant:"

# Request a single sequence with a length limit of 100.
generated_text = text_generator(
    prompt,
    num_return_sequences=1,
    max_length=100,
)

# Show the text of the first (only) returned sequence.
print(generated_text[0]["generated_text"])

# pip install gradio

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

# Reuse the tokenizer, model, and text-generation pipeline that were already
# built earlier in this file from the same repository ("Dumele/viv-updated2")
# with the same GPTQ settings (bits=4, disable_exllama=True).
#
# This section previously repeated the whole load a second time. That doubled
# start-up work and kept two copies of the model alive while the second one
# was loading (the first pipeline still referenced the first model), yet it
# produced objects identical to the ones that already existed.
#
# `model_repo`, `tokenizer`, `quantization_config`, `model`, `device`, and
# `text_generator` therefore keep the values assigned earlier in this file.

def generate_response(prompt):
    """Generate a reply to ``prompt`` with the module-level pipeline.

    Args:
        prompt: Text entered by the user; it is passed to ``text_generator``
            unchanged.

    Returns:
        The ``generated_text`` field of the first (and only requested)
        sequence returned by the pipeline.
    """
    # Budget the *new* tokens instead of the total length: ``max_length=100``
    # also counts the prompt's tokens, so a long prompt left little or no
    # room for an answer.
    sequences = text_generator(prompt, max_new_tokens=100, num_return_sequences=1)
    return sequences[0]['generated_text']

# Build the web UI: a two-line prompt box feeding `generate_response`, with the
# result rendered as plain text.
iface = gr.Interface(
    title="Chat with VivBeta",
    description="Enter a prompt to interact with the fine-tuned model.",
    fn=generate_response,
    inputs=gr.Textbox(placeholder="Enter your prompt here...", lines=2),
    outputs="text",
)

# Start serving the interface.
iface.launch()

# Commented out IPython magic to ensure Python compatibility.
# %%bash
#