# -*- coding: utf-8 -*-
"""Untitled4.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/19SAJcA_N4eQVyeNjT1iFdgpyLvvtSSEw
"""

!pip install transformers datasets accelerate -q

from google.colab import files

# Upload the training CSV (flan_t5_true_false_dataset.csv)
uploaded = files.upload()

from datasets import Dataset
from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    DataCollatorForSeq2Seq,
)
import pandas as pd
import torch

# Load CSV file (adjust filename if needed)
df = pd.read_csv("flan_t5_true_false_dataset.csv")

# Convert to Hugging Face Dataset
dataset = Dataset.from_pandas(df)

# Load tokenizer and model
model_name = "google/flan-t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Preprocessing: tokenize inputs and targets; the target token ids become the labels
def preprocess(example):
    inputs = tokenizer(example["input"], padding="max_length", truncation=True, max_length=256)
    labels = tokenizer(text_target=example["output"], padding="max_length", truncation=True, max_length=64)
    # Replace padding token ids in the labels with -100 so padding is ignored by the loss
    inputs["labels"] = [
        [(tok if tok != tokenizer.pad_token_id else -100) for tok in seq]
        for seq in labels["input_ids"]
    ]
    return inputs

# Tokenize dataset
tokenized_dataset = dataset.map(preprocess, batched=True)

# Define training arguments
training_args = Seq2SeqTrainingArguments(
    output_dir="./flan_t5_finetuned_model",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    save_steps=500,
    logging_steps=100,
    save_total_limit=1,
    fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is available
)

# Trainer setup (Seq2SeqTrainer pairs with Seq2SeqTrainingArguments)
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)

# Start training
trainer.train()

# Save the final model and tokenizer so the zipped folder contains a usable checkpoint
trainer.save_model("./flan_t5_finetuned_model")
tokenizer.save_pretrained("./flan_t5_finetuned_model")

!zip -r flan_t5_finetuned_model.zip flan_t5_finetuned_model
files.download("flan_t5_finetuned_model.zip")

# Build a small synthetic evaluation CSV (150 templated true/false examples)
data = [
    {
        "input": f"Convert this fact into a true/false question: The moon is made of cheese {i}.",
        "output": f"The moon is made of cheese {i}. True or False?",
    }
    for i in range(150)
]

df = pd.DataFrame(data)
df.to_csv("flan_t5_eval.csv", index=False)

files.download("flan_t5_eval.csv")

!pip install transformers datasets bert-score sentence-transformers -q

# Upload the evaluation CSV if it is not already present in the runtime
uploaded = files.upload()

EVAL_CSV = "/content/flan_t5_eval.csv"

!ls -l ./flan_t5_finetuned_model
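
# ----------------------------------------------------------------------------
# Evaluation sketch (not part of the original notebook): the cells above install
# bert-score and sentence-transformers and upload the eval CSV, but stop before
# any scoring is run. Below is a minimal sketch of one way to generate
# predictions with the fine-tuned checkpoint and compare them to the reference
# outputs with BERTScore. The batch size, max_new_tokens, and the
# "./flan_t5_finetuned_model" path are assumptions, not values from the source.
# ----------------------------------------------------------------------------

import pandas as pd
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
from bert_score import score as bert_score

device = "cuda" if torch.cuda.is_available() else "cpu"

# Reload the checkpoint saved by trainer.save_model() above (assumed path)
eval_tokenizer = T5Tokenizer.from_pretrained("./flan_t5_finetuned_model")
eval_model = T5ForConditionalGeneration.from_pretrained("./flan_t5_finetuned_model").to(device)
eval_model.eval()

eval_df = pd.read_csv(EVAL_CSV)
predictions = []

# Greedy generation in small batches to keep memory use modest
for start in range(0, len(eval_df), 16):
    batch = eval_df["input"].iloc[start:start + 16].tolist()
    enc = eval_tokenizer(batch, padding=True, truncation=True, max_length=256, return_tensors="pt").to(device)
    with torch.no_grad():
        out = eval_model.generate(**enc, max_new_tokens=64)
    predictions.extend(eval_tokenizer.batch_decode(out, skip_special_tokens=True))

# BERTScore compares each generated question against its reference output
P, R, F1 = bert_score(predictions, eval_df["output"].tolist(), lang="en")
print(f"BERTScore F1 (mean): {F1.mean().item():.4f}")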