Spaces:

ilyada
/

web_accessibility

Running

App Files Community

ilyada committed on May 28, 2024

Commit

d6e2e1a

verified ·

1 Parent(s): ff94b50

Create train_web_accessibility.py

Browse files

Files changed (1) hide show

train_web_accessibility.py +54 -0

train_web_accessibility.py ADDED Viewed

	@@ -0,0 +1,54 @@

+from datasets import load_dataset
+from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
+import torch
+# Load the dataset
+dataset = load_dataset("ilyada/web_accessibility_dataset")
+# Load pre-trained model and tokenizer
+model_name = "bert-base-uncased"
+tokenizer = BertTokenizer.from_pretrained(model_name)
+model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
+# Tokenize the dataset
+def tokenize_function(examples):
+    return tokenizer(examples["text"], padding="max_length", truncation=True)
+tokenized_datasets = dataset.map(tokenize_function, batched=True)
+# Split the dataset into train and test
+train_test_split = tokenized_datasets["train"].train_test_split(test_size=0.2)
+train_dataset = train_test_split['train']
+test_dataset = train_test_split['test']
+# Define training arguments
+training_args = TrainingArguments(
+    output_dir="./results",
+    evaluation_strategy="epoch",
+    learning_rate=2e-5,
+    per_device_train_batch_size=8,
+    per_device_eval_batch_size=8,
+    num_train_epochs=3,
+    weight_decay=0.01,
+    push_to_hub=True,  # This enables pushing the model to Hugging Face Hub
+    hub_model_id="ilyada/web_accessibility_model",  # Replace with your Hugging Face model ID
+    hub_strategy="end",
+)
+# Initialize the Trainer
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=train_dataset,
+    eval_dataset=test_dataset,
+)
+# Train the model
+trainer.train()
+# Evaluate the model
+results = trainer.evaluate()
+print(results)
+# Push model to Hugging Face Hub
+trainer.push_to_hub()