Spaces:
Runtime error
Runtime error
import gradio as gr | |
import time | |
import threading | |
import random | |
from datetime import datetime | |
from datasets import load_dataset | |
import pandas as pd | |
# Global state | |
class TrainingState:
    """Mutable record of the simulated training session shared by the UI callbacks.

    Holds the current phase (``"idle"``, ``"training"`` or ``"fine-tuning"``),
    a 0-100 progress value, a rolling list of log lines, and the dataset
    summary/preview displayed in the interface.
    """

    # Maximum number of lines `add_log` keeps in the rolling log.
    MAX_LOG_LINES = 15
    # Number of leading dataset rows copied into the preview table.
    PREVIEW_ROWS = 5

    def __init__(self):
        self.status = "idle"
        self.progress = 0
        self.logs = ["β System initialized"]
        # time.time() stamp of the most recently started process, or None.
        self.start_time = None
        self.model_name = "tasal9/pashto-base-bloom"
        self.active_process = None
        self.dataset_loaded = False
        self.dataset_info = "No dataset loaded"
        self.dataset_sample = pd.DataFrame()

    def load_dataset(self):
        """Load the Pashto dataset and cache a preview of its first rows.

        Returns:
            True when both the dataset and its preview were obtained, False
            otherwise. Failures are reported through ``logs`` and
            ``dataset_info`` instead of being raised.
        """
        try:
            self.logs.append("β³ Loading dataset: tasal9/ZamAi-Pashto-Datasets-V2")
            # Inside a method this name resolves to the module-level
            # `datasets.load_dataset`, not to this method.
            train_split = load_dataset("tasal9/ZamAi-Pashto-Datasets-V2")["train"]
            total = len(train_split)
            # Clamp the preview so a split shorter than PREVIEW_ROWS still works.
            preview_rows = range(min(self.PREVIEW_ROWS, total))
            preview = pd.DataFrame(train_split.select(preview_rows))
        except Exception as e:
            # UI boundary: surface any loader failure to the user, never crash.
            self.logs.append(f"β Error loading dataset: {str(e)}")
            self.dataset_info = f"Error: {str(e)}"
            return False

        # Publish the result only once every step has succeeded, so the
        # `dataset_loaded` flag can never disagree with the return value.
        self.dataset_sample = preview
        self.dataset_info = f"β Dataset loaded!\nName: ZamAi-Pashto-Datasets-V2\nSize: {total} examples"
        self.dataset_loaded = True
        self.logs.append(f"π {total} Pashto examples loaded")
        return True

    def _begin(self, status, banner, size):
        """Reset progress and logs and mark a new process of kind ``status`` as running."""
        self.status = status
        self.progress = 0
        self.logs = [f"{banner} started at {datetime.now().strftime('%H:%M:%S')}"]
        self.logs.append(f"π Data size: {size} characters")
        self.start_time = time.time()

    def start_training(self, size):
        """Enter the ``"training"`` phase; ``size`` is the input length recorded in the log."""
        self._begin("training", "ποΈ Training", size)

    def start_finetuning(self, size):
        """Enter the ``"fine-tuning"`` phase; ``size`` is the input length recorded in the log."""
        self._begin("fine-tuning", "π― Fine-tuning", size)

    def update_progress(self, progress):
        """Store ``progress`` clamped to 0-100 and finish the process once it reaches 100."""
        self.progress = min(100, max(0, progress))
        if progress >= 100:
            self.complete_process()

    def add_log(self, msg):
        """Append a time-stamped log line, keeping only the newest MAX_LOG_LINES."""
        self.logs.append(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")
        overflow = len(self.logs) - self.MAX_LOG_LINES
        if overflow > 0:
            del self.logs[:overflow]

    def complete_process(self):
        """Log the elapsed time of the running process and return to ``"idle"``.

        Does nothing when no process is running, so a repeated call cannot
        emit a second completion line.
        """
        if self.status == "idle":
            return
        # start_time is None if the status was set without start_*().
        elapsed = 0.0 if self.start_time is None else time.time() - self.start_time
        self.add_log(f"π {self.status.capitalize()} completed in {elapsed:.1f}s")
        self.status = "idle"
        self.progress = 100

    def get_status(self):
        """Return a one-line summary of phase and progress, e.g. ``"Training (40%)"``."""
        return f"{self.status.capitalize()} ({self.progress}%)"
# Shared, module-level training state read and mutated by every callback below.
#
# NOTE: a stray copy of the "Status" tab used to be built at this point. It
# created components outside any `gr.Blocks` context and referenced
# `get_current_status` before that function is defined, so it has been
# removed; the Status tab is built once, inside the `gr.Blocks` layout.
state = TrainingState()
def test_model(text): | |
if not text.strip(): | |
return "β Enter text to test." | |
options = [ | |
f"Processed: '{text}'", | |
f"Model response to: {text}", | |
f"Pashto analysis: {len(text)} characters", | |
f"β Got it: {text}", | |
f"Generated: {text}... [simulated]", | |
f"π Words: {len(text.split())}" | |
] | |
return random.choice(options) | |
def simulate_process(duration, process_type, data_size, steps=10):
    """Run a simulated training or fine-tuning job, reporting into ``state``.

    The function blocks for roughly ``duration`` seconds (it sleeps between
    steps), so callers start it on a background thread.

    Args:
        duration: Total simulated run time in seconds, split evenly over the steps.
        process_type: ``"train"`` starts a training run; any other value
            starts a fine-tuning run.
        data_size: Size of the submitted text, recorded in the log.
        steps: Number of progress increments; defaults to 10.
    """
    if process_type == "train":
        state.start_training(data_size)
    else:
        state.start_finetuning(data_size)

    steps = max(1, int(steps))  # guard against division by zero below
    pause = duration / steps
    for i in range(steps + 1):
        time.sleep(pause)
        state.update_progress(int((i / steps) * 100))
        # Emit a fake metric line on every third step.
        if i % 3 == 0:
            state.add_log(random.choice([
                f"Batch {i}/{steps}",
                f"Loss: {random.uniform(0.1, 1.0):.3f}",
                f"LR: {random.uniform(1e-5, 1e-3):.6f}",
                f"GPU: {random.randint(60, 95)}% (sim)",
            ]))

    # The final update_progress(100) already completes the run. Completing
    # again unconditionally logged a second, bogus "Idle completed" line,
    # so only finish here if the run is somehow still marked as active.
    if state.status != "idle":
        state.complete_process()
def train_model(text):
    """Validate the request and launch a simulated training run in the background.

    Returns:
        A ``(message, "")`` pair. The message is either the reason the
        request was rejected (blank input, dataset not loaded, another
        process still running) or a confirmation that training started.
    """
    if not text.strip():
        rejection = "β Add training data."
    elif not state.dataset_loaded:
        rejection = "β Load dataset first."
    elif state.status != "idle":
        rejection = "β³ Wait for current process."
    else:
        rejection = None

    if rejection is not None:
        return rejection, ""

    # Daemon thread: the simulation must not keep the interpreter alive.
    worker = threading.Thread(
        target=simulate_process,
        args=(15, "train", len(text)),
        daemon=True,
    )
    worker.start()
    return "β Training started", ""
def finetune_model(text):
    """Validate the request and launch a simulated fine-tuning run in the background.

    Returns:
        A ``(message, "")`` pair. The message is either the reason the
        request was rejected (blank input, dataset not loaded, another
        process still running) or a confirmation that fine-tuning started.
    """
    if not text.strip():
        rejection = "β Add fine-tuning data."
    elif not state.dataset_loaded:
        rejection = "β Load dataset first."
    elif state.status != "idle":
        rejection = "β³ Wait for current process."
    else:
        rejection = None

    if rejection is not None:
        return rejection, ""

    # Daemon thread: the simulation must not keep the interpreter alive.
    worker = threading.Thread(
        target=simulate_process,
        args=(10, "fine-tune", len(text)),
        daemon=True,
    )
    worker.start()
    return "β Fine-tuning started", ""
def load_hf_dataset():
    """Load the dataset through ``state`` and describe the outcome to the UI.

    Returns:
        A dict keyed by the dataset tab's components: the status textbox
        gets ``state.dataset_info``, the preview table gets the cached
        sample (or an empty frame on failure), and the button gets a
        label reflecting success or inviting a retry.
    """
    if state.load_dataset():
        preview, button_label = state.dataset_sample, "β Loaded"
    else:
        preview, button_label = pd.DataFrame(), "Retry"

    updates = {dataset_status: state.dataset_info}
    updates[dataset_preview] = preview
    updates[dataset_btn] = button_label
    return updates
def get_current_status():
    """Snapshot the shared training state for the Status tab.

    Returns:
        A dict mapping the status textbox, progress slider and log textbox
        to their new values. Progress is rescaled from the 0-100 value kept
        on ``state`` to the slider's 0-1 range.
    """
    return {
        # Read the `status` attribute directly: it is the field every state
        # transition writes, so no accessor method is required.
        status_box: state.status,
        progress_bar: state.progress / 100,
        log_output: "\n".join(state.logs) if state.logs else "No logs yet",
    }
def _status_only(handler):
    """Adapt a handler returning ``(message, extra)`` to a single-output event.

    The train / fine-tune handlers return a 2-tuple, but each button is wired
    to one status textbox; only the message is forwarded to it.
    """
    def callback(text):
        result = handler(text)
        return result[0] if isinstance(result, tuple) else result
    return callback


# Top-level UI layout: one tab each for dataset loading, testing, training,
# fine-tuning and status monitoring.
with gr.Blocks(title="Pashto Base Bloom Trainer", theme="soft") as demo:
    gr.Markdown("# πΈ Pashto-Base-Bloom Trainer")
    gr.Markdown("Train & fine-tune Pashto model: `tasal9/pashto-base-bloom`")

    with gr.Tab("π Dataset"):
        gr.Markdown("### Load Dataset from Hugging Face")
        with gr.Row():
            dataset_btn = gr.Button("Load Dataset")
            dataset_status = gr.Textbox(label="Status", lines=2, interactive=False)
        dataset_preview = gr.DataFrame(label="Sample Preview", interactive=False)
        dataset_btn.click(load_hf_dataset, outputs=[dataset_status, dataset_preview, dataset_btn])

    with gr.Tab("π§ͺ Test Model"):
        with gr.Row():
            test_input = gr.Textbox(label="Input", lines=3)
            test_btn = gr.Button("Test")
        test_output = gr.Textbox(label="Output", lines=3, interactive=False)
        test_btn.click(test_model, inputs=test_input, outputs=test_output)

    with gr.Tab("ποΈ Train"):
        train_input = gr.Textbox(label="Training Data", lines=6)
        train_btn = gr.Button("Start Training")
        train_output = gr.Textbox(label="Status", lines=2, interactive=False)
        train_btn.click(_status_only(train_model), inputs=train_input, outputs=train_output)

    with gr.Tab("π― Fine-tune"):
        finetune_input = gr.Textbox(label="Fine-tuning Data", lines=6)
        finetune_btn = gr.Button("Start Fine-tuning")
        finetune_output = gr.Textbox(label="Status", lines=2, interactive=False)
        finetune_btn.click(_status_only(finetune_model), inputs=finetune_input, outputs=finetune_output)

    with gr.Tab("π Status"):
        with gr.Row():
            status_box = gr.Textbox(label="Current Status", interactive=False)
            progress_bar = gr.Slider(minimum=0, maximum=1, value=0, step=0.01, interactive=False, label="Progress")
        log_output = gr.Textbox(label="Logs", lines=10, interactive=False)
        refresh_btn = gr.Button("π Refresh")
        auto_refresh = gr.Checkbox(label="Auto-refresh every 5s", value=True)
        refresh_btn.click(get_current_status, outputs=[status_box, progress_bar, log_output])

        # Gradio has no `gr.Interval`; periodic callbacks come from `gr.Timer`,
        # whose `tick` event fires every `value` seconds while it is active.
        auto_refresh_component = gr.Timer(5, active=True)
        auto_refresh_component.tick(get_current_status, outputs=[status_box, progress_bar, log_output])
        # Let the checkbox start / stop the timer.
        auto_refresh.change(
            lambda enabled: gr.Timer(active=enabled),
            inputs=auto_refresh,
            outputs=auto_refresh_component,
        )
# Script entry point: start the Gradio server only when run directly.
if __name__ == "__main__":
    launch_options = {"share": True}
    demo.launch(**launch_options)