"""Gradio demo that simulates training / fine-tuning of a Pashto BLOOM model.

No model is actually trained: a background thread sleeps in small steps and
publishes fake progress and log lines into a shared ``TrainingState`` object,
which the "Status" tab polls.
"""

import random
import threading
import time
from datetime import datetime

import gradio as gr
import pandas as pd
from datasets import load_dataset

DATASET_ID = "tasal9/ZamAi-Pashto-Datasets-V2"
MAX_LOG_LINES = 15       # oldest log lines are dropped beyond this
PREVIEW_ROWS = 5         # rows shown in the dataset preview table
REFRESH_SECONDS = 5      # auto-refresh period of the Status tab
SIMULATION_STEPS = 10    # number of fake "batches" per simulated run

# gr.Timer is the supported way to poll in recent Gradio releases; older
# releases only offer the `every=` argument on event listeners.
_HAS_TIMER = hasattr(gr, "Timer")


# Global state
class TrainingState:
    """Mutable, process-wide record of the simulated run and loaded dataset.

    Attributes are read by the UI handlers and written by the worker thread:
    ``status`` is one of "idle", "training" or "fine-tuning"; ``progress`` is
    an integer percentage in 0..100; ``logs`` holds the most recent log lines.
    """

    def __init__(self):
        self.status = "idle"
        self.progress = 0
        self.logs = ["✅ System initialized"]
        self.start_time = None
        self.model_name = "tasal9/pashto-base-bloom"
        self.active_process = None
        self.dataset_loaded = False
        self.dataset_info = "No dataset loaded"
        self.dataset_sample = pd.DataFrame()

    def _push_log(self, line):
        """Append *line* verbatim and keep only the newest MAX_LOG_LINES."""
        self.logs.append(line)
        del self.logs[:-MAX_LOG_LINES]

    def load_dataset(self):
        """Download the Pashto dataset and cache a small preview.

        Returns:
            True on success, False if loading raised (the error text is then
            stored in ``dataset_info`` and appended to the logs).
        """
        self._push_log(f"⏳ Loading dataset: {DATASET_ID}")
        try:
            # The bare name resolves to the module-level `datasets.load_dataset`
            # import, not to this method.
            train_split = load_dataset(DATASET_ID)["train"]
            total = len(train_split)
            # Clamp so a split with fewer than PREVIEW_ROWS rows still previews.
            preview = train_split.select(range(min(PREVIEW_ROWS, total))).to_pandas()
        except Exception as exc:  # UI boundary: report instead of crashing
            self._push_log(f"❌ Error loading dataset: {exc}")
            self.dataset_info = f"Error: {exc}"
            return False

        self.dataset_loaded = True
        self.dataset_info = (
            "✅ Dataset loaded!\n"
            f"Name: {DATASET_ID.rsplit('/', 1)[-1]}\n"
            f"Size: {total} examples"
        )
        self.dataset_sample = preview
        self._push_log(f"📊 {total} Pashto examples loaded")
        return True

    def _begin(self, status, banner, size):
        """Reset progress and logs and mark a run of kind *status* as active."""
        self.status = status
        self.progress = 0
        self.logs = [
            f"{banner} started at {datetime.now().strftime('%H:%M:%S')}",
            f"📏 Data size: {size} characters",
        ]
        self.start_time = time.time()

    def start_training(self, size):
        """Mark a training run over *size* characters of input as started."""
        self._begin("training", "🏋️ Training", size)

    def start_finetuning(self, size):
        """Mark a fine-tuning run over *size* characters of input as started."""
        self._begin("fine-tuning", "🎯 Fine-tuning", size)

    def update_progress(self, progress):
        """Store *progress* clamped to 0..100; reaching 100 completes the run."""
        self.progress = min(100, max(0, progress))
        if self.progress >= 100:
            self.complete_process()

    def add_log(self, msg):
        """Append *msg* to the logs, prefixed with the current wall-clock time."""
        self._push_log(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")

    def complete_process(self):
        """Log the elapsed time and return to "idle".

        Calling this while already idle is a no-op, so a run is never reported
        as completed twice and no "Idle completed" line is produced.
        """
        if self.status == "idle":
            return
        elapsed = 0.0 if self.start_time is None else time.time() - self.start_time
        self.add_log(f"🏁 {self.status.capitalize()} completed in {elapsed:.1f}s")
        self.status = "idle"
        self.progress = 100

    def get_status(self):
        """Return a one-line, human-readable summary of the current state."""
        if self.status == "idle" or self.start_time is None:
            return "Idle"
        elapsed = time.time() - self.start_time
        return f"{self.status.capitalize()}: {self.progress}% ({elapsed:.0f}s elapsed)"


state = TrainingState()

# Serializes the "is it idle? then start" decision so that two quick clicks
# cannot both pass the idle check and spawn two worker threads.
_launch_lock = threading.Lock()


def test_model(text):
    """Return a randomly chosen canned "model response" for *text*."""
    if not text or not text.strip():
        return "❗ Enter text to test."
    options = [
        f"Processed: '{text}'",
        f"Model response to: {text}",
        f"Pashto analysis: {len(text)} characters",
        f"✅ Got it: {text}",
        f"Generated: {text}... [simulated]",
        f"🔍 Words: {len(text.split())}",
    ]
    return random.choice(options)


def _begin_process(process_type, data_size):
    """Flip the shared state to busy for the requested kind of run."""
    if process_type == "train":
        state.start_training(data_size)
    else:
        state.start_finetuning(data_size)


def _run_steps(duration):
    """Sleep through the simulated run, publishing progress and fake metrics.

    The final step pushes progress to 100, which completes the run inside
    ``TrainingState.update_progress``; no separate completion call is needed.
    """
    for step in range(SIMULATION_STEPS + 1):
        time.sleep(duration / SIMULATION_STEPS)
        # Integer arithmetic avoids float rounding such as 28.999... -> 28.
        state.update_progress(step * 100 // SIMULATION_STEPS)
        if step % 3 == 0:
            state.add_log(random.choice([
                f"Batch {step}/{SIMULATION_STEPS}",
                f"Loss: {random.uniform(0.1, 1.0):.3f}",
                f"LR: {random.uniform(1e-5, 1e-3):.6f}",
                f"GPU: {random.randint(60, 95)}% (sim)",
            ]))


def simulate_process(duration, process_type, data_size):
    """Run one complete simulated job synchronously.

    Args:
        duration: total wall-clock seconds the simulation should take.
        process_type: "train" for training; anything else means fine-tuning.
        data_size: size of the user-supplied data, in characters (logged only).
    """
    _begin_process(process_type, data_size)
    _run_steps(duration)


def _launch(text, process_type, duration, empty_message, started_message):
    """Validate the request and start the simulation on a daemon thread.

    Returns the status message to show in the tab's single status textbox.
    """
    if not text or not text.strip():
        return empty_message
    if not state.dataset_loaded:
        return "❌ Load dataset first."
    with _launch_lock:
        if state.status != "idle":
            return "⏳ Wait for current process."
        # Mark busy *before* the thread starts so the idle check above is
        # reliable for the next caller.
        _begin_process(process_type, len(text))
    threading.Thread(target=_run_steps, args=(duration,), daemon=True).start()
    return started_message


def train_model(text):
    """Click handler for "Start Training"; returns a status message."""
    return _launch(text, "train", 15, "❌ Add training data.", "✅ Training started")


def finetune_model(text):
    """Click handler for "Start Fine-tuning"; returns a status message."""
    return _launch(
        text, "fine-tune", 10, "❌ Add fine-tuning data.", "✅ Fine-tuning started"
    )


def load_hf_dataset():
    """Click handler for "Load Dataset".

    Returns:
        (status text, preview DataFrame, new button label), in the order of
        the handler's ``outputs`` list.
    """
    ok = state.load_dataset()
    preview = state.dataset_sample if ok else pd.DataFrame()
    button_label = "✅ Loaded" if ok else "Retry"
    return state.dataset_info, preview, button_label


def get_current_status():
    """Return (status line, progress in 0..1, joined log text) for the Status tab."""
    log_lines = list(state.logs)  # snapshot; the worker thread may append
    log_text = "\n".join(log_lines) if log_lines else "No logs yet"
    return state.get_status(), state.progress / 100, log_text


with gr.Blocks(title="Pashto Base Bloom Trainer", theme="soft") as demo:
    gr.Markdown("# 🌸 Pashto-Base-Bloom Trainer")
    gr.Markdown("Train & fine-tune Pashto model: `tasal9/pashto-base-bloom`")

    with gr.Tab("📂 Dataset"):
        gr.Markdown("### Load Dataset from Hugging Face")
        with gr.Row():
            dataset_btn = gr.Button("Load Dataset")
            dataset_status = gr.Textbox(label="Status", lines=2, interactive=False)
        dataset_preview = gr.DataFrame(label="Sample Preview", interactive=False)
        dataset_btn.click(
            load_hf_dataset,
            outputs=[dataset_status, dataset_preview, dataset_btn],
        )

    with gr.Tab("🧪 Test Model"):
        with gr.Row():
            test_input = gr.Textbox(label="Input", lines=3)
            test_btn = gr.Button("Test")
        test_output = gr.Textbox(label="Output", lines=3, interactive=False)
        test_btn.click(test_model, inputs=test_input, outputs=test_output)

    with gr.Tab("🏋️ Train"):
        train_input = gr.Textbox(label="Training Data", lines=6)
        train_btn = gr.Button("Start Training")
        train_output = gr.Textbox(label="Status", lines=2, interactive=False)
        train_btn.click(train_model, inputs=train_input, outputs=train_output)

    with gr.Tab("🎯 Fine-tune"):
        finetune_input = gr.Textbox(label="Fine-tuning Data", lines=6)
        finetune_btn = gr.Button("Start Fine-tuning")
        finetune_output = gr.Textbox(label="Status", lines=2, interactive=False)
        finetune_btn.click(
            finetune_model, inputs=finetune_input, outputs=finetune_output
        )

    with gr.Tab("📊 Status"):
        with gr.Row():
            status_box = gr.Textbox(label="Current Status", interactive=False)
            progress_bar = gr.Slider(
                minimum=0, maximum=1, value=0, step=0.01,
                interactive=False, label="Progress",
            )
        log_output = gr.Textbox(label="Logs", lines=10, interactive=False)
        refresh_btn = gr.Button("🔄 Refresh")
        auto_refresh = gr.Checkbox(label="Auto-refresh every 5s", value=True)

        status_outputs = [status_box, progress_bar, log_output]
        refresh_btn.click(get_current_status, outputs=status_outputs)

        if _HAS_TIMER:
            refresh_timer = gr.Timer(REFRESH_SECONDS, active=True)
            refresh_timer.tick(get_current_status, outputs=status_outputs)
            # The checkbox switches polling on and off by updating the timer.
            auto_refresh.change(
                lambda enabled: gr.Timer(active=enabled),
                inputs=auto_refresh,
                outputs=refresh_timer,
            )
        else:
            # NOTE(review): fallback for Gradio releases without gr.Timer; in
            # this mode the checkbox is not connected to the polling.
            demo.load(
                get_current_status, outputs=status_outputs, every=REFRESH_SECONDS
            )


if __name__ == "__main__":
    if not _HAS_TIMER:
        # NOTE(review): `every=` polling is expected to need the request queue
        # on older Gradio releases — confirm against the installed version.
        demo.queue()
    demo.launch(share=True)