tasal9 committed on
Commit 885cdd2 · verified · 1 Parent(s): 09ab5d9

Update app.py

Files changed (1)
  1. app.py +64 -188
app.py CHANGED
@@ -1,193 +1,69 @@
  import gradio as gr
  import time
- import threading
- import random
- from datetime import datetime

- # Global state to track training/fine-tuning status
- class TrainingState:
      def __init__(self):
-         self.status = "idle"
-         self.progress = 0
-         self.logs = []
-         self.start_time = None
-         self.model_name = "tasal9/pashto-base-bloom"
-         self.active_process = None
-
-     def start_training(self, data_size):
-         self.status = "training"
-         self.progress = 0
-         self.logs = [f"Training started at {datetime.now().strftime('%H:%M:%S')}"]
-         self.logs.append(f"Training data size: {data_size} characters")
-         self.start_time = time.time()
-         from huggingface_hub import spaces
-
-
-     def start_finetuning(self, data_size):
-         self.status = "fine-tuning"
-         self.progress = 0
-         self.logs = [f"Fine-tuning started at {datetime.now().strftime('%H:%M:%S')}"]
-         self.logs.append(f"Fine-tuning data size: {data_size} characters")
-         self.start_time = time.time()
-
-     def update_progress(self, progress):
-         self.progress = min(100, max(0, progress))
-         if progress >= 100 and self.status != "idle":
-             self.complete_process()
-
-     def add_log(self, message):
-         self.logs.append(f"[{datetime.now().strftime('%H:%M:%S')}] {message}")
-         if len(self.logs) > 10:
-             self.logs.pop(0)
-
-     def complete_process(self):
-         elapsed = time.time() - self.start_time if self.start_time else 0
-         current_status = self.status
-         self.add_log(f"{current_status.capitalize()} completed in {elapsed:.1f} seconds!")
-         self.status = "idle"
-         self.progress = 100
-
      def get_status(self):
-         status_map = {
-             "idle": "✅ Ready",
-             "training": "🔄 Training in progress",
-             "fine-tuning": "🔄 Fine-tuning in progress"
-         }
-         return status_map.get(self.status, "❓ Unknown status")
-
- # Create global state
- state = TrainingState()
-
- def test_model(input_text):
-     if not input_text.strip():
-         return "Please enter some text to test."
-     responses = [
-         f"Processed: '{input_text}'",
-         f"Model response to: {input_text}",
-         f"Analysis: This appears to be Pashto text with {len(input_text)} characters",
-         f"✅ Received: {input_text}",
-         f"Generated continuation: {input_text}... [simulated output]"
-     ]
-     return random.choice(responses)
-
- def simulate_process(duration, process_type, data_size):
-     if process_type == "train":
-         state.start_training(data_size)
-     else:
-         state.start_finetuning(data_size)
-
-     steps = 10
-     for i in range(steps + 1):
-         time.sleep(duration / steps)
-         progress = int((i / steps) * 100)
-         state.update_progress(progress)
-         if i % 3 == 0:
-             messages = [
-                 f"Processing batch {i*5}/{steps*5}",
-                 f"Loss: {random.uniform(0.1, 1.0):.4f}",
-                 f"Accuracy: {random.uniform(80, 95):.1f}%",
-                 f"Learning rate: {random.uniform(1e-5, 1e-3):.6f}"
-             ]
-             state.add_log(random.choice(messages))
-     state.complete_process()
-
- def train_model(dataset_text):
-     if not dataset_text.strip():
-         return "Please provide training data.", ""
-     data_size = len(dataset_text)
-     if getattr(state, 'status', 'idle') != "idle":
-         return "Another process is already running. Please wait.", ""
-     threading.Thread(
-         target=simulate_process,
-         args=(15, "train", data_size),
-         daemon=True
-     ).start()
-     return "Training started successfully! Check status in the Status tab.", ""
-
- from huggingface_hub import spaces
-
- @spaces.GPU
- def finetune_model(dataset_text):
-     if not dataset_text.strip():
-         return "Please provide fine-tuning data.", ""
-     data_size = len(dataset_text)
-     if getattr(state, 'status', 'idle') != "idle":
-         return "Another process is already running. Please wait.", ""
-     threading.Thread(
-         target=simulate_process,
-         args=(10, "fine-tune", data_size),
-         daemon=True
-     ).start()
-     return "Fine-tuning started successfully! Check status in the Status tab.", ""
-
- # Interface placeholders (declared early to link in refresh)
- status_box = gr.Textbox(label="Current Status", interactive=False)
- progress_bar = gr.Slider(label="Progress", minimum=0, maximum=100, value=0, interactive=False)
- log_output = gr.Textbox(label="Process Logs", lines=8, interactive=False)
-
- def get_current_status():
-     try:
-         status_text = state.get_status()
-     except Exception as e:
-         status_text = f"❌ Error: {str(e)}"
-
-     try:
-         progress = state.progress
-     except:
-         progress = 0
-
-     try:
-         logs = "\n".join(state.logs[-10:]) if hasattr(state, 'logs') else "No logs available"
-     except:
-         logs = "❌ Failed to retrieve logs."
-
-     return {
-         status_box: status_text,
-         progress_bar: progress,
-         log_output: logs
-     }
-
- # Create interface
- with gr.Blocks(title="Pashto-Base-Bloom Trainer", theme="soft") as demo:
-     gr.Markdown("# 🌸 Pashto-Base-Bloom Training Space")
-     gr.Markdown("Train and fine-tune Pashto language model `tasal9/pashto-base-bloom`")
-
-     with gr.Tab("Test Model"):
-         gr.Markdown("### Test Model with Sample Text")
-         with gr.Row():
-             with gr.Column():
-                 test_input = gr.Textbox(label="Input Text", lines=3, placeholder="Enter Pashto text here...")
-                 test_btn = gr.Button("Run Test", variant="primary")
-         test_output = gr.Textbox(label="Model Output", lines=4, interactive=False)
-         test_btn.click(test_model, inputs=test_input, outputs=test_output)
-
-     with gr.Tab("Train Model"):
-         gr.Markdown("### Train Model with New Data")
-         with gr.Row():
-             with gr.Column():
-                 train_input = gr.Textbox(label="Training Data", lines=8, placeholder="Paste training dataset here...")
-                 train_btn = gr.Button("Start Training", variant="primary")
-         train_output = gr.Textbox(label="Training Status", lines=2, interactive=False)
-         train_btn.click(train_model, inputs=train_input, outputs=train_output)
-
-     with gr.Tab("Fine-tune Model"):
-         gr.Markdown("### Fine-tune Model with Specialized Data")
-         with gr.Row():
-             with gr.Column():
-                 finetune_input = gr.Textbox(label="Fine-tuning Data", lines=8, placeholder="Paste fine-tuning dataset here...")
-                 finetune_btn = gr.Button("Start Fine-tuning", variant="primary")
-         finetune_output = gr.Textbox(label="Fine-tuning Status", lines=2, interactive=False)
-         finetune_btn.click(finetune_model, inputs=finetune_input, outputs=finetune_output)
-
-     with gr.Tab("Status"):
-         gr.Markdown("### System Status")
-         with gr.Row():
-             with gr.Column():
-                 refresh_btn = gr.Button("🔄 Refresh Status", variant="secondary")
-                 auto_refresh = gr.Checkbox(label="Auto-refresh every 5 seconds", value=True)
-         # UI elements already declared globally
-         refresh_btn.click(get_current_status, outputs=[status_box, progress_bar, log_output])
-     demo.load(get_current_status, outputs=[status_box, progress_bar, log_output])
-
- if __name__ == "__main__":
-     demo.launch(server_port=7860, ssr_mode=False, share=False)
 
  import gradio as gr
  import time
+ from datasets import load_dataset
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling

+ class FineTuneApp:
      def __init__(self):
+         self.status = "Idle"
+
+     def fine_tune(self):
+         self.status = "Starting fine-tuning..."
+         start_time = time.time()
+
+         # Load dataset
+         dataset = load_dataset("tasal9/ZamAi-Pashto-Datasets-V2")
+         train_data = dataset["train"]
+
+         # Load tokenizer and model
+         tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")
+         model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m")
+
+         # Tokenize dataset
+         def tokenize_function(example):
+             return tokenizer(example["text"], truncation=True, padding="max_length", max_length=128)
+
+         tokenized_data = train_data.map(tokenize_function, batched=True)
+
+         # Training arguments
+         training_args = TrainingArguments(
+             output_dir="./results",
+             num_train_epochs=1,
+             per_device_train_batch_size=2,
+             save_steps=10,
+             save_total_limit=1,
+             logging_steps=5,
+             logging_dir="./logs",
+             learning_rate=1e-4,
+             warmup_steps=10,
+             no_cuda=True  # ✅ important for ZeroGPU
+         )
+
+         data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
+
+         trainer = Trainer(
+             model=model,
+             args=training_args,
+             train_dataset=tokenized_data,
+             tokenizer=tokenizer,
+             data_collator=data_collator,
+         )
+
+         trainer.train()
+
+         self.status = f"✅ Done in {round(time.time() - start_time)}s"
+         return self.status
+
      def get_status(self):
+         return self.status
+
+ app_instance = FineTuneApp()
+
+ with gr.Blocks() as ui:
+     gr.Markdown("### ZamAI Pashto BLOOM Fine-tuning")
+     status_output = gr.Textbox(label="Status")
+     start_button = gr.Button("🚀 Start Fine-tuning")
+
+     start_button.click(fn=app_instance.fine_tune, outputs=status_output)
+
+ ui.launch()
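
The rewritten app runs `trainer.train()` synchronously inside the button's click handler and only reports a status string, so the trained weights themselves are exercised outside the UI. As a rough sketch of how the checkpoint written under `output_dir="./results"` could then be loaded for generation — the checkpoint folder name, prompt, and sampling settings below are assumptions, not part of this commit:

```python
# Illustrative sketch only (not part of this commit): load the checkpoint that
# the Trainer above saves under ./results and generate a short continuation.
# The checkpoint folder name, prompt, and sampling settings are assumptions.
from transformers import AutoTokenizer, AutoModelForCausalLM

checkpoint_dir = "./results/checkpoint-10"  # assumed: first save at save_steps=10
tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")
model = AutoModelForCausalLM.from_pretrained(checkpoint_dir)

prompt = "سلام"  # assumed sample Pashto prompt
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=30, do_sample=True, top_p=0.9)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

Two side notes on the new flow: because `fine_tune` runs synchronously, the Status textbox only updates once training has finished (the removed version used a background thread for live progress; `gr.Progress` would be another option). And with `no_cuda=True` the run stays on CPU; on ZeroGPU Spaces a GPU is normally requested by decorating the heavy function with `@spaces.GPU` from the standalone `spaces` package, rather than importing it from `huggingface_hub` as the removed code attempted.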