Spaces:

George-API
/

qwen4bit

Running

App Files Files Community

qwen4bit / app.py

George-API

Upload app.py with huggingface_hub

ba1722a verified 22 days ago

raw

history blame contribute delete

6.85 kB

	import gradio as gr
	import os
	import json
	import torch
	import subprocess
	import sys
	from dotenv import load_dotenv
	import logging
	import threading

	# Logging setup: INFO-level records go to a stream handler and to app.log
	_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
	_log_handlers = [logging.StreamHandler(), logging.FileHandler("app.log")]
	logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_log_handlers)

	# Module-level logger used by the rest of this file
	logger = logging.getLogger(__name__)

	# Read environment variables via python-dotenv
	load_dotenv()

	# Anchor paths to this script's location instead of the working directory
	# (important for Hugging Face Space paths)
	_this_file = os.path.abspath(__file__)
	SCRIPT_DIR = os.path.dirname(_this_file)
	BASE_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, os.curdir))

	# Load config file
	def load_config(config_path="transformers_config.json"):
	    """Read the JSON configuration file and return it as a dict.

	    Args:
	        config_path: Path of the JSON file. It is joined onto BASE_DIR, so a
	            relative path is resolved next to this script and an absolute
	            path is used as given (os.path.join semantics).

	    Returns:
	        The parsed configuration dict. An empty dict is returned when the
	        file cannot be read, does not contain valid JSON, or its top-level
	        value is not a JSON object. Failures are logged, never raised.
	    """
	    config_path = os.path.join(BASE_DIR, config_path)
	    try:
	        # Explicit encoding so parsing does not depend on the platform locale.
	        with open(config_path, 'r', encoding="utf-8") as f:
	            config = json.load(f)
	    except (OSError, ValueError) as e:
	        # OSError: missing or unreadable file. ValueError: malformed JSON
	        # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
	        logger.error(f"Error loading config: {str(e)}")
	        return {}

	    if not isinstance(config, dict):
	        # The module calls .get() on the result right after loading, so a
	        # JSON list/string/number at the top level must not be passed on.
	        logger.error(
	            f"Error loading config: expected a JSON object in {config_path}, "
	            f"got {type(config).__name__}"
	        )
	        return {}
	    return config

	# Parse the configuration once, when the module is loaded
	config = load_config()
	model_config = config.get("model_config", {})

	# Model identifier: taken from the config, otherwise the default checkpoint
	_DEFAULT_MODEL_NAME = "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit"
	MODEL_NAME = model_config.get("model_name_or_path", _DEFAULT_MODEL_NAME)

	# Space name: taken from the environment, otherwise a fixed fallback
	SPACE_NAME = os.environ.get("HF_SPACE_NAME", "phi4training")

	# Function to run training in a thread and stream output to container logs
	def run_training():
	    """Run the training script as a child process and mirror its output.

	    Launches run_cloud_training.py (looked up in BASE_DIR) with the current
	    interpreter, merges the child's stderr into its stdout, and copies each
	    line to this process's sys.stdout. If the script does not exist, prints
	    the path tried, the working directory and the contents of BASE_DIR, then
	    returns without starting anything.

	    Returns:
	        None. The call blocks until the child process has exited; a non-zero
	        exit status is reported on stdout.
	    """
	    # Locate training script using absolute path
	    training_script = os.path.join(BASE_DIR, "run_cloud_training.py")

	    # Check if file exists and log the path
	    if not os.path.exists(training_script):
	        print(f"ERROR: Training script not found at: {training_script}")
	        print(f"Current directory: {os.getcwd()}")
	        print("Available files:")
	        for entry in os.listdir(BASE_DIR):
	            print(f" - {entry}")
	        return

	    print(f"Found training script at: {training_script}")

	    # "-u" makes the child interpreter write stdout unbuffered; without it a
	    # piped stdout is block-buffered and lines arrive late and in bursts.
	    # Using Popen as a context manager closes the pipe and waits for the
	    # child on exit, so no file descriptor or zombie process is left behind.
	    with subprocess.Popen(
	        [sys.executable, "-u", training_script],
	        stdout=subprocess.PIPE,
	        stderr=subprocess.STDOUT,
	        universal_newlines=True,
	        bufsize=1,
	    ) as process:
	        # Stream output directly to sys.stdout (container logs)
	        for line in process.stdout:
	            sys.stdout.write(line)
	            sys.stdout.flush()

	    # returncode is set once the with-block has waited for the child.
	    if process.returncode != 0:
	        print(f"ERROR: Training script exited with code {process.returncode}")

	# Function to start the training process
	# Most recently started training thread (None until the first start);
	# read and written only while holding _training_lock.
	_training_thread = None
	_training_lock = threading.Lock()


	def start_training():
	    """Start run_training in a background thread unless one is still running.

	    Prints a start banner to stdout, launches run_training on a daemon
	    thread and returns immediately. If a previously started thread is still
	    alive, nothing new is launched, so repeated button clicks cannot start
	    several training processes at once.

	    Returns:
	        A message string for the UI: a confirmation when training was
	        started, a notice when it is already running, or an error
	        description if starting failed. Exceptions are logged, not raised.
	    """
	    global _training_thread
	    try:
	        with _training_lock:
	            if _training_thread is not None and _training_thread.is_alive():
	                logger.info("Training already running; ignoring duplicate start request")
	                return '⚠️ Training is already running. Check the "Logs" tab for progress.'

	            # Print directly to container logs
	            print("\n===== STARTING TRAINING PROCESS =====\n")
	            print(f"Model: {MODEL_NAME}")
	            print(f"Base directory: {BASE_DIR}")
	            print(f"Current working directory: {os.getcwd()}")
	            print("Training with configuration from transformers_config.json")
	            print("Training logs will appear below:")
	            print("=" * 50)

	            # Daemon thread: it does not keep the interpreter alive at exit
	            worker = threading.Thread(target=run_training, daemon=True)
	            worker.start()
	            _training_thread = worker

	        # Log the start of training
	        logger.info("Training started in background thread")

	        return """
	✅ Training process initiated!

	The model is now being fine-tuned in the background.

	To monitor progress:
	1. Check the Hugging Face space logs in the "Logs" tab
	2. You should see training output appearing directly in the logs
	3. The process will continue running in the background

	NOTE: This is a research training phase only, no model outputs will be available.
	"""
	    except Exception as e:
	        logger.error(f"Error starting training: {str(e)}")
	        return f"❌ Error starting training: {str(e)}"

	# Create Gradio interface - training status only, no model outputs.
	# The page footer is hidden through the css argument; the Blocks app is
	# bound to `demo`.
	with gr.Blocks(css="footer {visibility: hidden}") as demo:
	# Page title, prefixed with the Space name
	gr.Markdown(f"# {SPACE_NAME}: Research Training Dashboard")

	with gr.Row():
	with gr.Column():
	# Static summary panel. The f-string is evaluated once, when this
	# module-level code runs. Hyperparameters are read from the
	# "training_config" section of the loaded config, with the fallback
	# values shown in each .get() call when a key is absent.
	# NOTE(review): the heading hard-codes the model name while the "Model:"
	# line uses MODEL_NAME, so the two can disagree when the config overrides
	# the model - confirm whether that is intended.
	status = gr.Markdown(
	f"""
	## DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit Training Dashboard

	Model: {MODEL_NAME}
	Dataset: phi4-cognitive-dataset

	This is a multidisciplinary research training phase. The model is not available for interactive use.

	### Training Configuration:
	- Epochs: {config.get("training_config", {}).get("num_train_epochs", 3)}
	- Batch Size: {config.get("training_config", {}).get("per_device_train_batch_size", 2)}
	- Gradient Accumulation Steps: {config.get("training_config", {}).get("gradient_accumulation_steps", 4)}
	- Learning Rate: {config.get("training_config", {}).get("learning_rate", 2e-5)}
	- Max Sequence Length: {config.get("training_config", {}).get("max_seq_length", 2048)}

	⚠️ NOTE: This space does not provide model outputs during the research training phase.
	All logs are available in the Hugging Face "Logs" tab.
	"""
	)

	with gr.Row():
	# Primary button that triggers start_training
	start_btn = gr.Button("Start Training", variant="primary")

	# Initially empty Markdown area that receives start_training's return value
	training_output = gr.Markdown("")

	# Clicking the button calls start_training (no inputs) and writes the
	# returned message into training_output
	start_btn.click(start_training, outputs=training_output)

	# Static help text shown on the page
	gr.Markdown("""
	### Research Training Information

	This model is being fine-tuned on research-focused datasets and is not available for interactive querying.
	The training process will run in the background and logs will be available in the Hugging Face UI.

	#### Instructions
	1. Click "Start Training" to begin the fine-tuning process
	2. Monitor progress in the Hugging Face "Logs" tab
	3. Training metrics and results will be saved to the output directory

	#### About This Project
	The model is being fine-tuned on the phi4-cognitive-dataset with a focus on research capabilities.
	This training phase does not include any interactive features or output generation.
	""")

	# Script entry point: print startup diagnostics, then serve the dashboard
	if __name__ == "__main__":
	    startup_banner = "\n===== RESEARCH TRAINING DASHBOARD STARTED =====\n"
	    print(startup_banner)

	    # Show where the app is running from and which files sit beside it
	    print(f"Base directory: {BASE_DIR}")
	    print(f"Current working directory: {os.getcwd()}")
	    print("Available files:")
	    for entry in os.listdir(BASE_DIR):
	        print(f" - {entry}")

	    # Usage hints for whoever reads the container logs
	    print("\nClick 'Start Training' to begin the fine-tuning process")
	    print("All training output will appear in these logs")

	    logger.info("Starting research training dashboard")
	    # Serve the Gradio app without creating a public share link
	    demo.launch(share=False)