#!/usr/bin/env python3
"""
GPT-OSS Model Push Script

Specialized script for pushing GPT-OSS models to the Hugging Face Hub.
Handles LoRA weight merging and model card generation.
"""
import os
import sys
import shutil
import argparse
from datetime import datetime

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from huggingface_hub import HfApi

def merge_lora_weights(checkpoint_path, base_model_name, output_path):
    """Merge LoRA weights into the base model for standalone inference."""
    print(f"Loading base model: {base_model_name}")

    # Load the base model; device_map="auto" places weights across the
    # available devices, so no explicit .cuda() call is needed (and one
    # would break models with offloaded layers).
    model_kwargs = {
        "attn_implementation": "eager",
        "torch_dtype": "auto",
        "use_cache": True,
        "device_map": "auto",
    }
    base_model = AutoModelForCausalLM.from_pretrained(base_model_name, **model_kwargs)

    print(f"Loading LoRA weights from: {checkpoint_path}")

    # Load the adapter and fold its weights into the base model
    model = PeftModel.from_pretrained(base_model, checkpoint_path)
    model = model.merge_and_unload()

    print(f"Saving merged model to: {output_path}")
    model.save_pretrained(output_path)

    # Save the tokenizer alongside the merged weights
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    tokenizer.save_pretrained(output_path)

    return model, tokenizer
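
# A quick smoke test after merging might look like this (commented out;
# the checkpoint path and prompt are illustrative, not part of this script):
#
#   model, tokenizer = merge_lora_weights(
#       "outputs/checkpoint-1000", "openai/gpt-oss-20b", "/tmp/merged"
#   )
#   ids = tokenizer("Hello", return_tensors="pt").input_ids.to(model.device)
#   print(tokenizer.decode(model.generate(ids, max_new_tokens=32)[0]))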

def create_gpt_oss_model_card(model_name, experiment_name, trackio_url, dataset_repo, author_name, model_description):
    """Create a comprehensive model card for GPT-OSS models."""
    card_content = f"""---
language:
- en
- es
- fr
- it
- de
- zh
- hi
- ja
- ko
- ar
license: mit
tags:
- gpt-oss
- multilingual
- reasoning
- chain-of-thought
- fine-tuned
---
# {model_name}

## Model Description

{model_description}

This model is a fine-tuned version of OpenAI's GPT-OSS-20B model, optimized for multilingual reasoning tasks. It has been trained on the Multilingual-Thinking dataset to generate chain-of-thought reasoning in multiple languages.

## Training Details

- **Base Model**: openai/gpt-oss-20b
- **Training Dataset**: HuggingFaceH4/Multilingual-Thinking
- **Training Method**: LoRA (Low-Rank Adaptation)
- **Quantization**: MXFP4
- **Experiment**: {experiment_name}
- **Monitoring**: {trackio_url}

## Usage

### Basic Usage
```python
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the model and tokenizer; device_map="auto" spreads the 20B weights
# across the available devices
tokenizer = AutoTokenizer.from_pretrained("{model_name}")
model = AutoModelForCausalLM.from_pretrained(
    "{model_name}",
    torch_dtype="auto",
    device_map="auto",
)

# Example: reasoning in Spanish
messages = [
    {{"role": "system", "content": "reasoning language: Spanish"}},
    {{"role": "user", "content": "What is the capital of Australia?"}}
]

input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

output_ids = model.generate(input_ids, max_new_tokens=512)
response = tokenizer.batch_decode(output_ids)[0]
print(response)
```

### Multilingual Reasoning

The model supports reasoning in multiple languages:

- English
- Spanish (Español)
- French (Français)
- Italian (Italiano)
- German (Deutsch)
- Chinese (中文)
- Hindi (हिन्दी)
- Japanese (日本語)
- Korean (한국어)
- Arabic (العربية)

### System Prompt Format

To control the reasoning language, use the system prompt:

```
reasoning language: [LANGUAGE]
```

Example:

```
reasoning language: German
```
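
Passed as the system message, this looks like the following (a minimal sketch; the user question is illustrative):

```python
messages = [
    {{"role": "system", "content": "reasoning language: German"}},
    {{"role": "user", "content": "Explain photosynthesis."}},
]
```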

## Training Configuration

- **LoRA Rank**: 8
- **LoRA Alpha**: 16
- **Target Modules**: all-linear
- **Learning Rate**: 2e-4
- **Batch Size**: 4
- **Sequence Length**: 2048
- **Mixed Precision**: bf16
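
In PEFT terms, the LoRA settings above correspond to roughly the following configuration (a sketch for illustration; the original training script may differ):

```python
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,                          # LoRA rank
    lora_alpha=16,                # LoRA alpha
    target_modules="all-linear",  # adapt every linear layer
    task_type="CAUSAL_LM",
)
```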

## Dataset Information

The model was trained on the Multilingual-Thinking dataset, which contains 1,000 examples of chain-of-thought reasoning translated into multiple languages.

## Limitations

- The model is designed for reasoning tasks and may not perform optimally on other tasks
- Reasoning quality may vary across languages
- The model inherits limitations from the base GPT-OSS-20B model

## Citation

If you use this model in your research, please cite:

```bibtex
@misc{{{model_name.replace("/", "_").replace("-", "_")},
  author = {{{author_name}}},
  title = {{{model_name}}},
  year = {{{datetime.now().year}}},
  publisher = {{Hugging Face}},
  journal = {{Hugging Face repository}},
  howpublished = {{\\url{{https://huggingface.co/{model_name}}}}}
}}
```

## License

This model is licensed under the MIT License.

## Training Resources

- **Training Dataset**: https://huggingface.co/datasets/{dataset_repo}
- **Training Monitoring**: {trackio_url}
- **Base Model**: https://huggingface.co/openai/gpt-oss-20b

## Model Information

- **Architecture**: GPT-OSS-20B (LoRA adapters merged into the base weights)
- **Parameters**: ~20B (merged)
- **Training Sequence Length**: 2048 tokens
- **Languages**: 10 languages supported
- **Task**: Multilingual reasoning and chain-of-thought generation
"""
    return card_content

def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experiment_name, dataset_repo, author_name, model_description):
    """Push a GPT-OSS model to the Hugging Face Hub."""
    print("=== GPT-OSS Model Push Pipeline ===")
    print(f"Checkpoint: {checkpoint_path}")
    print(f"Repository: {repo_name}")
    print(f"Experiment: {experiment_name}")
    print(f"Author: {author_name}")

    # Validate checkpoint path
    if not os.path.exists(checkpoint_path):
        raise FileNotFoundError(f"Checkpoint path not found: {checkpoint_path}")

    # Create a temporary directory for the merged model
    temp_output = f"/tmp/gpt_oss_merged_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    os.makedirs(temp_output, exist_ok=True)

    try:
        # Merge LoRA weights into the base model
        print("Merging LoRA weights with base model...")
        model, tokenizer = merge_lora_weights(
            checkpoint_path=checkpoint_path,
            base_model_name="openai/gpt-oss-20b",
            output_path=temp_output,
        )

        # Create and save the model card
        print("Creating model card...")
        model_card_content = create_gpt_oss_model_card(
            model_name=repo_name,
            experiment_name=experiment_name,
            trackio_url=trackio_url,
            dataset_repo=dataset_repo,
            author_name=author_name,
            model_description=model_description,
        )
        model_card_path = os.path.join(temp_output, "README.md")
        with open(model_card_path, "w", encoding="utf-8") as f:
            f.write(model_card_content)

        # Push to the Hugging Face Hub, authenticating with the provided token
        print(f"Pushing model to: {repo_name}")
        api = HfApi(token=hf_token)

        # Create the repository if it doesn't exist
        try:
            api.create_repo(repo_name, private=False, exist_ok=True)
        except Exception as e:
            print(f"Warning: Could not create repository: {e}")

        # Upload the merged model, tokenizer, and model card
        print("Uploading model files...")
        api.upload_folder(
            folder_path=temp_output,
            repo_id=repo_name,
            repo_type="model",
        )

        print("✅ GPT-OSS model pushed successfully!")
        print(f"Model URL: https://huggingface.co/{repo_name}")
        return True
    except Exception as e:
        print(f"❌ Error pushing GPT-OSS model: {e}")
        return False
    finally:
        # Clean up the temporary directory whether or not the push succeeded
        if os.path.exists(temp_output):
            shutil.rmtree(temp_output)

def main():
    parser = argparse.ArgumentParser(description="Push GPT-OSS model to Hugging Face Hub")
    parser.add_argument("checkpoint_path", help="Path to model checkpoint")
    parser.add_argument("repo_name", help="Hugging Face repository name")
    parser.add_argument("--token", required=True, help="Hugging Face token")
    parser.add_argument("--trackio-url", help="Trackio URL for model card")
    parser.add_argument("--experiment-name", help="Experiment name")
    parser.add_argument("--dataset-repo", help="Dataset repository")
    parser.add_argument("--author-name", help="Author name")
    parser.add_argument("--model-description", help="Model description")
    args = parser.parse_args()

    # Fall back to sensible defaults for optional metadata
    trackio_url = args.trackio_url or "Not specified"
    experiment_name = args.experiment_name or "gpt_oss_finetune"
    dataset_repo = args.dataset_repo or "HuggingFaceH4/Multilingual-Thinking"
    author_name = args.author_name or "GPT-OSS Fine-tuner"
    model_description = args.model_description or "A fine-tuned version of OpenAI's GPT-OSS-20B model for multilingual reasoning tasks."

    success = push_gpt_oss_model(
        checkpoint_path=args.checkpoint_path,
        repo_name=args.repo_name,
        hf_token=args.token,
        trackio_url=trackio_url,
        experiment_name=experiment_name,
        dataset_repo=dataset_repo,
        author_name=author_name,
        model_description=model_description,
    )
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()