Spaces:
Running
Running
File size: 4,702 Bytes
b82e5c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
#!/usr/bin/env python3
"""
Tests for scripts/generate_model_card.py using the real template in templates/model_card.md.
These tests verify:
- Conditional processing for quantized_models
- Variable replacement for common fields
- File writing via save_model_card
"""
import sys
from pathlib import Path
def _repo_root() -> Path:
    """Return the directory one level above the folder holding this file.

    The tests treat that directory as the repository root and resolve the
    ``scripts`` and ``templates`` folders relative to it.
    """
    this_file = Path(__file__).resolve()
    # parents[0] is this file's own directory; parents[1] is the level above.
    return this_file.parents[1]
def _add_scripts_to_path() -> None:
    """Put the repository's ``scripts`` directory at the front of ``sys.path``.

    Does nothing when that directory is already listed, so calling this
    helper repeatedly never adds duplicate entries.
    """
    scripts_entry = str(_repo_root() / "scripts")
    if scripts_entry in sys.path:
        return
    # Index 0 so this directory is searched before any other path entry.
    sys.path.insert(0, scripts_entry)
def test_model_card_generator_conditionals_truthy(tmp_path):
    """Render the real template with ``quantized_models=True`` and check it.

    Asserts that the quantized tag is present, that several variables were
    substituted into the output, and that ``save_model_card`` writes the
    rendered text unchanged to a file under ``tmp_path``.
    """
    _add_scripts_to_path()
    # Imported here because the scripts directory is only importable after
    # the sys.path tweak above.
    from generate_model_card import ModelCardGenerator

    template_file = _repo_root() / "templates" / "model_card.md"
    card_generator = ModelCardGenerator(str(template_file))

    template_vars = {
        "model_name": "My Fine-tuned Model",
        "model_description": "A test description.",
        "repo_name": "user/repo",
        "base_model": "HuggingFaceTB/SmolLM3-3B",
        "dataset_name": "OpenHermes-FR",
        "training_config_type": "Custom",
        "trainer_type": "SFTTrainer",
        "batch_size": "8",
        "gradient_accumulation_steps": "16",
        "learning_rate": "5e-6",
        "max_epochs": "3",
        "max_seq_length": "2048",
        "hardware_info": "CPU",
        "experiment_name": "exp-123",
        "trackio_url": "https://trackio.space/exp",
        "dataset_repo": "tonic/trackio-experiments",
        "author_name": "Unit Tester",
        "quantized_models": True,
    }
    rendered = card_generator.generate_model_card(template_vars)

    # First entry: the tag emitted by the truthy conditional branch.
    # Remaining entries: variables substituted at several places in the card.
    expected_fragments = (
        "- quantized",
        "base_model: HuggingFaceTB/SmolLM3-3B",
        "trainer_type: SFTTrainer",
        'from_pretrained("user/repo")',
    )
    for fragment in expected_fragments:
        assert fragment in rendered

    # Original author's note: ensure no escaped quotes are left behind.
    assert "Hardware\": \"CPU\"" not in rendered
    assert "hardware: \"CPU\"" in rendered

    # Write the card to disk and confirm the file round-trips exactly.
    destination = tmp_path / "README_test.md"
    was_saved = card_generator.save_model_card(rendered, str(destination))
    assert was_saved is True
    assert destination.exists()
    assert destination.read_text(encoding="utf-8") == rendered
def test_model_card_generator_conditionals_falsey(tmp_path):
    """Render the real template with ``quantized_models=False`` and check it.

    Asserts that the quantized tag and the raw conditional markers are absent,
    that ordinary variable substitution still happened, and that the rendered
    card can be saved to a file under ``tmp_path``.
    """
    _add_scripts_to_path()
    # Imported here because the scripts directory is only importable after
    # the sys.path tweak above.
    from generate_model_card import ModelCardGenerator

    template_file = _repo_root() / "templates" / "model_card.md"
    card_generator = ModelCardGenerator(str(template_file))

    template_vars = {
        "model_name": "My Model",
        "model_description": "A test description.",
        "repo_name": "user/repo",
        "base_model": "HuggingFaceTB/SmolLM3-3B",
        "dataset_name": "OpenHermes-FR",
        "training_config_type": "Custom",
        "trainer_type": "SFTTrainer",
        "batch_size": "8",
        "learning_rate": "5e-6",
        "max_epochs": "3",
        "max_seq_length": "2048",
        "hardware_info": "CPU",
        "quantized_models": False,
    }
    rendered = card_generator.generate_model_card(template_vars)

    # The quantized tag must be gone. Per the original author's note, the
    # current implementation removes the whole conditional block when the
    # flag is False, so no template markers should remain either.
    forbidden_fragments = (
        "- quantized",
        "{{#if quantized_models}}",
        "{{/if}}",
    )
    for fragment in forbidden_fragments:
        assert fragment not in rendered

    # Variable replacement still occurs outside the conditional block.
    required_fragments = (
        "base_model: HuggingFaceTB/SmolLM3-3B",
        'from_pretrained("user/repo")',
    )
    for fragment in required_fragments:
        assert fragment in rendered

    # Saving must report success and create the file.
    destination = tmp_path / "README_no_quant.md"
    was_saved = card_generator.save_model_card(rendered, str(destination))
    assert was_saved is True
    assert destination.exists()
def test_model_card_generator_variable_replacement(tmp_path):
    """Check that custom ``base_model`` and ``repo_name`` values are substituted.

    Renders the real template with non-default identifiers and asserts that
    they show up in the generated card alongside the trainer type.

    NOTE: the ``tmp_path`` fixture is requested but not used by this test.
    """
    _add_scripts_to_path()
    # Imported here because the scripts directory is only importable after
    # the sys.path tweak above.
    from generate_model_card import ModelCardGenerator

    template_file = _repo_root() / "templates" / "model_card.md"
    card_generator = ModelCardGenerator(str(template_file))

    # Held in locals so the same values feed both the input and the asserts.
    base_model = "custom/base-model"
    repo_name = "custom/repo-name"

    template_vars = {
        "model_name": "Var Test Model",
        "model_description": "Testing variable replacement.",
        "repo_name": repo_name,
        "base_model": base_model,
        "dataset_name": "dataset-x",
        "trainer_type": "SFTTrainer",
        "batch_size": "4",
        "gradient_accumulation_steps": "1",
        "max_seq_length": "1024",
        "hardware_info": "CPU",
        "quantized_models": False,
    }
    rendered = card_generator.generate_model_card(template_vars)

    expected_fragments = (
        f"base_model: {base_model}",
        f'from_pretrained("{repo_name}")',
        "trainer_type: SFTTrainer",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|