clip-tagger / model-config.json
sohei1l's picture
Initial model release
361ee5e
raw
history blame
1.12 kB
{
"model_type": "clip-tagger",
"base_model": "Xenova/clap-htsat-unfused",
"version": "1.0.0",
"framework": "transformers.js",
"feature_dim": 512,
"learning_rate": 0.01,
"supported_formats": [
"wav",
"mp3",
"m4a",
"ogg"
],
"default_labels": [
"speech",
"music",
"singing",
"guitar",
"piano",
"drums",
"violin",
"trumpet",
"saxophone",
"flute",
"classical music",
"rock music",
"pop music",
"jazz",
"electronic music",
"ambient",
"nature sounds",
"rain",
"wind",
"ocean waves",
"birds chirping",
"dog barking",
"cat meowing",
"car engine",
"traffic",
"footsteps",
"door closing",
"applause",
"laughter",
"crying",
"coughing",
"sneezing",
"telephone ringing",
"alarm clock",
"typing",
"water running",
"fire crackling",
"thunder",
"helicopter",
"airplane",
"train",
"motorcycle",
"bell ringing",
"whistle",
"horn",
"siren",
"explosion",
"gunshot",
"silence",
"noise",
"distortion"
]
}