Upload 18 files
Browse files- ESC50_class_labels_indices.json +1 -0
- ESC50_class_labels_indices_space.json +1 -0
- FSD50k_class_labels_indices.json +1 -0
- UrbanSound8K_class_labels_indices.json +1 -0
- VGGSound_class_labels_indices.json +1 -0
- audioclip-arch.png +0 -0
- audioset_class_labels_indices.json +1 -0
- bootstrap_pytorch_dist_env.sh +17 -0
- check_ckpt.py +802 -0
- eval_retrieval_freesound.sh +81 -0
- finetune-esc50.sh +88 -0
- finetune-fsd50k.sh +88 -0
- htsat-roberta-large-dataset-fusion.sh +87 -0
- requirements.txt +16 -0
- test_tars.py +120 -0
- train-htsat-roberta.sh +83 -0
- train-pann-roberta.sh +83 -0
- zeroshot_esc50.sh +82 -0
ESC50_class_labels_indices.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dog": 0, "rooster": 1, "pig": 2, "cow": 3, "frog": 4, "cat": 5, "hen": 6, "insects": 7, "sheep": 8, "crow": 9, "rain": 10, "sea_waves": 11, "crackling_fire": 12, "crickets": 13, "chirping_birds": 14, "water_drops": 15, "wind": 16, "pouring_water": 17, "toilet_flush": 18, "thunderstorm": 19, "crying_baby": 20, "sneezing": 21, "clapping": 22, "breathing": 23, "coughing": 24, "footsteps": 25, "laughing": 26, "brushing_teeth": 27, "snoring": 28, "drinking_sipping": 29, "door_wood_knock": 30, "mouse_click": 31, "keyboard_typing": 32, "door_wood_creaks": 33, "can_opening": 34, "washing_machine": 35, "vacuum_cleaner": 36, "clock_alarm": 37, "clock_tick": 38, "glass_breaking": 39, "helicopter": 40, "chainsaw": 41, "siren": 42, "car_horn": 43, "engine": 44, "train": 45, "church_bells": 46, "airplane": 47, "fireworks": 48, "hand_saw": 49}
|
ESC50_class_labels_indices_space.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dog": 0, "rooster": 1, "pig": 2, "cow": 3, "frog": 4, "cat": 5, "hen": 6, "insects": 7, "sheep": 8, "crow": 9, "rain": 10, "sea waves": 11, "crackling fire": 12, "crickets": 13, "chirping birds": 14, "water drops": 15, "wind": 16, "pouring water": 17, "toilet flush": 18, "thunderstorm": 19, "crying baby": 20, "sneezing": 21, "clapping": 22, "breathing": 23, "coughing": 24, "footsteps": 25, "laughing": 26, "brushing teeth": 27, "snoring": 28, "drinking sipping": 29, "door wood knock": 30, "mouse click": 31, "keyboard typing": 32, "door wood creaks": 33, "can opening": 34, "washing machine": 35, "vacuum cleaner": 36, "clock alarm": 37, "clock tick": 38, "glass breaking": 39, "helicopter": 40, "chainsaw": 41, "siren": 42, "car horn": 43, "engine": 44, "train": 45, "church bells": 46, "airplane": 47, "fireworks": 48, "hand saw": 49}
|
FSD50k_class_labels_indices.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"Whispering": 0, "Gunshot, gunfire": 1, "Pour": 2, "Wind chime": 3, "Livestock, farm animals, working animals": 4, "Crackle": 5, "Waves, surf": 6, "Chicken, rooster": 7, "Chatter": 8, "Keyboard (musical)": 9, "Bark": 10, "Rail transport": 11, "Gong": 12, "Shatter": 13, "Ratchet, pawl": 14, "Clapping": 15, "Mallet percussion": 16, "Whoosh, swoosh, swish": 17, "Speech synthesizer": 18, "Respiratory sounds": 19, "Sliding door": 20, "Boat, Water vehicle": 21, "Boiling": 22, "Human voice": 23, "Drip": 24, "Thunderstorm": 25, "Male singing": 26, "Sneeze": 27, "Hi-hat": 28, "Guitar": 29, "Crying, sobbing": 30, "Speech": 31, "Slam": 32, "Crack": 33, "Yell": 34, "Drawer open or close": 35, "Run": 36, "Cheering": 37, "Splash, splatter": 38, "Tabla": 39, "Sigh": 40, "Packing tape, duct tape": 41, "Raindrop": 42, "Cymbal": 43, "Fill (with liquid)": 44, "Harp": 45, "Squeak": 46, "Zipper (clothing)": 47, "Tearing": 48, "Alarm": 49, "Skateboard": 50, "Wind instrument, woodwind instrument": 51, "Chink, clink": 52, "Wind": 53, "Ringtone": 54, "Microwave oven": 55, "Power tool": 56, "Dishes, pots, and pans": 57, "Musical instrument": 58, "Door": 59, "Domestic sounds, home sounds": 60, "Subway, metro, underground": 61, "Glockenspiel": 62, "Female speech, woman speaking": 63, "Coin (dropping)": 64, "Mechanical fan": 65, "Male speech, man speaking": 66, "Crowd": 67, "Screech": 68, "Animal": 69, "Human group actions": 70, "Telephone": 71, "Tools": 72, "Giggle": 73, "Crushing": 74, "Thump, thud": 75, "Hammer": 76, "Engine": 77, "Cupboard open or close": 78, "Glass": 79, "Writing": 80, "Clock": 81, "Plucked string instrument": 82, "Fowl": 83, "Water tap, faucet": 84, "Knock": 85, "Trickle, dribble": 86, "Rattle": 87, "Conversation": 88, "Accelerating, revving, vroom": 89, "Fixed-wing aircraft, airplane": 90, "Screaming": 91, "Walk, footsteps": 92, "Stream": 93, "Printer": 94, "Traffic noise, roadway noise": 95, "Motorcycle": 96, "Water": 97, "Scratching (performance technique)": 98, "Tap": 99, "Percussion": 100, "Chuckle, chortle": 101, "Motor vehicle (road)": 102, "Crow": 103, "Vehicle horn, car horn, honking": 104, "Bird vocalization, bird call, bird song": 105, "Drill": 106, "Race car, auto racing": 107, "Meow": 108, "Bass drum": 109, "Drum kit": 110, "Wild animals": 111, "Crash cymbal": 112, "Cough": 113, "Typing": 114, "Bowed string instrument": 115, "Computer keyboard": 116, "Vehicle": 117, "Train": 118, "Applause": 119, "Bicycle": 120, "Tick": 121, "Drum": 122, "Burping, eructation": 123, "Bicycle bell": 124, "Cowbell": 125, "Accordion": 126, "Toilet flush": 127, "Purr": 128, "Church bell": 129, "Cat": 130, "Insect": 131, "Engine starting": 132, "Chewing, mastication": 133, "Sink (filling or washing)": 134, "Dog": 135, "Bird": 136, "Finger snapping": 137, "Child speech, kid speaking": 138, "Wood": 139, "Music": 140, "Sawing": 141, "Bell": 142, "Fireworks": 143, "Crumpling, crinkling": 144, "Ocean": 145, "Gurgling": 146, "Fart": 147, "Mechanisms": 148, "Acoustic guitar": 149, "Singing": 150, "Boom": 151, "Bus": 152, "Cutlery, silverware": 153, "Liquid": 154, "Explosion": 155, "Gull, seagull": 156, "Thunder": 157, "Siren": 158, "Marimba, xylophone": 159, "Female singing": 160, "Tick-tock": 161, "Frog": 162, "Frying (food)": 163, "Buzz": 164, "Car passing by": 165, "Electric guitar": 166, "Gasp": 167, "Rattle (instrument)": 168, "Piano": 169, "Doorbell": 170, "Chime": 171, "Car": 172, "Fire": 173, "Trumpet": 174, "Truck": 175, "Hands": 176, "Domestic animals, pets": 177, "Chirp, tweet": 178, "Breathing": 179, "Cricket": 180, "Tambourine": 181, "Bass guitar": 182, "Idling": 183, "Scissors": 184, "Rain": 185, "Strum": 186, "Shout": 187, "Keys jangling": 188, "Camera": 189, "Hiss": 190, "Growling": 191, "Snare drum": 192, "Brass instrument": 193, "Bathtub (filling or washing)": 194, "Typewriter": 195, "Aircraft": 196, "Organ": 197, "Laughter": 198, "Harmonica": 199}
|
UrbanSound8K_class_labels_indices.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"air conditioner": 0, "car horn": 1, "children playing": 2, "dog bark": 3, "drilling": 4, "engine idling": 5, "gun shot": 6, "jackhammer": 7, "siren": 8, "street music": 9}
|
VGGSound_class_labels_indices.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"people crowd": 0, "playing mandolin": 1, "pumping water": 2, "horse neighing": 3, "airplane flyby": 4, "playing drum kit": 5, "pheasant crowing": 6, "duck quacking": 7, "wood thrush calling": 8, "dog bow-wow": 9, "arc welding": 10, "writing on blackboard with chalk": 11, "forging swords": 12, "swimming": 13, "bee, wasp, etc. buzzing": 14, "child singing": 15, "mouse clicking": 16, "playing trombone": 17, "telephone bell ringing": 18, "beat boxing": 19, "cattle mooing": 20, "lions roaring": 21, "ambulance siren": 22, "gibbon howling": 23, "people sniggering": 24, "playing clarinet": 25, "playing bassoon": 26, "playing bongo": 27, "playing electric guitar": 28, "playing badminton": 29, "bull bellowing": 30, "cat caterwauling": 31, "playing sitar": 32, "whale calling": 33, "snake hissing": 34, "people burping": 35, "francolin calling": 36, "fireworks banging": 37, "driving buses": 38, "people belly laughing": 39, "chicken clucking": 40, "playing double bass": 41, "canary calling": 42, "people battle cry": 43, "male singing": 44, "horse clip-clop": 45, "baby crying": 46, "cow lowing": 47, "reversing beeps": 48, "otter growling": 49, "cheetah chirrup": 50, "people running": 51, "ice cream truck, ice cream van": 52, "playing harpsichord": 53, "heart sounds, heartbeat": 54, "pig oinking": 55, "police radio chatter": 56, "cat hissing": 57, "wind chime": 58, "elk bugling": 59, "lions growling": 60, "fly, housefly buzzing": 61, "ferret dooking": 62, "railroad car, train wagon": 63, "church bell ringing": 64, "cat meowing": 65, "wind rustling leaves": 66, "bouncing on trampoline": 67, "mouse squeaking": 68, "sheep bleating": 69, "people eating crisps": 70, "people sneezing": 71, "playing squash": 72, "footsteps on snow": 73, "people humming": 74, "tap dancing": 75, "snake rattling": 76, "elephant trumpeting": 77, "people booing": 78, "disc scratching": 79, "skidding": 80, "cupboard opening or closing": 81, "playing bagpipes": 82, "basketball bounce": 83, "chinchilla barking": 84, "parrot talking": 85, "woodpecker pecking tree": 86, "fire truck siren": 87, "slot machine": 88, "playing french horn": 89, "air conditioning noise": 90, "people finger snapping": 91, "eagle screaming": 92, "playing harmonica": 93, "playing tympani": 94, "zebra braying": 95, "hedge trimmer running": 96, "playing acoustic guitar": 97, "hair dryer drying": 98, "orchestra": 99, "playing darts": 100, "children shouting": 101, "people slurping": 102, "alligators, crocodiles hissing": 103, "mouse pattering": 104, "people marching": 105, "vehicle horn, car horn, honking": 106, "sea lion barking": 107, "people clapping": 108, "hail": 109, "fire crackling": 110, "bathroom ventilation fan running": 111, "opening or closing car doors": 112, "skiing": 113, "dog barking": 114, "race car, auto racing": 115, "subway, metro, underground": 116, "underwater bubbling": 117, "car passing by": 118, "playing tennis": 119, "warbler chirping": 120, "helicopter": 121, "driving motorcycle": 122, "train wheels squealing": 123, "baby laughter": 124, "driving snowmobile": 125, "bird squawking": 126, "cuckoo bird calling": 127, "people whistling": 128, "shot football": 129, "playing tuning fork": 130, "dog howling": 131, "playing violin, fiddle": 132, "people eating": 133, "baltimore oriole calling": 134, "playing timbales": 135, "door slamming": 136, "people shuffling": 137, "typing on typewriter": 138, "magpie calling": 139, "playing harp": 140, "playing hammond organ": 141, "people eating apple": 142, "mosquito buzzing": 143, "playing oboe": 144, "playing volleyball": 145, "using sewing machines": 146, "electric grinder grinding": 147, "cutting hair with electric trimmers": 148, "splashing water": 149, "people sobbing": 150, "female singing": 151, "wind noise": 152, "car engine knocking": 153, "black capped chickadee calling": 154, "people screaming": 155, "cat growling": 156, "penguins braying": 157, "people coughing": 158, "metronome": 159, "train horning": 160, "goat bleating": 161, "playing tambourine": 162, "fox barking": 163, "airplane": 164, "firing cannon": 165, "thunder": 166, "smoke detector beeping": 167, "playing erhu": 168, "ice cracking": 169, "dog growling": 170, "playing saxophone": 171, "owl hooting": 172, "playing trumpet": 173, "sailing": 174, "waterfall burbling": 175, "machine gun shooting": 176, "baby babbling": 177, "playing synthesizer": 178, "donkey, ass braying": 179, "people cheering": 180, "playing shofar": 181, "playing hockey": 182, "playing banjo": 183, "cricket chirping": 184, "playing snare drum": 185, "ripping paper": 186, "child speech, kid speaking": 187, "crow cawing": 188, "sloshing water": 189, "playing zither": 190, "scuba diving": 191, "playing steelpan": 192, "goose honking": 193, "tapping guitar": 194, "spraying water": 195, "playing bass drum": 196, "printer printing": 197, "playing ukulele": 198, "ocean burbling": 199, "playing didgeridoo": 200, "sharpen knife": 201, "typing on computer keyboard": 202, "playing table tennis": 203, "rope skipping": 204, "playing marimba, xylophone": 205, "playing bugle": 206, "playing guiro": 207, "playing flute": 208, "tornado roaring": 209, "stream burbling": 210, "electric shaver, electric razor shaving": 211, "playing gong": 212, "eating with cutlery": 213, "playing piano": 214, "people giggling": 215, "chicken crowing": 216, "female speech, woman speaking": 217, "golf driving": 218, "frog croaking": 219, "people eating noodle": 220, "mynah bird singing": 221, "playing timpani": 222, "playing congas": 223, "dinosaurs bellowing": 224, "playing bass guitar": 225, "turkey gobbling": 226, "chipmunk chirping": 227, "chopping food": 228, "striking bowling": 229, "missile launch": 230, "squishing water": 231, "civil defense siren": 232, "blowtorch igniting": 233, "tractor digging": 234, "lighting firecrackers": 235, "playing theremin": 236, "train whistling": 237, "people nose blowing": 238, "car engine starting": 239, "lathe spinning": 240, "playing cello": 241, "motorboat, speedboat acceleration": 242, "playing vibraphone": 243, "playing washboard": 244, "playing cornet": 245, "pigeon, dove cooing": 246, "roller coaster running": 247, "opening or closing car electric windows": 248, "foghorn": 249, "coyote howling": 250, "hammering nails": 251, "toilet flushing": 252, "strike lighter": 253, "bird wings flapping": 254, "playing steel guitar, slide guitar": 255, "volcano explosion": 256, "people whispering": 257, "bowling impact": 258, "yodelling": 259, "firing muskets": 260, "raining": 261, "singing bowl": 262, "plastic bottle crushing": 263, "chimpanzee pant-hooting": 264, "playing electronic organ": 265, "chainsawing trees": 266, "dog baying": 267, "lawn mowing": 268, "people babbling": 269, "striking pool": 270, "eletric blender running": 271, "playing tabla": 272, "cap gun shooting": 273, "planing timber": 274, "air horn": 275, "sliding door": 276, "cell phone buzzing": 277, "sea waves": 278, "playing castanets": 279, "singing choir": 280, "people slapping": 281, "barn swallow calling": 282, "people hiccup": 283, "vacuum cleaner cleaning floors": 284, "playing lacrosse": 285, "bird chirping, tweeting": 286, "lip smacking": 287, "chopping wood": 288, "police car (siren)": 289, "running electric fan": 290, "cattle, bovinae cowbell": 291, "people gargling": 292, "opening or closing drawers": 293, "playing djembe": 294, "skateboarding": 295, "cat purring": 296, "rowboat, canoe, kayak rowing": 297, "engine accelerating, revving, vroom": 298, "playing glockenspiel": 299, "popping popcorn": 300, "car engine idling": 301, "alarm clock ringing": 302, "dog whimpering": 303, "playing accordion": 304, "playing cymbal": 305, "male speech, man speaking": 306, "rapping": 307, "people farting": 308}
|
audioclip-arch.png
ADDED
audioset_class_labels_indices.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"Speech": 0, "Male speech, man speaking": 1, "Female speech, woman speaking": 2, "Child speech, kid speaking": 3, "Conversation": 4, "Narration, monologue": 5, "Babbling": 6, "Speech synthesizer": 7, "Shout": 8, "Bellow": 9, "Whoop": 10, "Yell": 11, "Battle cry": 12, "Children shouting": 13, "Screaming": 14, "Whispering": 15, "Laughter": 16, "Baby laughter": 17, "Giggle": 18, "Snicker": 19, "Belly laugh": 20, "Chuckle, chortle": 21, "Crying, sobbing": 22, "Baby cry, infant cry": 23, "Whimper": 24, "Wail, moan": 25, "Sigh": 26, "Singing": 27, "Choir": 28, "Yodeling": 29, "Chant": 30, "Mantra": 31, "Male singing": 32, "Female singing": 33, "Child singing": 34, "Synthetic singing": 35, "Rapping": 36, "Humming": 37, "Groan": 38, "Grunt": 39, "Whistling": 40, "Breathing": 41, "Wheeze": 42, "Snoring": 43, "Gasp": 44, "Pant": 45, "Snort": 46, "Cough": 47, "Throat clearing": 48, "Sneeze": 49, "Sniff": 50, "Run": 51, "Shuffle": 52, "Walk, footsteps": 53, "Chewing, mastication": 54, "Biting": 55, "Gargling": 56, "Stomach rumble": 57, "Burping, eructation": 58, "Hiccup": 59, "Fart": 60, "Hands": 61, "Finger snapping": 62, "Clapping": 63, "Heart sounds, heartbeat": 64, "Heart murmur": 65, "Cheering": 66, "Applause": 67, "Chatter": 68, "Crowd": 69, "Hubbub, speech noise, speech babble": 70, "Children playing": 71, "Animal": 72, "Domestic animals, pets": 73, "Dog": 74, "Bark": 75, "Yip": 76, "Howl": 77, "Bow-wow": 78, "Growling": 79, "Whimper (dog)": 80, "Cat": 81, "Purr": 82, "Meow": 83, "Hiss": 84, "Caterwaul": 85, "Livestock, farm animals, working animals": 86, "Horse": 87, "Clip-clop": 88, "Neigh, whinny": 89, "Cattle, bovinae": 90, "Moo": 91, "Cowbell": 92, "Pig": 93, "Oink": 94, "Goat": 95, "Bleat": 96, "Sheep": 97, "Fowl": 98, "Chicken, rooster": 99, "Cluck": 100, "Crowing, cock-a-doodle-doo": 101, "Turkey": 102, "Gobble": 103, "Duck": 104, "Quack": 105, "Goose": 106, "Honk": 107, "Wild animals": 108, "Roaring cats (lions, tigers)": 109, "Roar": 110, "Bird": 111, "Bird vocalization, bird call, bird song": 112, "Chirp, tweet": 113, "Squawk": 114, "Pigeon, dove": 115, "Coo": 116, "Crow": 117, "Caw": 118, "Owl": 119, "Hoot": 120, "Bird flight, flapping wings": 121, "Canidae, dogs, wolves": 122, "Rodents, rats, mice": 123, "Mouse": 124, "Patter": 125, "Insect": 126, "Cricket": 127, "Mosquito": 128, "Fly, housefly": 129, "Buzz": 130, "Bee, wasp, etc.": 131, "Frog": 132, "Croak": 133, "Snake": 134, "Rattle": 135, "Whale vocalization": 136, "Music": 137, "Musical instrument": 138, "Plucked string instrument": 139, "Guitar": 140, "Electric guitar": 141, "Bass guitar": 142, "Acoustic guitar": 143, "Steel guitar, slide guitar": 144, "Tapping (guitar technique)": 145, "Strum": 146, "Banjo": 147, "Sitar": 148, "Mandolin": 149, "Zither": 150, "Ukulele": 151, "Keyboard (musical)": 152, "Piano": 153, "Electric piano": 154, "Organ": 155, "Electronic organ": 156, "Hammond organ": 157, "Synthesizer": 158, "Sampler": 159, "Harpsichord": 160, "Percussion": 161, "Drum kit": 162, "Drum machine": 163, "Drum": 164, "Snare drum": 165, "Rimshot": 166, "Drum roll": 167, "Bass drum": 168, "Timpani": 169, "Tabla": 170, "Cymbal": 171, "Hi-hat": 172, "Wood block": 173, "Tambourine": 174, "Rattle (instrument)": 175, "Maraca": 176, "Gong": 177, "Tubular bells": 178, "Mallet percussion": 179, "Marimba, xylophone": 180, "Glockenspiel": 181, "Vibraphone": 182, "Steelpan": 183, "Orchestra": 184, "Brass instrument": 185, "French horn": 186, "Trumpet": 187, "Trombone": 188, "Bowed string instrument": 189, "String section": 190, "Violin, fiddle": 191, "Pizzicato": 192, "Cello": 193, "Double bass": 194, "Wind instrument, woodwind instrument": 195, "Flute": 196, "Saxophone": 197, "Clarinet": 198, "Harp": 199, "Bell": 200, "Church bell": 201, "Jingle bell": 202, "Bicycle bell": 203, "Tuning fork": 204, "Chime": 205, "Wind chime": 206, "Change ringing (campanology)": 207, "Harmonica": 208, "Accordion": 209, "Bagpipes": 210, "Didgeridoo": 211, "Shofar": 212, "Theremin": 213, "Singing bowl": 214, "Scratching (performance technique)": 215, "Pop music": 216, "Hip hop music": 217, "Beatboxing": 218, "Rock music": 219, "Heavy metal": 220, "Punk rock": 221, "Grunge": 222, "Progressive rock": 223, "Rock and roll": 224, "Psychedelic rock": 225, "Rhythm and blues": 226, "Soul music": 227, "Reggae": 228, "Country": 229, "Swing music": 230, "Bluegrass": 231, "Funk": 232, "Folk music": 233, "Middle Eastern music": 234, "Jazz": 235, "Disco": 236, "Classical music": 237, "Opera": 238, "Electronic music": 239, "House music": 240, "Techno": 241, "Dubstep": 242, "Drum and bass": 243, "Electronica": 244, "Electronic dance music": 245, "Ambient music": 246, "Trance music": 247, "Music of Latin America": 248, "Salsa music": 249, "Flamenco": 250, "Blues": 251, "Music for children": 252, "New-age music": 253, "Vocal music": 254, "A capella": 255, "Music of Africa": 256, "Afrobeat": 257, "Christian music": 258, "Gospel music": 259, "Music of Asia": 260, "Carnatic music": 261, "Music of Bollywood": 262, "Ska": 263, "Traditional music": 264, "Independent music": 265, "Song": 266, "Background music": 267, "Theme music": 268, "Jingle (music)": 269, "Soundtrack music": 270, "Lullaby": 271, "Video game music": 272, "Christmas music": 273, "Dance music": 274, "Wedding music": 275, "Happy music": 276, "Funny music": 277, "Sad music": 278, "Tender music": 279, "Exciting music": 280, "Angry music": 281, "Scary music": 282, "Wind": 283, "Rustling leaves": 284, "Wind noise (microphone)": 285, "Thunderstorm": 286, "Thunder": 287, "Water": 288, "Rain": 289, "Raindrop": 290, "Rain on surface": 291, "Stream": 292, "Waterfall": 293, "Ocean": 294, "Waves, surf": 295, "Steam": 296, "Gurgling": 297, "Fire": 298, "Crackle": 299, "Vehicle": 300, "Boat, Water vehicle": 301, "Sailboat, sailing ship": 302, "Rowboat, canoe, kayak": 303, "Motorboat, speedboat": 304, "Ship": 305, "Motor vehicle (road)": 306, "Car": 307, "Vehicle horn, car horn, honking": 308, "Toot": 309, "Car alarm": 310, "Power windows, electric windows": 311, "Skidding": 312, "Tire squeal": 313, "Car passing by": 314, "Race car, auto racing": 315, "Truck": 316, "Air brake": 317, "Air horn, truck horn": 318, "Reversing beeps": 319, "Ice cream truck, ice cream van": 320, "Bus": 321, "Emergency vehicle": 322, "Police car (siren)": 323, "Ambulance (siren)": 324, "Fire engine, fire truck (siren)": 325, "Motorcycle": 326, "Traffic noise, roadway noise": 327, "Rail transport": 328, "Train": 329, "Train whistle": 330, "Train horn": 331, "Railroad car, train wagon": 332, "Train wheels squealing": 333, "Subway, metro, underground": 334, "Aircraft": 335, "Aircraft engine": 336, "Jet engine": 337, "Propeller, airscrew": 338, "Helicopter": 339, "Fixed-wing aircraft, airplane": 340, "Bicycle": 341, "Skateboard": 342, "Engine": 343, "Light engine (high frequency)": 344, "Dental drill, dentist's drill": 345, "Lawn mower": 346, "Chainsaw": 347, "Medium engine (mid frequency)": 348, "Heavy engine (low frequency)": 349, "Engine knocking": 350, "Engine starting": 351, "Idling": 352, "Accelerating, revving, vroom": 353, "Door": 354, "Doorbell": 355, "Ding-dong": 356, "Sliding door": 357, "Slam": 358, "Knock": 359, "Tap": 360, "Squeak": 361, "Cupboard open or close": 362, "Drawer open or close": 363, "Dishes, pots, and pans": 364, "Cutlery, silverware": 365, "Chopping (food)": 366, "Frying (food)": 367, "Microwave oven": 368, "Blender": 369, "Water tap, faucet": 370, "Sink (filling or washing)": 371, "Bathtub (filling or washing)": 372, "Hair dryer": 373, "Toilet flush": 374, "Toothbrush": 375, "Electric toothbrush": 376, "Vacuum cleaner": 377, "Zipper (clothing)": 378, "Keys jangling": 379, "Coin (dropping)": 380, "Scissors": 381, "Electric shaver, electric razor": 382, "Shuffling cards": 383, "Typing": 384, "Typewriter": 385, "Computer keyboard": 386, "Writing": 387, "Alarm": 388, "Telephone": 389, "Telephone bell ringing": 390, "Ringtone": 391, "Telephone dialing, DTMF": 392, "Dial tone": 393, "Busy signal": 394, "Alarm clock": 395, "Siren": 396, "Civil defense siren": 397, "Buzzer": 398, "Smoke detector, smoke alarm": 399, "Fire alarm": 400, "Foghorn": 401, "Whistle": 402, "Steam whistle": 403, "Mechanisms": 404, "Ratchet, pawl": 405, "Clock": 406, "Tick": 407, "Tick-tock": 408, "Gears": 409, "Pulleys": 410, "Sewing machine": 411, "Mechanical fan": 412, "Air conditioning": 413, "Cash register": 414, "Printer": 415, "Camera": 416, "Single-lens reflex camera": 417, "Tools": 418, "Hammer": 419, "Jackhammer": 420, "Sawing": 421, "Filing (rasp)": 422, "Sanding": 423, "Power tool": 424, "Drill": 425, "Explosion": 426, "Gunshot, gunfire": 427, "Machine gun": 428, "Fusillade": 429, "Artillery fire": 430, "Cap gun": 431, "Fireworks": 432, "Firecracker": 433, "Burst, pop": 434, "Eruption": 435, "Boom": 436, "Wood": 437, "Chop": 438, "Splinter": 439, "Crack": 440, "Glass": 441, "Chink, clink": 442, "Shatter": 443, "Liquid": 444, "Splash, splatter": 445, "Slosh": 446, "Squish": 447, "Drip": 448, "Pour": 449, "Trickle, dribble": 450, "Gush": 451, "Fill (with liquid)": 452, "Spray": 453, "Pump (liquid)": 454, "Stir": 455, "Boiling": 456, "Sonar": 457, "Arrow": 458, "Whoosh, swoosh, swish": 459, "Thump, thud": 460, "Thunk": 461, "Electronic tuner": 462, "Effects unit": 463, "Chorus effect": 464, "Basketball bounce": 465, "Bang": 466, "Slap, smack": 467, "Whack, thwack": 468, "Smash, crash": 469, "Breaking": 470, "Bouncing": 471, "Whip": 472, "Flap": 473, "Scratch": 474, "Scrape": 475, "Rub": 476, "Roll": 477, "Crushing": 478, "Crumpling, crinkling": 479, "Tearing": 480, "Beep, bleep": 481, "Ping": 482, "Ding": 483, "Clang": 484, "Squeal": 485, "Creak": 486, "Rustle": 487, "Whir": 488, "Clatter": 489, "Sizzle": 490, "Clicking": 491, "Clickety-clack": 492, "Rumble": 493, "Plop": 494, "Jingle, tinkle": 495, "Hum": 496, "Zing": 497, "Boing": 498, "Crunch": 499, "Silence": 500, "Sine wave": 501, "Harmonic": 502, "Chirp tone": 503, "Sound effect": 504, "Pulse": 505, "Inside, small room": 506, "Inside, large room or hall": 507, "Inside, public space": 508, "Outside, urban or manmade": 509, "Outside, rural or natural": 510, "Reverberation": 511, "Echo": 512, "Noise": 513, "Environmental noise": 514, "Static": 515, "Mains hum": 516, "Distortion": 517, "Sidetone": 518, "Cacophony": 519, "White noise": 520, "Pink noise": 521, "Throbbing": 522, "Vibration": 523, "Television": 524, "Radio": 525, "Field recording": 526}
|
bootstrap_pytorch_dist_env.sh
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
# Set env vars for PyTorch
|
3 |
+
nodes=($(cat ${LSB_DJOB_HOSTFILE} | sort | uniq | grep -v login | grep -v batch))
|
4 |
+
head=${nodes[0]}
|
5 |
+
|
6 |
+
export RANK=$OMPI_COMM_WORLD_RANK
|
7 |
+
export LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
|
8 |
+
export WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
|
9 |
+
export MASTER_ADDR=$head
|
10 |
+
export MASTER_PORT=29500 # default from torch launcher
|
11 |
+
|
12 |
+
echo "Setting env_var RANK=${RANK}"
|
13 |
+
echo "Setting env_var LOCAL_RANK=${LOCAL_RANK}"
|
14 |
+
echo "Setting env_var WORLD_SIZE=${WORLD_SIZE}"
|
15 |
+
echo "Setting env_var MASTER_ADDR=${MASTER_ADDR}"
|
16 |
+
echo "Setting env_var MASTER_PORT=${MASTER_PORT}"
|
17 |
+
|
check_ckpt.py
ADDED
@@ -0,0 +1,802 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
|
3 |
+
def keys_in_state_dict(ckpt, device='cpu'):
|
4 |
+
if device=="cpu":
|
5 |
+
a = torch.load(ckpt, map_location=torch.device('cpu'))["state_dict"]
|
6 |
+
else:
|
7 |
+
a = torch.load(ckpt)["state_dict"]
|
8 |
+
print("keys_in_state_dict", a.keys())
|
9 |
+
|
10 |
+
|
11 |
+
def check_ckpt_diff(ckpt_a, ckpt_b, key_include=None, key_exclude=None, device='cpu', verbose=True):
|
12 |
+
if device=="cpu":
|
13 |
+
a = torch.load(ckpt_a, map_location=torch.device('cpu'))["state_dict"]
|
14 |
+
b = torch.load(ckpt_b, map_location=torch.device('cpu'))["state_dict"]
|
15 |
+
else:
|
16 |
+
a = torch.load(ckpt_a)["state_dict"]
|
17 |
+
b = torch.load(ckpt_b)["state_dict"]
|
18 |
+
a_sum = 0
|
19 |
+
b_sum = 0
|
20 |
+
difference_count = 0
|
21 |
+
for k in a.keys():
|
22 |
+
if key_include is not None and key_include not in k:
|
23 |
+
continue
|
24 |
+
if key_exclude is not None and key_exclude in k:
|
25 |
+
continue
|
26 |
+
if k in b.keys():
|
27 |
+
a_sum += torch.sum(a[k])
|
28 |
+
b_sum += torch.sum(b[k])
|
29 |
+
if verbose:
|
30 |
+
if torch.sum(a[k]) != torch.sum(b[k]):
|
31 |
+
print(f"key {k} is different")
|
32 |
+
difference_count += 1
|
33 |
+
print("a_sum: ", a_sum)
|
34 |
+
print("b_sum: ", b_sum)
|
35 |
+
print("diff: ", a_sum - b_sum)
|
36 |
+
if verbose:
|
37 |
+
print("difference_count: ", difference_count)
|
38 |
+
return bool(a_sum - b_sum)
|
39 |
+
|
40 |
+
# Transformer no freeze:
|
41 |
+
# check_ckpt_diff("/fsx/clap_logs/2022_09_11-19_37_08-model_PANN-14-lr_0.001-b_160-j_4-p_fp32/checkpoints/epoch_10.pt", "/fsx/clap_logs/2022_09_11-19_37_08-model_PANN-14-lr_0.001-b_160-j_4-p_fp32/checkpoints/epoch_100.pt", "text_branch.resblocks")
|
42 |
+
|
43 |
+
check_ckpt_diff("/fsx/clap_logs/2022_09_29-23_42_40-model_PANN-14-lr_0.001-b_160-j_4-p_fp32/checkpoints/epoch_1.pt",
|
44 |
+
"/fsx/clap_logs/2022_09_29-23_42_40-model_PANN-14-lr_0.001-b_160-j_4-p_fp32/checkpoints/epoch_2.pt",
|
45 |
+
"text_branch.resblocks")
|
46 |
+
|
47 |
+
# key module.text_branch.resblocks.0.attn.in_proj_weight is different
|
48 |
+
# key module.text_branch.resblocks.0.attn.in_proj_bias is different
|
49 |
+
# key module.text_branch.resblocks.0.attn.out_proj.weight is different
|
50 |
+
# key module.text_branch.resblocks.0.attn.out_proj.bias is different
|
51 |
+
# key module.text_branch.resblocks.0.ln_1.weight is different
|
52 |
+
# key module.text_branch.resblocks.0.ln_1.bias is different
|
53 |
+
# key module.text_branch.resblocks.0.mlp.c_fc.weight is different
|
54 |
+
# key module.text_branch.resblocks.0.mlp.c_fc.bias is different
|
55 |
+
# key module.text_branch.resblocks.0.mlp.c_proj.weight is different
|
56 |
+
# key module.text_branch.resblocks.0.mlp.c_proj.bias is different
|
57 |
+
# key module.text_branch.resblocks.0.ln_2.weight is different
|
58 |
+
# key module.text_branch.resblocks.0.ln_2.bias is different
|
59 |
+
# key module.text_branch.resblocks.1.attn.in_proj_weight is different
|
60 |
+
# key module.text_branch.resblocks.1.attn.in_proj_bias is different
|
61 |
+
# key module.text_branch.resblocks.1.attn.out_proj.weight is different
|
62 |
+
# key module.text_branch.resblocks.1.attn.out_proj.bias is different
|
63 |
+
# key module.text_branch.resblocks.1.ln_1.weight is different
|
64 |
+
# key module.text_branch.resblocks.1.ln_1.bias is different
|
65 |
+
# key module.text_branch.resblocks.1.mlp.c_fc.weight is different
|
66 |
+
# key module.text_branch.resblocks.1.mlp.c_fc.bias is different
|
67 |
+
# key module.text_branch.resblocks.1.mlp.c_proj.weight is different
|
68 |
+
# key module.text_branch.resblocks.1.mlp.c_proj.bias is different
|
69 |
+
# key module.text_branch.resblocks.1.ln_2.weight is different
|
70 |
+
# key module.text_branch.resblocks.1.ln_2.bias is different
|
71 |
+
# key module.text_branch.resblocks.2.attn.in_proj_weight is different
|
72 |
+
# key module.text_branch.resblocks.2.attn.in_proj_bias is different
|
73 |
+
# key module.text_branch.resblocks.2.attn.out_proj.weight is different
|
74 |
+
# key module.text_branch.resblocks.2.attn.out_proj.bias is different
|
75 |
+
# key module.text_branch.resblocks.2.ln_1.weight is different
|
76 |
+
# key module.text_branch.resblocks.2.ln_1.bias is different
|
77 |
+
# key module.text_branch.resblocks.2.mlp.c_fc.weight is different
|
78 |
+
# key module.text_branch.resblocks.2.mlp.c_fc.bias is different
|
79 |
+
# key module.text_branch.resblocks.2.mlp.c_proj.weight is different
|
80 |
+
# key module.text_branch.resblocks.2.mlp.c_proj.bias is different
|
81 |
+
# key module.text_branch.resblocks.2.ln_2.weight is different
|
82 |
+
# key module.text_branch.resblocks.2.ln_2.bias is different
|
83 |
+
# key module.text_branch.resblocks.3.attn.in_proj_weight is different
|
84 |
+
# key module.text_branch.resblocks.3.attn.in_proj_bias is different
|
85 |
+
# key module.text_branch.resblocks.3.attn.out_proj.weight is different
|
86 |
+
# key module.text_branch.resblocks.3.attn.out_proj.bias is different
|
87 |
+
# key module.text_branch.resblocks.3.ln_1.weight is different
|
88 |
+
# key module.text_branch.resblocks.3.ln_1.bias is different
|
89 |
+
# key module.text_branch.resblocks.3.mlp.c_fc.weight is different
|
90 |
+
# key module.text_branch.resblocks.3.mlp.c_fc.bias is different
|
91 |
+
# key module.text_branch.resblocks.3.mlp.c_proj.weight is different
|
92 |
+
# key module.text_branch.resblocks.3.mlp.c_proj.bias is different
|
93 |
+
# key module.text_branch.resblocks.3.ln_2.weight is different
|
94 |
+
# key module.text_branch.resblocks.3.ln_2.bias is different
|
95 |
+
# key module.text_branch.resblocks.4.attn.in_proj_weight is different
|
96 |
+
# key module.text_branch.resblocks.4.attn.in_proj_bias is different
|
97 |
+
# key module.text_branch.resblocks.4.attn.out_proj.weight is different
|
98 |
+
# key module.text_branch.resblocks.4.attn.out_proj.bias is different
|
99 |
+
# key module.text_branch.resblocks.4.ln_1.weight is different
|
100 |
+
# key module.text_branch.resblocks.4.ln_1.bias is different
|
101 |
+
# key module.text_branch.resblocks.4.mlp.c_fc.weight is different
|
102 |
+
# key module.text_branch.resblocks.4.mlp.c_fc.bias is different
|
103 |
+
# key module.text_branch.resblocks.4.mlp.c_proj.weight is different
|
104 |
+
# key module.text_branch.resblocks.4.mlp.c_proj.bias is different
|
105 |
+
# key module.text_branch.resblocks.4.ln_2.weight is different
|
106 |
+
# key module.text_branch.resblocks.4.ln_2.bias is different
|
107 |
+
# key module.text_branch.resblocks.5.attn.in_proj_weight is different
|
108 |
+
# key module.text_branch.resblocks.5.attn.in_proj_bias is different
|
109 |
+
# key module.text_branch.resblocks.5.attn.out_proj.weight is different
|
110 |
+
# key module.text_branch.resblocks.5.attn.out_proj.bias is different
|
111 |
+
# key module.text_branch.resblocks.5.ln_1.weight is different
|
112 |
+
# key module.text_branch.resblocks.5.ln_1.bias is different
|
113 |
+
# key module.text_branch.resblocks.5.mlp.c_fc.weight is different
|
114 |
+
# key module.text_branch.resblocks.5.mlp.c_fc.bias is different
|
115 |
+
# key module.text_branch.resblocks.5.mlp.c_proj.weight is different
|
116 |
+
# key module.text_branch.resblocks.5.mlp.c_proj.bias is different
|
117 |
+
# key module.text_branch.resblocks.5.ln_2.weight is different
|
118 |
+
# key module.text_branch.resblocks.5.ln_2.bias is different
|
119 |
+
# key module.text_branch.resblocks.6.attn.in_proj_weight is different
|
120 |
+
# key module.text_branch.resblocks.6.attn.in_proj_bias is different
|
121 |
+
# key module.text_branch.resblocks.6.attn.out_proj.weight is different
|
122 |
+
# key module.text_branch.resblocks.6.attn.out_proj.bias is different
|
123 |
+
# key module.text_branch.resblocks.6.ln_1.weight is different
|
124 |
+
# key module.text_branch.resblocks.6.ln_1.bias is different
|
125 |
+
# key module.text_branch.resblocks.6.mlp.c_fc.weight is different
|
126 |
+
# key module.text_branch.resblocks.6.mlp.c_fc.bias is different
|
127 |
+
# key module.text_branch.resblocks.6.mlp.c_proj.weight is different
|
128 |
+
# key module.text_branch.resblocks.6.mlp.c_proj.bias is different
|
129 |
+
# key module.text_branch.resblocks.6.ln_2.weight is different
|
130 |
+
# key module.text_branch.resblocks.6.ln_2.bias is different
|
131 |
+
# key module.text_branch.resblocks.7.attn.in_proj_weight is different
|
132 |
+
# key module.text_branch.resblocks.7.attn.in_proj_bias is different
|
133 |
+
# key module.text_branch.resblocks.7.attn.out_proj.weight is different
|
134 |
+
# key module.text_branch.resblocks.7.attn.out_proj.bias is different
|
135 |
+
# key module.text_branch.resblocks.7.ln_1.weight is different
|
136 |
+
# key module.text_branch.resblocks.7.ln_1.bias is different
|
137 |
+
# key module.text_branch.resblocks.7.mlp.c_fc.weight is different
|
138 |
+
# key module.text_branch.resblocks.7.mlp.c_fc.bias is different
|
139 |
+
# key module.text_branch.resblocks.7.mlp.c_proj.weight is different
|
140 |
+
# key module.text_branch.resblocks.7.mlp.c_proj.bias is different
|
141 |
+
# key module.text_branch.resblocks.7.ln_2.weight is different
|
142 |
+
# key module.text_branch.resblocks.7.ln_2.bias is different
|
143 |
+
# key module.text_branch.resblocks.8.attn.in_proj_weight is different
|
144 |
+
# key module.text_branch.resblocks.8.attn.in_proj_bias is different
|
145 |
+
# key module.text_branch.resblocks.8.attn.out_proj.weight is different
|
146 |
+
# key module.text_branch.resblocks.8.attn.out_proj.bias is different
|
147 |
+
# key module.text_branch.resblocks.8.ln_1.weight is different
|
148 |
+
# key module.text_branch.resblocks.8.ln_1.bias is different
|
149 |
+
# key module.text_branch.resblocks.8.mlp.c_fc.weight is different
|
150 |
+
# key module.text_branch.resblocks.8.mlp.c_fc.bias is different
|
151 |
+
# key module.text_branch.resblocks.8.mlp.c_proj.weight is different
|
152 |
+
# key module.text_branch.resblocks.8.mlp.c_proj.bias is different
|
153 |
+
# key module.text_branch.resblocks.8.ln_2.weight is different
|
154 |
+
# key module.text_branch.resblocks.8.ln_2.bias is different
|
155 |
+
# key module.text_branch.resblocks.9.attn.in_proj_weight is different
|
156 |
+
# key module.text_branch.resblocks.9.attn.in_proj_bias is different
|
157 |
+
# key module.text_branch.resblocks.9.attn.out_proj.weight is different
|
158 |
+
# key module.text_branch.resblocks.9.attn.out_proj.bias is different
|
159 |
+
# key module.text_branch.resblocks.9.ln_1.weight is different
|
160 |
+
# key module.text_branch.resblocks.9.ln_1.bias is different
|
161 |
+
# key module.text_branch.resblocks.9.mlp.c_fc.weight is different
|
162 |
+
# key module.text_branch.resblocks.9.mlp.c_fc.bias is different
|
163 |
+
# key module.text_branch.resblocks.9.mlp.c_proj.weight is different
|
164 |
+
# key module.text_branch.resblocks.9.mlp.c_proj.bias is different
|
165 |
+
# key module.text_branch.resblocks.9.ln_2.weight is different
|
166 |
+
# key module.text_branch.resblocks.9.ln_2.bias is different
|
167 |
+
# key module.text_branch.resblocks.10.attn.in_proj_weight is different
|
168 |
+
# key module.text_branch.resblocks.10.attn.in_proj_bias is different
|
169 |
+
# key module.text_branch.resblocks.10.attn.out_proj.weight is different
|
170 |
+
# key module.text_branch.resblocks.10.attn.out_proj.bias is different
|
171 |
+
# key module.text_branch.resblocks.10.ln_1.weight is different
|
172 |
+
# key module.text_branch.resblocks.10.ln_1.bias is different
|
173 |
+
# key module.text_branch.resblocks.10.mlp.c_fc.weight is different
|
174 |
+
# key module.text_branch.resblocks.10.mlp.c_fc.bias is different
|
175 |
+
# key module.text_branch.resblocks.10.mlp.c_proj.weight is different
|
176 |
+
# key module.text_branch.resblocks.10.mlp.c_proj.bias is different
|
177 |
+
# key module.text_branch.resblocks.10.ln_2.weight is different
|
178 |
+
# key module.text_branch.resblocks.10.ln_2.bias is different
|
179 |
+
# key module.text_branch.resblocks.11.attn.in_proj_weight is different
|
180 |
+
# key module.text_branch.resblocks.11.attn.in_proj_bias is different
|
181 |
+
# key module.text_branch.resblocks.11.attn.out_proj.weight is different
|
182 |
+
# key module.text_branch.resblocks.11.attn.out_proj.bias is different
|
183 |
+
# key module.text_branch.resblocks.11.ln_1.weight is different
|
184 |
+
# key module.text_branch.resblocks.11.ln_1.bias is different
|
185 |
+
# key module.text_branch.resblocks.11.mlp.c_fc.weight is different
|
186 |
+
# key module.text_branch.resblocks.11.mlp.c_fc.bias is different
|
187 |
+
# key module.text_branch.resblocks.11.mlp.c_proj.weight is different
|
188 |
+
# key module.text_branch.resblocks.11.mlp.c_proj.bias is different
|
189 |
+
# key module.text_branch.resblocks.11.ln_2.weight is different
|
190 |
+
# key module.text_branch.resblocks.11.ln_2.bias is different
|
191 |
+
# a_sum: tensor(12113.6445)
|
192 |
+
# b_sum: tensor(9883.4424)
|
193 |
+
# diff: tensor(2230.2021)
|
194 |
+
# True
|
195 |
+
|
196 |
+
|
197 |
+
# Transformer freeze:
|
198 |
+
# check_ckpt_diff("/fsx/clap_logs/2022_09_16-18_55_10-model_PANN-14-lr_0.001-b_160-j_4-p_fp32/checkpoints/epoch_10.pt", "/fsx/clap_logs/2022_09_16-18_55_10-model_PANN-14-lr_0.001-b_160-j_4-p_fp32/checkpoints/epoch_100.pt", "text_branch.resblocks")
|
199 |
+
|
200 |
+
# key module.text_branch.resblocks.0.attn.in_proj_weight is different
|
201 |
+
# key module.text_branch.resblocks.0.attn.in_proj_bias is different
|
202 |
+
# key module.text_branch.resblocks.0.attn.out_proj.weight is different
|
203 |
+
# key module.text_branch.resblocks.0.attn.out_proj.bias is different
|
204 |
+
# key module.text_branch.resblocks.0.ln_1.weight is different
|
205 |
+
# key module.text_branch.resblocks.0.ln_1.bias is different
|
206 |
+
# key module.text_branch.resblocks.0.mlp.c_fc.weight is different
|
207 |
+
# key module.text_branch.resblocks.0.mlp.c_fc.bias is different
|
208 |
+
# key module.text_branch.resblocks.0.mlp.c_proj.weight is different
|
209 |
+
# key module.text_branch.resblocks.0.mlp.c_proj.bias is different
|
210 |
+
# key module.text_branch.resblocks.0.ln_2.weight is different
|
211 |
+
# key module.text_branch.resblocks.0.ln_2.bias is different
|
212 |
+
# key module.text_branch.resblocks.1.attn.in_proj_weight is different
|
213 |
+
# key module.text_branch.resblocks.1.attn.in_proj_bias is different
|
214 |
+
# key module.text_branch.resblocks.1.attn.out_proj.weight is different
|
215 |
+
# key module.text_branch.resblocks.1.attn.out_proj.bias is different
|
216 |
+
# key module.text_branch.resblocks.1.ln_1.weight is different
|
217 |
+
# key module.text_branch.resblocks.1.ln_1.bias is different
|
218 |
+
# key module.text_branch.resblocks.1.mlp.c_fc.weight is different
|
219 |
+
# key module.text_branch.resblocks.1.mlp.c_fc.bias is different
|
220 |
+
# key module.text_branch.resblocks.1.mlp.c_proj.weight is different
|
221 |
+
# key module.text_branch.resblocks.1.mlp.c_proj.bias is different
|
222 |
+
# key module.text_branch.resblocks.1.ln_2.weight is different
|
223 |
+
# key module.text_branch.resblocks.1.ln_2.bias is different
|
224 |
+
# key module.text_branch.resblocks.2.attn.in_proj_weight is different
|
225 |
+
# key module.text_branch.resblocks.2.attn.in_proj_bias is different
|
226 |
+
# key module.text_branch.resblocks.2.attn.out_proj.weight is different
|
227 |
+
# key module.text_branch.resblocks.2.attn.out_proj.bias is different
|
228 |
+
# key module.text_branch.resblocks.2.ln_1.weight is different
|
229 |
+
# key module.text_branch.resblocks.2.ln_1.bias is different
|
230 |
+
# key module.text_branch.resblocks.2.mlp.c_fc.weight is different
|
231 |
+
# key module.text_branch.resblocks.2.mlp.c_fc.bias is different
|
232 |
+
# key module.text_branch.resblocks.2.mlp.c_proj.weight is different
|
233 |
+
# key module.text_branch.resblocks.2.mlp.c_proj.bias is different
|
234 |
+
# key module.text_branch.resblocks.2.ln_2.weight is different
|
235 |
+
# key module.text_branch.resblocks.2.ln_2.bias is different
|
236 |
+
# key module.text_branch.resblocks.3.attn.in_proj_weight is different
|
237 |
+
# key module.text_branch.resblocks.3.attn.in_proj_bias is different
|
238 |
+
# key module.text_branch.resblocks.3.attn.out_proj.weight is different
|
239 |
+
# key module.text_branch.resblocks.3.attn.out_proj.bias is different
|
240 |
+
# key module.text_branch.resblocks.3.ln_1.weight is different
|
241 |
+
# key module.text_branch.resblocks.3.ln_1.bias is different
|
242 |
+
# key module.text_branch.resblocks.3.mlp.c_fc.weight is different
|
243 |
+
# key module.text_branch.resblocks.3.mlp.c_fc.bias is different
|
244 |
+
# key module.text_branch.resblocks.3.mlp.c_proj.weight is different
|
245 |
+
# key module.text_branch.resblocks.3.mlp.c_proj.bias is different
|
246 |
+
# key module.text_branch.resblocks.3.ln_2.weight is different
|
247 |
+
# key module.text_branch.resblocks.3.ln_2.bias is different
|
248 |
+
# key module.text_branch.resblocks.4.attn.in_proj_weight is different
|
249 |
+
# key module.text_branch.resblocks.4.attn.in_proj_bias is different
|
250 |
+
# key module.text_branch.resblocks.4.attn.out_proj.weight is different
|
251 |
+
# key module.text_branch.resblocks.4.attn.out_proj.bias is different
|
252 |
+
# key module.text_branch.resblocks.4.ln_1.weight is different
|
253 |
+
# key module.text_branch.resblocks.4.ln_1.bias is different
|
254 |
+
# key module.text_branch.resblocks.4.mlp.c_fc.weight is different
|
255 |
+
# key module.text_branch.resblocks.4.mlp.c_fc.bias is different
|
256 |
+
# key module.text_branch.resblocks.4.mlp.c_proj.weight is different
|
257 |
+
# key module.text_branch.resblocks.4.mlp.c_proj.bias is different
|
258 |
+
# key module.text_branch.resblocks.4.ln_2.weight is different
|
259 |
+
# key module.text_branch.resblocks.4.ln_2.bias is different
|
260 |
+
# key module.text_branch.resblocks.5.attn.in_proj_weight is different
|
261 |
+
# key module.text_branch.resblocks.5.attn.in_proj_bias is different
|
262 |
+
# key module.text_branch.resblocks.5.attn.out_proj.weight is different
|
263 |
+
# key module.text_branch.resblocks.5.attn.out_proj.bias is different
|
264 |
+
# key module.text_branch.resblocks.5.ln_1.weight is different
|
265 |
+
# key module.text_branch.resblocks.5.ln_1.bias is different
|
266 |
+
# key module.text_branch.resblocks.5.mlp.c_fc.weight is different
|
267 |
+
# key module.text_branch.resblocks.5.mlp.c_fc.bias is different
|
268 |
+
# key module.text_branch.resblocks.5.mlp.c_proj.weight is different
|
269 |
+
# key module.text_branch.resblocks.5.mlp.c_proj.bias is different
|
270 |
+
# key module.text_branch.resblocks.5.ln_2.weight is different
|
271 |
+
# key module.text_branch.resblocks.5.ln_2.bias is different
|
272 |
+
# key module.text_branch.resblocks.6.attn.in_proj_weight is different
|
273 |
+
# key module.text_branch.resblocks.6.attn.in_proj_bias is different
|
274 |
+
# key module.text_branch.resblocks.6.attn.out_proj.weight is different
|
275 |
+
# key module.text_branch.resblocks.6.attn.out_proj.bias is different
|
276 |
+
# key module.text_branch.resblocks.6.ln_1.weight is different
|
277 |
+
# key module.text_branch.resblocks.6.ln_1.bias is different
|
278 |
+
# key module.text_branch.resblocks.6.mlp.c_fc.weight is different
|
279 |
+
# key module.text_branch.resblocks.6.mlp.c_fc.bias is different
|
280 |
+
# key module.text_branch.resblocks.6.mlp.c_proj.weight is different
|
281 |
+
# key module.text_branch.resblocks.6.mlp.c_proj.bias is different
|
282 |
+
# key module.text_branch.resblocks.6.ln_2.weight is different
|
283 |
+
# key module.text_branch.resblocks.6.ln_2.bias is different
|
284 |
+
# key module.text_branch.resblocks.7.attn.in_proj_weight is different
|
285 |
+
# key module.text_branch.resblocks.7.attn.in_proj_bias is different
|
286 |
+
# key module.text_branch.resblocks.7.attn.out_proj.weight is different
|
287 |
+
# key module.text_branch.resblocks.7.attn.out_proj.bias is different
|
288 |
+
# key module.text_branch.resblocks.7.ln_1.weight is different
|
289 |
+
# key module.text_branch.resblocks.7.ln_1.bias is different
|
290 |
+
# key module.text_branch.resblocks.7.mlp.c_fc.weight is different
|
291 |
+
# key module.text_branch.resblocks.7.mlp.c_fc.bias is different
|
292 |
+
# key module.text_branch.resblocks.7.mlp.c_proj.weight is different
|
293 |
+
# key module.text_branch.resblocks.7.mlp.c_proj.bias is different
|
294 |
+
# key module.text_branch.resblocks.7.ln_2.weight is different
|
295 |
+
# key module.text_branch.resblocks.7.ln_2.bias is different
|
296 |
+
# key module.text_branch.resblocks.8.attn.in_proj_weight is different
|
297 |
+
# key module.text_branch.resblocks.8.attn.in_proj_bias is different
|
298 |
+
# key module.text_branch.resblocks.8.attn.out_proj.weight is different
|
299 |
+
# key module.text_branch.resblocks.8.attn.out_proj.bias is different
|
300 |
+
# key module.text_branch.resblocks.8.ln_1.weight is different
|
301 |
+
# key module.text_branch.resblocks.8.ln_1.bias is different
|
302 |
+
# key module.text_branch.resblocks.8.mlp.c_fc.weight is different
|
303 |
+
# key module.text_branch.resblocks.8.mlp.c_fc.bias is different
|
304 |
+
# key module.text_branch.resblocks.8.mlp.c_proj.weight is different
|
305 |
+
# key module.text_branch.resblocks.8.mlp.c_proj.bias is different
|
306 |
+
# key module.text_branch.resblocks.8.ln_2.weight is different
|
307 |
+
# key module.text_branch.resblocks.8.ln_2.bias is different
|
308 |
+
# key module.text_branch.resblocks.9.attn.in_proj_weight is different
|
309 |
+
# key module.text_branch.resblocks.9.attn.in_proj_bias is different
|
310 |
+
# key module.text_branch.resblocks.9.attn.out_proj.weight is different
|
311 |
+
# key module.text_branch.resblocks.9.attn.out_proj.bias is different
|
312 |
+
# key module.text_branch.resblocks.9.ln_1.weight is different
|
313 |
+
# key module.text_branch.resblocks.9.ln_1.bias is different
|
314 |
+
# key module.text_branch.resblocks.9.mlp.c_fc.weight is different
|
315 |
+
# key module.text_branch.resblocks.9.mlp.c_fc.bias is different
|
316 |
+
# key module.text_branch.resblocks.9.mlp.c_proj.weight is different
|
317 |
+
# key module.text_branch.resblocks.9.mlp.c_proj.bias is different
|
318 |
+
# key module.text_branch.resblocks.9.ln_2.weight is different
|
319 |
+
# key module.text_branch.resblocks.9.ln_2.bias is different
|
320 |
+
# key module.text_branch.resblocks.10.attn.in_proj_weight is different
|
321 |
+
# key module.text_branch.resblocks.10.attn.in_proj_bias is different
|
322 |
+
# key module.text_branch.resblocks.10.attn.out_proj.weight is different
|
323 |
+
# key module.text_branch.resblocks.10.attn.out_proj.bias is different
|
324 |
+
# key module.text_branch.resblocks.10.ln_1.weight is different
|
325 |
+
# key module.text_branch.resblocks.10.ln_1.bias is different
|
326 |
+
# key module.text_branch.resblocks.10.mlp.c_fc.weight is different
|
327 |
+
# key module.text_branch.resblocks.10.mlp.c_fc.bias is different
|
328 |
+
# key module.text_branch.resblocks.10.mlp.c_proj.weight is different
|
329 |
+
# key module.text_branch.resblocks.10.mlp.c_proj.bias is different
|
330 |
+
# key module.text_branch.resblocks.10.ln_2.weight is different
|
331 |
+
# key module.text_branch.resblocks.10.ln_2.bias is different
|
332 |
+
# key module.text_branch.resblocks.11.attn.in_proj_weight is different
|
333 |
+
# key module.text_branch.resblocks.11.attn.in_proj_bias is different
|
334 |
+
# key module.text_branch.resblocks.11.attn.out_proj.weight is different
|
335 |
+
# key module.text_branch.resblocks.11.attn.out_proj.bias is different
|
336 |
+
# key module.text_branch.resblocks.11.ln_1.weight is different
|
337 |
+
# key module.text_branch.resblocks.11.ln_1.bias is different
|
338 |
+
# key module.text_branch.resblocks.11.mlp.c_fc.weight is different
|
339 |
+
# key module.text_branch.resblocks.11.mlp.c_fc.bias is different
|
340 |
+
# key module.text_branch.resblocks.11.mlp.c_proj.weight is different
|
341 |
+
# key module.text_branch.resblocks.11.mlp.c_proj.bias is different
|
342 |
+
# key module.text_branch.resblocks.11.ln_2.weight is different
|
343 |
+
# key module.text_branch.resblocks.11.ln_2.bias is different
|
344 |
+
# a_sum: tensor(12133.6348)
|
345 |
+
# b_sum: tensor(10423.9521)
|
346 |
+
# diff: tensor(1709.6826)
|
347 |
+
# True
|
348 |
+
|
349 |
+
|
350 |
+
# bert no freeze:
|
351 |
+
# check_ckpt_diff("/fsx/clap_logs/2022_09_14-02_33_11-model_PANN-14-lr_0.0001-b_160-j_4-p_fp32/checkpoints/epoch_10.pt", "/fsx/clap_logs/2022_09_14-02_33_11-model_PANN-14-lr_0.0001-b_160-j_4-p_fp32/checkpoints/epoch_100.pt", "text_branch.encoder")
|
352 |
+
|
353 |
+
# key module.text_branch.encoder.layer.0.attention.self.query.weight is different
|
354 |
+
# key module.text_branch.encoder.layer.0.attention.self.query.bias is different
|
355 |
+
# key module.text_branch.encoder.layer.0.attention.self.key.weight is different
|
356 |
+
# key module.text_branch.encoder.layer.0.attention.self.key.bias is different
|
357 |
+
# key module.text_branch.encoder.layer.0.attention.self.value.weight is different
|
358 |
+
# key module.text_branch.encoder.layer.0.attention.self.value.bias is different
|
359 |
+
# key module.text_branch.encoder.layer.0.attention.output.dense.weight is different
|
360 |
+
# key module.text_branch.encoder.layer.0.attention.output.dense.bias is different
|
361 |
+
# key module.text_branch.encoder.layer.0.attention.output.LayerNorm.weight is different
|
362 |
+
# key module.text_branch.encoder.layer.0.attention.output.LayerNorm.bias is different
|
363 |
+
# key module.text_branch.encoder.layer.0.intermediate.dense.weight is different
|
364 |
+
# key module.text_branch.encoder.layer.0.intermediate.dense.bias is different
|
365 |
+
# key module.text_branch.encoder.layer.0.output.dense.weight is different
|
366 |
+
# key module.text_branch.encoder.layer.0.output.dense.bias is different
|
367 |
+
# key module.text_branch.encoder.layer.0.output.LayerNorm.weight is different
|
368 |
+
# key module.text_branch.encoder.layer.0.output.LayerNorm.bias is different
|
369 |
+
# key module.text_branch.encoder.layer.1.attention.self.query.weight is different
|
370 |
+
# key module.text_branch.encoder.layer.1.attention.self.query.bias is different
|
371 |
+
# key module.text_branch.encoder.layer.1.attention.self.key.weight is different
|
372 |
+
# key module.text_branch.encoder.layer.1.attention.self.key.bias is different
|
373 |
+
# key module.text_branch.encoder.layer.1.attention.self.value.weight is different
|
374 |
+
# key module.text_branch.encoder.layer.1.attention.self.value.bias is different
|
375 |
+
# key module.text_branch.encoder.layer.1.attention.output.dense.weight is different
|
376 |
+
# key module.text_branch.encoder.layer.1.attention.output.dense.bias is different
|
377 |
+
# key module.text_branch.encoder.layer.1.attention.output.LayerNorm.weight is different
|
378 |
+
# key module.text_branch.encoder.layer.1.attention.output.LayerNorm.bias is different
|
379 |
+
# key module.text_branch.encoder.layer.1.intermediate.dense.weight is different
|
380 |
+
# key module.text_branch.encoder.layer.1.intermediate.dense.bias is different
|
381 |
+
# key module.text_branch.encoder.layer.1.output.dense.weight is different
|
382 |
+
# key module.text_branch.encoder.layer.1.output.dense.bias is different
|
383 |
+
# key module.text_branch.encoder.layer.1.output.LayerNorm.weight is different
|
384 |
+
# key module.text_branch.encoder.layer.1.output.LayerNorm.bias is different
|
385 |
+
# key module.text_branch.encoder.layer.2.attention.self.query.weight is different
|
386 |
+
# key module.text_branch.encoder.layer.2.attention.self.query.bias is different
|
387 |
+
# key module.text_branch.encoder.layer.2.attention.self.key.weight is different
|
388 |
+
# key module.text_branch.encoder.layer.2.attention.self.key.bias is different
|
389 |
+
# key module.text_branch.encoder.layer.2.attention.self.value.weight is different
|
390 |
+
# key module.text_branch.encoder.layer.2.attention.self.value.bias is different
|
391 |
+
# key module.text_branch.encoder.layer.2.attention.output.dense.weight is different
|
392 |
+
# key module.text_branch.encoder.layer.2.attention.output.dense.bias is different
|
393 |
+
# key module.text_branch.encoder.layer.2.attention.output.LayerNorm.weight is different
|
394 |
+
# key module.text_branch.encoder.layer.2.attention.output.LayerNorm.bias is different
|
395 |
+
# key module.text_branch.encoder.layer.2.intermediate.dense.weight is different
|
396 |
+
# key module.text_branch.encoder.layer.2.intermediate.dense.bias is different
|
397 |
+
# key module.text_branch.encoder.layer.2.output.dense.weight is different
|
398 |
+
# key module.text_branch.encoder.layer.2.output.dense.bias is different
|
399 |
+
# key module.text_branch.encoder.layer.2.output.LayerNorm.weight is different
|
400 |
+
# key module.text_branch.encoder.layer.2.output.LayerNorm.bias is different
|
401 |
+
# key module.text_branch.encoder.layer.3.attention.self.query.weight is different
|
402 |
+
# key module.text_branch.encoder.layer.3.attention.self.query.bias is different
|
403 |
+
# key module.text_branch.encoder.layer.3.attention.self.key.weight is different
|
404 |
+
# key module.text_branch.encoder.layer.3.attention.self.key.bias is different
|
405 |
+
# key module.text_branch.encoder.layer.3.attention.self.value.weight is different
|
406 |
+
# key module.text_branch.encoder.layer.3.attention.self.value.bias is different
|
407 |
+
# key module.text_branch.encoder.layer.3.attention.output.dense.weight is different
|
408 |
+
# key module.text_branch.encoder.layer.3.attention.output.dense.bias is different
|
409 |
+
# key module.text_branch.encoder.layer.3.attention.output.LayerNorm.weight is different
|
410 |
+
# key module.text_branch.encoder.layer.3.attention.output.LayerNorm.bias is different
|
411 |
+
# key module.text_branch.encoder.layer.3.intermediate.dense.weight is different
|
412 |
+
# key module.text_branch.encoder.layer.3.intermediate.dense.bias is different
|
413 |
+
# key module.text_branch.encoder.layer.3.output.dense.weight is different
|
414 |
+
# key module.text_branch.encoder.layer.3.output.dense.bias is different
|
415 |
+
# key module.text_branch.encoder.layer.3.output.LayerNorm.weight is different
|
416 |
+
# key module.text_branch.encoder.layer.3.output.LayerNorm.bias is different
|
417 |
+
# key module.text_branch.encoder.layer.4.attention.self.query.weight is different
|
418 |
+
# key module.text_branch.encoder.layer.4.attention.self.query.bias is different
|
419 |
+
# key module.text_branch.encoder.layer.4.attention.self.key.weight is different
|
420 |
+
# key module.text_branch.encoder.layer.4.attention.self.key.bias is different
|
421 |
+
# key module.text_branch.encoder.layer.4.attention.self.value.weight is different
|
422 |
+
# key module.text_branch.encoder.layer.4.attention.self.value.bias is different
|
423 |
+
# key module.text_branch.encoder.layer.4.attention.output.dense.weight is different
|
424 |
+
# key module.text_branch.encoder.layer.4.attention.output.dense.bias is different
|
425 |
+
# key module.text_branch.encoder.layer.4.attention.output.LayerNorm.weight is different
|
426 |
+
# key module.text_branch.encoder.layer.4.attention.output.LayerNorm.bias is different
|
427 |
+
# key module.text_branch.encoder.layer.4.intermediate.dense.weight is different
|
428 |
+
# key module.text_branch.encoder.layer.4.intermediate.dense.bias is different
|
429 |
+
# key module.text_branch.encoder.layer.4.output.dense.weight is different
|
430 |
+
# key module.text_branch.encoder.layer.4.output.dense.bias is different
|
431 |
+
# key module.text_branch.encoder.layer.4.output.LayerNorm.weight is different
|
432 |
+
# key module.text_branch.encoder.layer.4.output.LayerNorm.bias is different
|
433 |
+
# key module.text_branch.encoder.layer.5.attention.self.query.weight is different
|
434 |
+
# key module.text_branch.encoder.layer.5.attention.self.query.bias is different
|
435 |
+
# key module.text_branch.encoder.layer.5.attention.self.key.weight is different
|
436 |
+
# key module.text_branch.encoder.layer.5.attention.self.key.bias is different
|
437 |
+
# key module.text_branch.encoder.layer.5.attention.self.value.weight is different
|
438 |
+
# key module.text_branch.encoder.layer.5.attention.self.value.bias is different
|
439 |
+
# key module.text_branch.encoder.layer.5.attention.output.dense.weight is different
|
440 |
+
# key module.text_branch.encoder.layer.5.attention.output.dense.bias is different
|
441 |
+
# key module.text_branch.encoder.layer.5.attention.output.LayerNorm.weight is different
|
442 |
+
# key module.text_branch.encoder.layer.5.attention.output.LayerNorm.bias is different
|
443 |
+
# key module.text_branch.encoder.layer.5.intermediate.dense.weight is different
|
444 |
+
# key module.text_branch.encoder.layer.5.intermediate.dense.bias is different
|
445 |
+
# key module.text_branch.encoder.layer.5.output.dense.weight is different
|
446 |
+
# key module.text_branch.encoder.layer.5.output.dense.bias is different
|
447 |
+
# key module.text_branch.encoder.layer.5.output.LayerNorm.weight is different
|
448 |
+
# key module.text_branch.encoder.layer.5.output.LayerNorm.bias is different
|
449 |
+
# key module.text_branch.encoder.layer.6.attention.self.query.weight is different
|
450 |
+
# key module.text_branch.encoder.layer.6.attention.self.query.bias is different
|
451 |
+
# key module.text_branch.encoder.layer.6.attention.self.key.weight is different
|
452 |
+
# key module.text_branch.encoder.layer.6.attention.self.key.bias is different
|
453 |
+
# key module.text_branch.encoder.layer.6.attention.self.value.weight is different
|
454 |
+
# key module.text_branch.encoder.layer.6.attention.self.value.bias is different
|
455 |
+
# key module.text_branch.encoder.layer.6.attention.output.dense.weight is different
|
456 |
+
# key module.text_branch.encoder.layer.6.attention.output.dense.bias is different
|
457 |
+
# key module.text_branch.encoder.layer.6.attention.output.LayerNorm.weight is different
|
458 |
+
# key module.text_branch.encoder.layer.6.attention.output.LayerNorm.bias is different
|
459 |
+
# key module.text_branch.encoder.layer.6.intermediate.dense.weight is different
|
460 |
+
# key module.text_branch.encoder.layer.6.intermediate.dense.bias is different
|
461 |
+
# key module.text_branch.encoder.layer.6.output.dense.weight is different
|
462 |
+
# key module.text_branch.encoder.layer.6.output.dense.bias is different
|
463 |
+
# key module.text_branch.encoder.layer.6.output.LayerNorm.weight is different
|
464 |
+
# key module.text_branch.encoder.layer.6.output.LayerNorm.bias is different
|
465 |
+
# key module.text_branch.encoder.layer.7.attention.self.query.weight is different
|
466 |
+
# key module.text_branch.encoder.layer.7.attention.self.query.bias is different
|
467 |
+
# key module.text_branch.encoder.layer.7.attention.self.key.weight is different
|
468 |
+
# key module.text_branch.encoder.layer.7.attention.self.key.bias is different
|
469 |
+
# key module.text_branch.encoder.layer.7.attention.self.value.weight is different
|
470 |
+
# key module.text_branch.encoder.layer.7.attention.self.value.bias is different
|
471 |
+
# key module.text_branch.encoder.layer.7.attention.output.dense.weight is different
|
472 |
+
# key module.text_branch.encoder.layer.7.attention.output.dense.bias is different
|
473 |
+
# key module.text_branch.encoder.layer.7.attention.output.LayerNorm.weight is different
|
474 |
+
# key module.text_branch.encoder.layer.7.attention.output.LayerNorm.bias is different
|
475 |
+
# key module.text_branch.encoder.layer.7.intermediate.dense.weight is different
|
476 |
+
# key module.text_branch.encoder.layer.7.intermediate.dense.bias is different
|
477 |
+
# key module.text_branch.encoder.layer.7.output.dense.weight is different
|
478 |
+
# key module.text_branch.encoder.layer.7.output.dense.bias is different
|
479 |
+
# key module.text_branch.encoder.layer.7.output.LayerNorm.weight is different
|
480 |
+
# key module.text_branch.encoder.layer.7.output.LayerNorm.bias is different
|
481 |
+
# key module.text_branch.encoder.layer.8.attention.self.query.weight is different
|
482 |
+
# key module.text_branch.encoder.layer.8.attention.self.query.bias is different
|
483 |
+
# key module.text_branch.encoder.layer.8.attention.self.key.weight is different
|
484 |
+
# key module.text_branch.encoder.layer.8.attention.self.key.bias is different
|
485 |
+
# key module.text_branch.encoder.layer.8.attention.self.value.weight is different
|
486 |
+
# key module.text_branch.encoder.layer.8.attention.self.value.bias is different
|
487 |
+
# key module.text_branch.encoder.layer.8.attention.output.dense.weight is different
|
488 |
+
# key module.text_branch.encoder.layer.8.attention.output.dense.bias is different
|
489 |
+
# key module.text_branch.encoder.layer.8.attention.output.LayerNorm.weight is different
|
490 |
+
# key module.text_branch.encoder.layer.8.attention.output.LayerNorm.bias is different
|
491 |
+
# key module.text_branch.encoder.layer.8.intermediate.dense.weight is different
|
492 |
+
# key module.text_branch.encoder.layer.8.intermediate.dense.bias is different
|
493 |
+
# key module.text_branch.encoder.layer.8.output.dense.weight is different
|
494 |
+
# key module.text_branch.encoder.layer.8.output.dense.bias is different
|
495 |
+
# key module.text_branch.encoder.layer.8.output.LayerNorm.weight is different
|
496 |
+
# key module.text_branch.encoder.layer.8.output.LayerNorm.bias is different
|
497 |
+
# key module.text_branch.encoder.layer.9.attention.self.query.weight is different
|
498 |
+
# key module.text_branch.encoder.layer.9.attention.self.query.bias is different
|
499 |
+
# key module.text_branch.encoder.layer.9.attention.self.key.weight is different
|
500 |
+
# key module.text_branch.encoder.layer.9.attention.self.key.bias is different
|
501 |
+
# key module.text_branch.encoder.layer.9.attention.self.value.weight is different
|
502 |
+
# key module.text_branch.encoder.layer.9.attention.self.value.bias is different
|
503 |
+
# key module.text_branch.encoder.layer.9.attention.output.dense.weight is different
|
504 |
+
# key module.text_branch.encoder.layer.9.attention.output.dense.bias is different
|
505 |
+
# key module.text_branch.encoder.layer.9.attention.output.LayerNorm.weight is different
|
506 |
+
# key module.text_branch.encoder.layer.9.attention.output.LayerNorm.bias is different
|
507 |
+
# key module.text_branch.encoder.layer.9.intermediate.dense.weight is different
|
508 |
+
# key module.text_branch.encoder.layer.9.intermediate.dense.bias is different
|
509 |
+
# key module.text_branch.encoder.layer.9.output.dense.weight is different
|
510 |
+
# key module.text_branch.encoder.layer.9.output.dense.bias is different
|
511 |
+
# key module.text_branch.encoder.layer.9.output.LayerNorm.weight is different
|
512 |
+
# key module.text_branch.encoder.layer.9.output.LayerNorm.bias is different
|
513 |
+
# key module.text_branch.encoder.layer.10.attention.self.query.weight is different
|
514 |
+
# key module.text_branch.encoder.layer.10.attention.self.query.bias is different
|
515 |
+
# key module.text_branch.encoder.layer.10.attention.self.key.weight is different
|
516 |
+
# key module.text_branch.encoder.layer.10.attention.self.key.bias is different
|
517 |
+
# key module.text_branch.encoder.layer.10.attention.self.value.weight is different
|
518 |
+
# key module.text_branch.encoder.layer.10.attention.self.value.bias is different
|
519 |
+
# key module.text_branch.encoder.layer.10.attention.output.dense.weight is different
|
520 |
+
# key module.text_branch.encoder.layer.10.attention.output.dense.bias is different
|
521 |
+
# key module.text_branch.encoder.layer.10.attention.output.LayerNorm.weight is different
|
522 |
+
# key module.text_branch.encoder.layer.10.attention.output.LayerNorm.bias is different
|
523 |
+
# key module.text_branch.encoder.layer.10.intermediate.dense.weight is different
|
524 |
+
# key module.text_branch.encoder.layer.10.intermediate.dense.bias is different
|
525 |
+
# key module.text_branch.encoder.layer.10.output.dense.weight is different
|
526 |
+
# key module.text_branch.encoder.layer.10.output.dense.bias is different
|
527 |
+
# key module.text_branch.encoder.layer.10.output.LayerNorm.weight is different
|
528 |
+
# key module.text_branch.encoder.layer.10.output.LayerNorm.bias is different
|
529 |
+
# key module.text_branch.encoder.layer.11.attention.self.query.weight is different
|
530 |
+
# key module.text_branch.encoder.layer.11.attention.self.query.bias is different
|
531 |
+
# key module.text_branch.encoder.layer.11.attention.self.key.weight is different
|
532 |
+
# key module.text_branch.encoder.layer.11.attention.self.key.bias is different
|
533 |
+
# key module.text_branch.encoder.layer.11.attention.self.value.weight is different
|
534 |
+
# key module.text_branch.encoder.layer.11.attention.self.value.bias is different
|
535 |
+
# key module.text_branch.encoder.layer.11.attention.output.dense.weight is different
|
536 |
+
# key module.text_branch.encoder.layer.11.attention.output.dense.bias is different
|
537 |
+
# key module.text_branch.encoder.layer.11.attention.output.LayerNorm.weight is different
|
538 |
+
# key module.text_branch.encoder.layer.11.attention.output.LayerNorm.bias is different
|
539 |
+
# key module.text_branch.encoder.layer.11.intermediate.dense.weight is different
|
540 |
+
# key module.text_branch.encoder.layer.11.intermediate.dense.bias is different
|
541 |
+
# key module.text_branch.encoder.layer.11.output.dense.weight is different
|
542 |
+
# key module.text_branch.encoder.layer.11.output.dense.bias is different
|
543 |
+
# key module.text_branch.encoder.layer.11.output.LayerNorm.weight is different
|
544 |
+
# key module.text_branch.encoder.layer.11.output.LayerNorm.bias is different
|
545 |
+
# a_sum: tensor(15185.1230)
|
546 |
+
# b_sum: tensor(15576.5596)
|
547 |
+
# diff: tensor(-391.4365)
|
548 |
+
# True
|
549 |
+
|
550 |
+
|
551 |
+
# bert freeze:
|
552 |
+
# check_ckpt_diff("/fsx/clap_logs/2022_09_13-01_25_15-model_PANN-14-lr_0.0001-b_160-j_4-p_fp32/checkpoints/epoch_10.pt", "/fsx/clap_logs/2022_09_13-01_25_15-model_PANN-14-lr_0.0001-b_160-j_4-p_fp32/checkpoints/epoch_100.pt", "text_branch.encoder")
|
553 |
+
|
554 |
+
# key module.text_branch.encoder.layer.0.attention.self.query.weight is different
|
555 |
+
# key module.text_branch.encoder.layer.0.attention.self.query.bias is different
|
556 |
+
# key module.text_branch.encoder.layer.0.attention.self.key.weight is different
|
557 |
+
# key module.text_branch.encoder.layer.0.attention.self.key.bias is different
|
558 |
+
# key module.text_branch.encoder.layer.0.attention.self.value.weight is different
|
559 |
+
# key module.text_branch.encoder.layer.0.attention.self.value.bias is different
|
560 |
+
# key module.text_branch.encoder.layer.0.attention.output.dense.weight is different
|
561 |
+
# key module.text_branch.encoder.layer.0.attention.output.dense.bias is different
|
562 |
+
# key module.text_branch.encoder.layer.0.attention.output.LayerNorm.weight is different
|
563 |
+
# key module.text_branch.encoder.layer.0.attention.output.LayerNorm.bias is different
|
564 |
+
# key module.text_branch.encoder.layer.0.intermediate.dense.weight is different
|
565 |
+
# key module.text_branch.encoder.layer.0.intermediate.dense.bias is different
|
566 |
+
# key module.text_branch.encoder.layer.0.output.dense.weight is different
|
567 |
+
# key module.text_branch.encoder.layer.0.output.dense.bias is different
|
568 |
+
# key module.text_branch.encoder.layer.0.output.LayerNorm.weight is different
|
569 |
+
# key module.text_branch.encoder.layer.0.output.LayerNorm.bias is different
|
570 |
+
# key module.text_branch.encoder.layer.1.attention.self.query.weight is different
|
571 |
+
# key module.text_branch.encoder.layer.1.attention.self.query.bias is different
|
572 |
+
# key module.text_branch.encoder.layer.1.attention.self.key.weight is different
|
573 |
+
# key module.text_branch.encoder.layer.1.attention.self.key.bias is different
|
574 |
+
# key module.text_branch.encoder.layer.1.attention.self.value.weight is different
|
575 |
+
# key module.text_branch.encoder.layer.1.attention.self.value.bias is different
|
576 |
+
# key module.text_branch.encoder.layer.1.attention.output.dense.weight is different
|
577 |
+
# key module.text_branch.encoder.layer.1.attention.output.dense.bias is different
|
578 |
+
# key module.text_branch.encoder.layer.1.attention.output.LayerNorm.weight is different
|
579 |
+
# key module.text_branch.encoder.layer.1.attention.output.LayerNorm.bias is different
|
580 |
+
# key module.text_branch.encoder.layer.1.intermediate.dense.weight is different
|
581 |
+
# key module.text_branch.encoder.layer.1.intermediate.dense.bias is different
|
582 |
+
# key module.text_branch.encoder.layer.1.output.dense.weight is different
|
583 |
+
# key module.text_branch.encoder.layer.1.output.dense.bias is different
|
584 |
+
# key module.text_branch.encoder.layer.1.output.LayerNorm.weight is different
|
585 |
+
# key module.text_branch.encoder.layer.1.output.LayerNorm.bias is different
|
586 |
+
# key module.text_branch.encoder.layer.2.attention.self.query.weight is different
|
587 |
+
# key module.text_branch.encoder.layer.2.attention.self.query.bias is different
|
588 |
+
# key module.text_branch.encoder.layer.2.attention.self.key.weight is different
|
589 |
+
# key module.text_branch.encoder.layer.2.attention.self.key.bias is different
|
590 |
+
# key module.text_branch.encoder.layer.2.attention.self.value.weight is different
|
591 |
+
# key module.text_branch.encoder.layer.2.attention.self.value.bias is different
|
592 |
+
# key module.text_branch.encoder.layer.2.attention.output.dense.weight is different
|
593 |
+
# key module.text_branch.encoder.layer.2.attention.output.dense.bias is different
|
594 |
+
# key module.text_branch.encoder.layer.2.attention.output.LayerNorm.weight is different
|
595 |
+
# key module.text_branch.encoder.layer.2.attention.output.LayerNorm.bias is different
|
596 |
+
# key module.text_branch.encoder.layer.2.intermediate.dense.weight is different
|
597 |
+
# key module.text_branch.encoder.layer.2.intermediate.dense.bias is different
|
598 |
+
# key module.text_branch.encoder.layer.2.output.dense.weight is different
|
599 |
+
# key module.text_branch.encoder.layer.2.output.dense.bias is different
|
600 |
+
# key module.text_branch.encoder.layer.2.output.LayerNorm.weight is different
|
601 |
+
# key module.text_branch.encoder.layer.2.output.LayerNorm.bias is different
|
602 |
+
# key module.text_branch.encoder.layer.3.attention.self.query.weight is different
|
603 |
+
# key module.text_branch.encoder.layer.3.attention.self.query.bias is different
|
604 |
+
# key module.text_branch.encoder.layer.3.attention.self.key.weight is different
|
605 |
+
# key module.text_branch.encoder.layer.3.attention.self.key.bias is different
|
606 |
+
# key module.text_branch.encoder.layer.3.attention.self.value.weight is different
|
607 |
+
# key module.text_branch.encoder.layer.3.attention.self.value.bias is different
|
608 |
+
# key module.text_branch.encoder.layer.3.attention.output.dense.weight is different
|
609 |
+
# key module.text_branch.encoder.layer.3.attention.output.dense.bias is different
|
610 |
+
# key module.text_branch.encoder.layer.3.attention.output.LayerNorm.weight is different
|
611 |
+
# key module.text_branch.encoder.layer.3.attention.output.LayerNorm.bias is different
|
612 |
+
# key module.text_branch.encoder.layer.3.intermediate.dense.weight is different
|
613 |
+
# key module.text_branch.encoder.layer.3.intermediate.dense.bias is different
|
614 |
+
# key module.text_branch.encoder.layer.3.output.dense.weight is different
|
615 |
+
# key module.text_branch.encoder.layer.3.output.dense.bias is different
|
616 |
+
# key module.text_branch.encoder.layer.3.output.LayerNorm.weight is different
|
617 |
+
# key module.text_branch.encoder.layer.3.output.LayerNorm.bias is different
|
618 |
+
# key module.text_branch.encoder.layer.4.attention.self.query.weight is different
|
619 |
+
# key module.text_branch.encoder.layer.4.attention.self.query.bias is different
|
620 |
+
# key module.text_branch.encoder.layer.4.attention.self.key.weight is different
|
621 |
+
# key module.text_branch.encoder.layer.4.attention.self.key.bias is different
|
622 |
+
# key module.text_branch.encoder.layer.4.attention.self.value.weight is different
|
623 |
+
# key module.text_branch.encoder.layer.4.attention.self.value.bias is different
|
624 |
+
# key module.text_branch.encoder.layer.4.attention.output.dense.weight is different
|
625 |
+
# key module.text_branch.encoder.layer.4.attention.output.dense.bias is different
|
626 |
+
# key module.text_branch.encoder.layer.4.attention.output.LayerNorm.weight is different
|
627 |
+
# key module.text_branch.encoder.layer.4.attention.output.LayerNorm.bias is different
|
628 |
+
# key module.text_branch.encoder.layer.4.intermediate.dense.weight is different
|
629 |
+
# key module.text_branch.encoder.layer.4.intermediate.dense.bias is different
|
630 |
+
# key module.text_branch.encoder.layer.4.output.dense.weight is different
|
631 |
+
# key module.text_branch.encoder.layer.4.output.dense.bias is different
|
632 |
+
# key module.text_branch.encoder.layer.4.output.LayerNorm.weight is different
|
633 |
+
# key module.text_branch.encoder.layer.4.output.LayerNorm.bias is different
|
634 |
+
# key module.text_branch.encoder.layer.5.attention.self.query.weight is different
|
635 |
+
# key module.text_branch.encoder.layer.5.attention.self.query.bias is different
|
636 |
+
# key module.text_branch.encoder.layer.5.attention.self.key.weight is different
|
637 |
+
# key module.text_branch.encoder.layer.5.attention.self.key.bias is different
|
638 |
+
# key module.text_branch.encoder.layer.5.attention.self.value.weight is different
|
639 |
+
# key module.text_branch.encoder.layer.5.attention.self.value.bias is different
|
640 |
+
# key module.text_branch.encoder.layer.5.attention.output.dense.weight is different
|
641 |
+
# key module.text_branch.encoder.layer.5.attention.output.dense.bias is different
|
642 |
+
# key module.text_branch.encoder.layer.5.attention.output.LayerNorm.weight is different
|
643 |
+
# key module.text_branch.encoder.layer.5.attention.output.LayerNorm.bias is different
|
644 |
+
# key module.text_branch.encoder.layer.5.intermediate.dense.weight is different
|
645 |
+
# key module.text_branch.encoder.layer.5.intermediate.dense.bias is different
|
646 |
+
# key module.text_branch.encoder.layer.5.output.dense.weight is different
|
647 |
+
# key module.text_branch.encoder.layer.5.output.dense.bias is different
|
648 |
+
# key module.text_branch.encoder.layer.5.output.LayerNorm.weight is different
|
649 |
+
# key module.text_branch.encoder.layer.5.output.LayerNorm.bias is different
|
650 |
+
# key module.text_branch.encoder.layer.6.attention.self.query.weight is different
|
651 |
+
# key module.text_branch.encoder.layer.6.attention.self.query.bias is different
|
652 |
+
# key module.text_branch.encoder.layer.6.attention.self.key.weight is different
|
653 |
+
# key module.text_branch.encoder.layer.6.attention.self.key.bias is different
|
654 |
+
# key module.text_branch.encoder.layer.6.attention.self.value.weight is different
|
655 |
+
# key module.text_branch.encoder.layer.6.attention.self.value.bias is different
|
656 |
+
# key module.text_branch.encoder.layer.6.attention.output.dense.weight is different
|
657 |
+
# key module.text_branch.encoder.layer.6.attention.output.dense.bias is different
|
658 |
+
# key module.text_branch.encoder.layer.6.attention.output.LayerNorm.weight is different
|
659 |
+
# key module.text_branch.encoder.layer.6.attention.output.LayerNorm.bias is different
|
660 |
+
# key module.text_branch.encoder.layer.6.intermediate.dense.weight is different
|
661 |
+
# key module.text_branch.encoder.layer.6.intermediate.dense.bias is different
|
662 |
+
# key module.text_branch.encoder.layer.6.output.dense.weight is different
|
663 |
+
# key module.text_branch.encoder.layer.6.output.dense.bias is different
|
664 |
+
# key module.text_branch.encoder.layer.6.output.LayerNorm.weight is different
|
665 |
+
# key module.text_branch.encoder.layer.6.output.LayerNorm.bias is different
|
666 |
+
# key module.text_branch.encoder.layer.7.attention.self.query.weight is different
|
667 |
+
# key module.text_branch.encoder.layer.7.attention.self.query.bias is different
|
668 |
+
# key module.text_branch.encoder.layer.7.attention.self.key.weight is different
|
669 |
+
# key module.text_branch.encoder.layer.7.attention.self.key.bias is different
|
670 |
+
# key module.text_branch.encoder.layer.7.attention.self.value.weight is different
|
671 |
+
# key module.text_branch.encoder.layer.7.attention.self.value.bias is different
|
672 |
+
# key module.text_branch.encoder.layer.7.attention.output.dense.weight is different
|
673 |
+
# key module.text_branch.encoder.layer.7.attention.output.dense.bias is different
|
674 |
+
# key module.text_branch.encoder.layer.7.attention.output.LayerNorm.weight is different
|
675 |
+
# key module.text_branch.encoder.layer.7.attention.output.LayerNorm.bias is different
|
676 |
+
# key module.text_branch.encoder.layer.7.intermediate.dense.weight is different
|
677 |
+
# key module.text_branch.encoder.layer.7.intermediate.dense.bias is different
|
678 |
+
# key module.text_branch.encoder.layer.7.output.dense.weight is different
|
679 |
+
# key module.text_branch.encoder.layer.7.output.dense.bias is different
|
680 |
+
# key module.text_branch.encoder.layer.7.output.LayerNorm.weight is different
|
681 |
+
# key module.text_branch.encoder.layer.7.output.LayerNorm.bias is different
|
682 |
+
# key module.text_branch.encoder.layer.8.attention.self.query.weight is different
|
683 |
+
# key module.text_branch.encoder.layer.8.attention.self.query.bias is different
|
684 |
+
# key module.text_branch.encoder.layer.8.attention.self.key.weight is different
|
685 |
+
# key module.text_branch.encoder.layer.8.attention.self.key.bias is different
|
686 |
+
# key module.text_branch.encoder.layer.8.attention.self.value.weight is different
|
687 |
+
# key module.text_branch.encoder.layer.8.attention.self.value.bias is different
|
688 |
+
# key module.text_branch.encoder.layer.8.attention.output.dense.weight is different
|
689 |
+
# key module.text_branch.encoder.layer.8.attention.output.dense.bias is different
|
690 |
+
# key module.text_branch.encoder.layer.8.attention.output.LayerNorm.weight is different
|
691 |
+
# key module.text_branch.encoder.layer.8.attention.output.LayerNorm.bias is different
|
692 |
+
# key module.text_branch.encoder.layer.8.intermediate.dense.weight is different
|
693 |
+
# key module.text_branch.encoder.layer.8.intermediate.dense.bias is different
|
694 |
+
# key module.text_branch.encoder.layer.8.output.dense.weight is different
|
695 |
+
# key module.text_branch.encoder.layer.8.output.dense.bias is different
|
696 |
+
# key module.text_branch.encoder.layer.8.output.LayerNorm.weight is different
|
697 |
+
# key module.text_branch.encoder.layer.8.output.LayerNorm.bias is different
|
698 |
+
# key module.text_branch.encoder.layer.9.attention.self.query.weight is different
|
699 |
+
# key module.text_branch.encoder.layer.9.attention.self.query.bias is different
|
700 |
+
# key module.text_branch.encoder.layer.9.attention.self.key.weight is different
|
701 |
+
# key module.text_branch.encoder.layer.9.attention.self.key.bias is different
|
702 |
+
# key module.text_branch.encoder.layer.9.attention.self.value.weight is different
|
703 |
+
# key module.text_branch.encoder.layer.9.attention.self.value.bias is different
|
704 |
+
# key module.text_branch.encoder.layer.9.attention.output.dense.weight is different
|
705 |
+
# key module.text_branch.encoder.layer.9.attention.output.dense.bias is different
|
706 |
+
# key module.text_branch.encoder.layer.9.attention.output.LayerNorm.weight is different
|
707 |
+
# key module.text_branch.encoder.layer.9.attention.output.LayerNorm.bias is different
|
708 |
+
# key module.text_branch.encoder.layer.9.intermediate.dense.weight is different
|
709 |
+
# key module.text_branch.encoder.layer.9.intermediate.dense.bias is different
|
710 |
+
# key module.text_branch.encoder.layer.9.output.dense.weight is different
|
711 |
+
# key module.text_branch.encoder.layer.9.output.dense.bias is different
|
712 |
+
# key module.text_branch.encoder.layer.9.output.LayerNorm.weight is different
|
713 |
+
# key module.text_branch.encoder.layer.9.output.LayerNorm.bias is different
|
714 |
+
# key module.text_branch.encoder.layer.10.attention.self.query.weight is different
|
715 |
+
# key module.text_branch.encoder.layer.10.attention.self.query.bias is different
|
716 |
+
# key module.text_branch.encoder.layer.10.attention.self.key.weight is different
|
717 |
+
# key module.text_branch.encoder.layer.10.attention.self.key.bias is different
|
718 |
+
# key module.text_branch.encoder.layer.10.attention.self.value.weight is different
|
719 |
+
# key module.text_branch.encoder.layer.10.attention.self.value.bias is different
|
720 |
+
# key module.text_branch.encoder.layer.10.attention.output.dense.weight is different
|
721 |
+
# key module.text_branch.encoder.layer.10.attention.output.dense.bias is different
|
722 |
+
# key module.text_branch.encoder.layer.10.attention.output.LayerNorm.weight is different
|
723 |
+
# key module.text_branch.encoder.layer.10.attention.output.LayerNorm.bias is different
|
724 |
+
# key module.text_branch.encoder.layer.10.intermediate.dense.weight is different
|
725 |
+
# key module.text_branch.encoder.layer.10.intermediate.dense.bias is different
|
726 |
+
# key module.text_branch.encoder.layer.10.output.dense.weight is different
|
727 |
+
# key module.text_branch.encoder.layer.10.output.dense.bias is different
|
728 |
+
# key module.text_branch.encoder.layer.10.output.LayerNorm.weight is different
|
729 |
+
# key module.text_branch.encoder.layer.10.output.LayerNorm.bias is different
|
730 |
+
# key module.text_branch.encoder.layer.11.attention.self.query.weight is different
|
731 |
+
# key module.text_branch.encoder.layer.11.attention.self.query.bias is different
|
732 |
+
# key module.text_branch.encoder.layer.11.attention.self.key.weight is different
|
733 |
+
# key module.text_branch.encoder.layer.11.attention.self.key.bias is different
|
734 |
+
# key module.text_branch.encoder.layer.11.attention.self.value.weight is different
|
735 |
+
# key module.text_branch.encoder.layer.11.attention.self.value.bias is different
|
736 |
+
# key module.text_branch.encoder.layer.11.attention.output.dense.weight is different
|
737 |
+
# key module.text_branch.encoder.layer.11.attention.output.dense.bias is different
|
738 |
+
# key module.text_branch.encoder.layer.11.attention.output.LayerNorm.weight is different
|
739 |
+
# key module.text_branch.encoder.layer.11.attention.output.LayerNorm.bias is different
|
740 |
+
# key module.text_branch.encoder.layer.11.intermediate.dense.weight is different
|
741 |
+
# key module.text_branch.encoder.layer.11.intermediate.dense.bias is different
|
742 |
+
# key module.text_branch.encoder.layer.11.output.dense.weight is different
|
743 |
+
# key module.text_branch.encoder.layer.11.output.dense.bias is different
|
744 |
+
# key module.text_branch.encoder.layer.11.output.LayerNorm.weight is different
|
745 |
+
# key module.text_branch.encoder.layer.11.output.LayerNorm.bias is different
|
746 |
+
# a_sum: tensor(15078.6641)
|
747 |
+
# b_sum: tensor(15540.0723)
|
748 |
+
# diff: tensor(-461.4082)
|
749 |
+
# True
|
750 |
+
|
751 |
+
# linear_prob_text
|
752 |
+
# check_ckpt_diff("/fsx/clap_logs/2022_09_15-02_05_29-linear_probemodel_PANN-14-lr_0.0001-b_512-j_4-p_fp32/checkpoints/pretrain_epoch_10_lp_epoch_50.pt", "/fsx/clap_logs/2022_09_15-02_05_29-linear_probemodel_PANN-14-lr_0.0001-b_512-j_4-p_fp32/checkpoints/pretrain_epoch_10_lp_epoch_100.pt", "text_branch.resblocks")
|
753 |
+
|
754 |
+
# a_sum: tensor(12111.0244)
|
755 |
+
# b_sum: tensor(12111.0244)
|
756 |
+
# diff: tensor(0.)
|
757 |
+
|
758 |
+
# linear_prob_audio
|
759 |
+
# check_ckpt_diff("/fsx/clap_logs/2022_09_15-02_05_29-linear_probemodel_PANN-14-lr_0.0001-b_512-j_4-p_fp32/checkpoints/pretrain_epoch_10_lp_epoch_50.pt", "/fsx/clap_logs/2022_09_15-02_05_29-linear_probemodel_PANN-14-lr_0.0001-b_512-j_4-p_fp32/checkpoints/pretrain_epoch_10_lp_epoch_100.pt", "clap_model")
|
760 |
+
|
761 |
+
# key clap_model.audio_branch.bn0.num_batches_tracked is different
|
762 |
+
# key clap_model.audio_branch.conv_block1.bn1.running_mean is different
|
763 |
+
# key clap_model.audio_branch.conv_block1.bn1.running_var is different
|
764 |
+
# key clap_model.audio_branch.conv_block1.bn1.num_batches_tracked is different
|
765 |
+
# key clap_model.audio_branch.conv_block1.bn2.running_mean is different
|
766 |
+
# key clap_model.audio_branch.conv_block1.bn2.running_var is different
|
767 |
+
# key clap_model.audio_branch.conv_block1.bn2.num_batches_tracked is different
|
768 |
+
# key clap_model.audio_branch.conv_block2.bn1.running_mean is different
|
769 |
+
# key clap_model.audio_branch.conv_block2.bn1.running_var is different
|
770 |
+
# key clap_model.audio_branch.conv_block2.bn1.num_batches_tracked is different
|
771 |
+
# key clap_model.audio_branch.conv_block2.bn2.running_mean is different
|
772 |
+
# key clap_model.audio_branch.conv_block2.bn2.running_var is different
|
773 |
+
# key clap_model.audio_branch.conv_block2.bn2.num_batches_tracked is different
|
774 |
+
# key clap_model.audio_branch.conv_block3.bn1.running_mean is different
|
775 |
+
# key clap_model.audio_branch.conv_block3.bn1.running_var is different
|
776 |
+
# key clap_model.audio_branch.conv_block3.bn1.num_batches_tracked is different
|
777 |
+
# key clap_model.audio_branch.conv_block3.bn2.running_mean is different
|
778 |
+
# key clap_model.audio_branch.conv_block3.bn2.running_var is different
|
779 |
+
# key clap_model.audio_branch.conv_block3.bn2.num_batches_tracked is different
|
780 |
+
# key clap_model.audio_branch.conv_block4.bn1.running_mean is different
|
781 |
+
# key clap_model.audio_branch.conv_block4.bn1.running_var is different
|
782 |
+
# key clap_model.audio_branch.conv_block4.bn1.num_batches_tracked is different
|
783 |
+
# key clap_model.audio_branch.conv_block4.bn2.running_mean is different
|
784 |
+
# key clap_model.audio_branch.conv_block4.bn2.running_var is different
|
785 |
+
# key clap_model.audio_branch.conv_block4.bn2.num_batches_tracked is different
|
786 |
+
# key clap_model.audio_branch.conv_block5.bn1.running_mean is different
|
787 |
+
# key clap_model.audio_branch.conv_block5.bn1.running_var is different
|
788 |
+
# key clap_model.audio_branch.conv_block5.bn1.num_batches_tracked is different
|
789 |
+
# key clap_model.audio_branch.conv_block5.bn2.running_mean is different
|
790 |
+
# key clap_model.audio_branch.conv_block5.bn2.running_var is different
|
791 |
+
# key clap_model.audio_branch.conv_block5.bn2.num_batches_tracked is different
|
792 |
+
# key clap_model.audio_branch.conv_block6.bn1.running_mean is different
|
793 |
+
# key clap_model.audio_branch.conv_block6.bn1.running_var is different
|
794 |
+
# key clap_model.audio_branch.conv_block6.bn1.num_batches_tracked is different
|
795 |
+
# key clap_model.audio_branch.conv_block6.bn2.running_mean is different
|
796 |
+
# key clap_model.audio_branch.conv_block6.bn2.running_var is different
|
797 |
+
# key clap_model.audio_branch.conv_block6.bn2.num_batches_tracked is different
|
798 |
+
# a_sum: tensor(120061.5078)
|
799 |
+
# b_sum: tensor(122656.0469)
|
800 |
+
# diff: tensor(-2594.5391)
|
801 |
+
# True
|
802 |
+
|
eval_retrieval_freesound.sh
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
#SBATCH --comment clap
|
3 |
+
#SBATCH --partition=gpu
|
4 |
+
#SBATCH --job-name=mclap
|
5 |
+
#SBATCH --nodes 1
|
6 |
+
#SBATCH --ntasks-per-node 1
|
7 |
+
#SBATCH --cpus-per-gpu=6
|
8 |
+
#SBATCH --gres=gpu:1
|
9 |
+
#SBATCH --output=%x_%j.out
|
10 |
+
#SBATCH --exclude gpu-st-p4d-24xlarge-[23,30,31,108,115,134,135,183,185,186,187,188,275,277,290,374]
|
11 |
+
|
12 |
+
|
13 |
+
module load intelmpi
|
14 |
+
source /opt/intel/mpi/latest/env/vars.sh
|
15 |
+
export LD_LIBRARY_PATH=/opt/aws-ofi-nccl/lib:/opt/amazon/efa/lib64:/usr/local/cuda-11.0/efa/lib:/usr/local/cuda-11.0/lib:/usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0:/opt/nccl/build/lib:/opt/aws-ofi-nccl-install/lib:/opt/aws-ofi-nccl/lib:$LD_LIBRARY_PATH
|
16 |
+
export NCCL_PROTO=simple
|
17 |
+
export PATH=/opt/amazon/efa/bin:$PATH
|
18 |
+
export LD_PRELOAD="/opt/nccl/build/lib/libnccl.so"
|
19 |
+
|
20 |
+
export FI_EFA_FORK_SAFE=1
|
21 |
+
export FI_LOG_LEVEL=1
|
22 |
+
export FI_EFA_USE_DEVICE_RDMA=1 # use for p4dn
|
23 |
+
|
24 |
+
#export NCCL_ALGO=ring
|
25 |
+
export NCCL_DEBUG=info
|
26 |
+
#export NCCL_DEBUG_SUBSYS=INIT,ENV,GRAPH,COLL
|
27 |
+
|
28 |
+
export PYTHONFAULTHANDLER=1
|
29 |
+
|
30 |
+
export CUDA_LAUNCH_BLOCKING=0
|
31 |
+
export OMPI_MCA_mtl_base_verbose=1
|
32 |
+
export FI_EFA_ENABLE_SHM_TRANSFER=0
|
33 |
+
export FI_PROVIDER=efa
|
34 |
+
export FI_EFA_TX_MIN_CREDITS=64
|
35 |
+
export NCCL_TREE_THRESHOLD=0
|
36 |
+
|
37 |
+
|
38 |
+
#export NCCL_P2P_DISABLE=1
|
39 |
+
#export NCCL_IBEXT_DISABLE=1
|
40 |
+
#export NCCL_SOCKET_IFNAME="eth0,en,eth,em,bond"
|
41 |
+
|
42 |
+
# sent to sub script
|
43 |
+
export HOSTNAMES=`scontrol show hostnames "$SLURM_JOB_NODELIST"`
|
44 |
+
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
|
45 |
+
export MASTER_PORT=12802
|
46 |
+
export COUNT_NODE=`scontrol show hostnames "$SLURM_JOB_NODELIST" | wc -l`
|
47 |
+
|
48 |
+
echo go $COUNT_NODE
|
49 |
+
echo $HOSTNAMES
|
50 |
+
|
51 |
+
source /fsx/yusong/clap/bin/activate
|
52 |
+
cd /fsx/yusong/CLAP/src
|
53 |
+
export TRANSFORMERS_CACHE=/fsx/yusong/transformers_cache
|
54 |
+
|
55 |
+
srun --comment clap --cpu_bind=v --accel-bind=gn python -m evaluate.eval_retrieval_main \
|
56 |
+
--save-frequency 5 \
|
57 |
+
--save-top-performance 3 \
|
58 |
+
--save-most-recent \
|
59 |
+
--dataset-type="webdataset" \
|
60 |
+
--precision="fp32" \
|
61 |
+
--warmup 0 \
|
62 |
+
--batch-size=512 \
|
63 |
+
--wd=0.0 \
|
64 |
+
--epochs=50 \
|
65 |
+
--workers=6 \
|
66 |
+
--use-bn-sync \
|
67 |
+
--freeze-text \
|
68 |
+
--amodel HTSAT-tiny \
|
69 |
+
--tmodel roberta \
|
70 |
+
--report-to "wandb" \
|
71 |
+
--wandb-notes "10.17-freesound-dataset-4#" \
|
72 |
+
--datasetnames "freesound_no_overlap_noesc50" \
|
73 |
+
--datasetinfos "train" \
|
74 |
+
--seed 3407 \
|
75 |
+
--remotedata \
|
76 |
+
--logs /fsx/clap_logs \
|
77 |
+
--gather-with-grad \
|
78 |
+
--openai-model-cache-dir /fsx/yusong/transformers_cache \
|
79 |
+
--data-filling "repeatpad" \
|
80 |
+
--data-truncating "rand_trunc" \
|
81 |
+
--pretrained="/fsx/clap_logs/2022_10_17-02_08_21-model_HTSAT-tiny-lr_0.0001-b_96-j_6-p_fp32/checkpoints"
|
finetune-esc50.sh
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
#SBATCH --comment clap
|
3 |
+
#SBATCH --partition=gpu
|
4 |
+
#SBATCH --job-name=mclap
|
5 |
+
#SBATCH --nodes 1
|
6 |
+
#SBATCH --ntasks-per-node 1
|
7 |
+
#SBATCH --cpus-per-gpu=6
|
8 |
+
#SBATCH --gres=gpu:1
|
9 |
+
#SBATCH --output=%x_%j.out
|
10 |
+
#SBATCH --exclude gpu-st-p4d-24xlarge-[23,30,31,108,115,134,135,183,185,186,187,188,275,277,374]
|
11 |
+
|
12 |
+
|
13 |
+
module load intelmpi
|
14 |
+
source /opt/intel/mpi/latest/env/vars.sh
|
15 |
+
export LD_LIBRARY_PATH=/opt/aws-ofi-nccl/lib:/opt/amazon/efa/lib64:/usr/local/cuda-11.0/efa/lib:/usr/local/cuda-11.0/lib:/usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0:/opt/nccl/build/lib:/opt/aws-ofi-nccl-install/lib:/opt/aws-ofi-nccl/lib:$LD_LIBRARY_PATH
|
16 |
+
export NCCL_PROTO=simple
|
17 |
+
export PATH=/opt/amazon/efa/bin:$PATH
|
18 |
+
export LD_PRELOAD="/opt/nccl/build/lib/libnccl.so"
|
19 |
+
|
20 |
+
export FI_EFA_FORK_SAFE=1
|
21 |
+
export FI_LOG_LEVEL=1
|
22 |
+
export FI_EFA_USE_DEVICE_RDMA=1 # use for p4dn
|
23 |
+
|
24 |
+
#export NCCL_ALGO=ring
|
25 |
+
export NCCL_DEBUG=info
|
26 |
+
#export NCCL_DEBUG_SUBSYS=INIT,ENV,GRAPH,COLL
|
27 |
+
|
28 |
+
export PYTHONFAULTHANDLER=1
|
29 |
+
|
30 |
+
export CUDA_LAUNCH_BLOCKING=0
|
31 |
+
export OMPI_MCA_mtl_base_verbose=1
|
32 |
+
export FI_EFA_ENABLE_SHM_TRANSFER=0
|
33 |
+
export FI_PROVIDER=efa
|
34 |
+
export FI_EFA_TX_MIN_CREDITS=64
|
35 |
+
export NCCL_TREE_THRESHOLD=0
|
36 |
+
|
37 |
+
|
38 |
+
#export NCCL_P2P_DISABLE=1
|
39 |
+
#export NCCL_IBEXT_DISABLE=1
|
40 |
+
#export NCCL_SOCKET_IFNAME="eth0,en,eth,em,bond"
|
41 |
+
|
42 |
+
# sent to sub script
|
43 |
+
export HOSTNAMES=`scontrol show hostnames "$SLURM_JOB_NODELIST"`
|
44 |
+
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
|
45 |
+
export MASTER_PORT=12802
|
46 |
+
export COUNT_NODE=`scontrol show hostnames "$SLURM_JOB_NODELIST" | wc -l`
|
47 |
+
|
48 |
+
echo go $COUNT_NODE
|
49 |
+
echo $HOSTNAMES
|
50 |
+
|
51 |
+
source /fsx/yusong/clap/bin/activate
|
52 |
+
cd /fsx/yusong/CLAP/src
|
53 |
+
export TRANSFORMERS_CACHE=/fsx/yusong/transformers_cache
|
54 |
+
|
55 |
+
srun --comment clap --cpu_bind=v --accel-bind=gn python -m evaluate.eval_linear_probe \
|
56 |
+
--save-frequency 50 \
|
57 |
+
--save-top-performance 3 \
|
58 |
+
--save-most-recent \
|
59 |
+
--dataset-type="webdataset" \
|
60 |
+
--precision="fp32" \
|
61 |
+
--warmup 0 \
|
62 |
+
--batch-size=160 \
|
63 |
+
--lr=1e-4 \
|
64 |
+
--wd=0.1 \
|
65 |
+
--epochs=100 \
|
66 |
+
--workers=4 \
|
67 |
+
--use-bn-sync \
|
68 |
+
--freeze-text \
|
69 |
+
--amodel PANN-14 \
|
70 |
+
--tmodel roberta \
|
71 |
+
--report-to "wandb" \
|
72 |
+
--wandb-notes "10.14-finetune-esc50" \
|
73 |
+
--datasetnames "esc50" \
|
74 |
+
--datasetinfos "train" \
|
75 |
+
--seed 3407 \
|
76 |
+
--remotedata \
|
77 |
+
--logs /fsx/clap_logs \
|
78 |
+
--gather-with-grad \
|
79 |
+
--lp-loss="ce" \
|
80 |
+
--lp-metrics="acc" \
|
81 |
+
--lp-lr=1e-4 \
|
82 |
+
--lp-mlp \
|
83 |
+
--class-label-path="../class_labels/ESC50_class_labels_indices_space.json" \
|
84 |
+
--openai-model-cache-dir /fsx/yusong/transformers_cache \
|
85 |
+
--pretrained="/fsx/clap_logs/2022_10_14-04_05_14-model_PANN-14-lr_0.0001-b_160-j_6-p_fp32/checkpoints" \
|
86 |
+
--data-filling "repeatpad" \
|
87 |
+
--data-truncating "rand_trunc" \
|
88 |
+
--optimizer "adam"
|
finetune-fsd50k.sh
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
#SBATCH --comment clap
|
3 |
+
#SBATCH --partition=gpu
|
4 |
+
#SBATCH --job-name=mclap
|
5 |
+
#SBATCH --nodes 1
|
6 |
+
#SBATCH --ntasks-per-node 1
|
7 |
+
#SBATCH --cpus-per-gpu=6
|
8 |
+
#SBATCH --gres=gpu:1
|
9 |
+
#SBATCH --output=%x_%j.out
|
10 |
+
#SBATCH --exclude gpu-st-p4d-24xlarge-[23,30,31,108,115,134,135,183,185,186,187,188,275,277,374]
|
11 |
+
|
12 |
+
|
13 |
+
module load intelmpi
|
14 |
+
source /opt/intel/mpi/latest/env/vars.sh
|
15 |
+
export LD_LIBRARY_PATH=/opt/aws-ofi-nccl/lib:/opt/amazon/efa/lib64:/usr/local/cuda-11.0/efa/lib:/usr/local/cuda-11.0/lib:/usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0:/opt/nccl/build/lib:/opt/aws-ofi-nccl-install/lib:/opt/aws-ofi-nccl/lib:$LD_LIBRARY_PATH
|
16 |
+
export NCCL_PROTO=simple
|
17 |
+
export PATH=/opt/amazon/efa/bin:$PATH
|
18 |
+
export LD_PRELOAD="/opt/nccl/build/lib/libnccl.so"
|
19 |
+
|
20 |
+
export FI_EFA_FORK_SAFE=1
|
21 |
+
export FI_LOG_LEVEL=1
|
22 |
+
export FI_EFA_USE_DEVICE_RDMA=1 # use for p4dn
|
23 |
+
|
24 |
+
#export NCCL_ALGO=ring
|
25 |
+
export NCCL_DEBUG=info
|
26 |
+
#export NCCL_DEBUG_SUBSYS=INIT,ENV,GRAPH,COLL
|
27 |
+
|
28 |
+
export PYTHONFAULTHANDLER=1
|
29 |
+
|
30 |
+
export CUDA_LAUNCH_BLOCKING=0
|
31 |
+
export OMPI_MCA_mtl_base_verbose=1
|
32 |
+
export FI_EFA_ENABLE_SHM_TRANSFER=0
|
33 |
+
export FI_PROVIDER=efa
|
34 |
+
export FI_EFA_TX_MIN_CREDITS=64
|
35 |
+
export NCCL_TREE_THRESHOLD=0
|
36 |
+
|
37 |
+
|
38 |
+
#export NCCL_P2P_DISABLE=1
|
39 |
+
#export NCCL_IBEXT_DISABLE=1
|
40 |
+
#export NCCL_SOCKET_IFNAME="eth0,en,eth,em,bond"
|
41 |
+
|
42 |
+
# sent to sub script
|
43 |
+
export HOSTNAMES=`scontrol show hostnames "$SLURM_JOB_NODELIST"`
|
44 |
+
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
|
45 |
+
export MASTER_PORT=12802
|
46 |
+
export COUNT_NODE=`scontrol show hostnames "$SLURM_JOB_NODELIST" | wc -l`
|
47 |
+
|
48 |
+
echo go $COUNT_NODE
|
49 |
+
echo $HOSTNAMES
|
50 |
+
|
51 |
+
source /fsx/yusong/clap/bin/activate
|
52 |
+
cd /fsx/yusong/CLAP/src
|
53 |
+
export TRANSFORMERS_CACHE=/fsx/yusong/transformers_cache
|
54 |
+
|
55 |
+
srun --comment clap --cpu_bind=v --accel-bind=gn python -m evaluate.eval_linear_probe \
|
56 |
+
--save-frequency 50 \
|
57 |
+
--save-top-performance 3 \
|
58 |
+
--save-most-recent \
|
59 |
+
--dataset-type="webdataset" \
|
60 |
+
--precision="fp32" \
|
61 |
+
--warmup 0 \
|
62 |
+
--batch-size=160 \
|
63 |
+
--lr=1e-4 \
|
64 |
+
--wd=0.1 \
|
65 |
+
--epochs=100 \
|
66 |
+
--workers=4 \
|
67 |
+
--use-bn-sync \
|
68 |
+
--freeze-text \
|
69 |
+
--amodel PANN-14 \
|
70 |
+
--tmodel roberta \
|
71 |
+
--report-to "wandb" \
|
72 |
+
--wandb-notes "10.14-finetune-fsd50k" \
|
73 |
+
--datasetnames "fsd50k_class_label" \
|
74 |
+
--datasetinfos "train" \
|
75 |
+
--seed 3407 \
|
76 |
+
--remotedata \
|
77 |
+
--logs /fsx/clap_logs \
|
78 |
+
--gather-with-grad \
|
79 |
+
--lp-loss="bce" \
|
80 |
+
--lp-metrics="map" \
|
81 |
+
--lp-lr=1e-4 \
|
82 |
+
--lp-mlp \
|
83 |
+
--class-label-path="../class_labels/FSD50k_class_labels_indices.json" \
|
84 |
+
--openai-model-cache-dir /fsx/yusong/transformers_cache \
|
85 |
+
--pretrained="/fsx/clap_logs/2022_10_14-04_05_14-model_PANN-14-lr_0.0001-b_160-j_6-p_fp32/checkpoints" \
|
86 |
+
--data-filling "repeatpad" \
|
87 |
+
--data-truncating "rand_trunc" \
|
88 |
+
--optimizer "adam"
|
htsat-roberta-large-dataset-fusion.sh
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
#SBATCH --comment clap
|
3 |
+
#SBATCH --partition=gpu
|
4 |
+
#SBATCH --job-name=mclap
|
5 |
+
#SBATCH --nodes 3
|
6 |
+
#SBATCH --ntasks-per-node 8
|
7 |
+
#SBATCH --cpus-per-gpu=6
|
8 |
+
#SBATCH --gres=gpu:8
|
9 |
+
#SBATCH --output=%x_%j.out
|
10 |
+
#SBATCH --exclude gpu-st-p4d-24xlarge-[23,30,31,108,115,134,135,183,185,186,187,188,275,277,374]
|
11 |
+
|
12 |
+
module load intelmpi
|
13 |
+
source /opt/intel/mpi/latest/env/vars.sh
|
14 |
+
export LD_LIBRARY_PATH=/opt/aws-ofi-nccl/lib:/opt/amazon/efa/lib64:/usr/local/cuda-11.0/efa/lib:/usr/local/cuda-11.0/lib:/usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0:/opt/nccl/build/lib:/opt/aws-ofi-nccl-install/lib:/opt/aws-ofi-nccl/lib:$LD_LIBRARY_PATH
|
15 |
+
export NCCL_PROTO=simple
|
16 |
+
export PATH=/opt/amazon/efa/bin:$PATH
|
17 |
+
export LD_PRELOAD="/opt/nccl/build/lib/libnccl.so"
|
18 |
+
|
19 |
+
export FI_EFA_FORK_SAFE=1
|
20 |
+
export FI_LOG_LEVEL=1
|
21 |
+
export FI_EFA_USE_DEVICE_RDMA=1 # use for p4dn
|
22 |
+
|
23 |
+
#export NCCL_ALGO=ring
|
24 |
+
export NCCL_DEBUG=info
|
25 |
+
#export NCCL_DEBUG_SUBSYS=INIT,ENV,GRAPH,COLL
|
26 |
+
|
27 |
+
export PYTHONFAULTHANDLER=1
|
28 |
+
|
29 |
+
export CUDA_LAUNCH_BLOCKING=0
|
30 |
+
export OMPI_MCA_mtl_base_verbose=1
|
31 |
+
export FI_EFA_ENABLE_SHM_TRANSFER=0
|
32 |
+
export FI_PROVIDER=efa
|
33 |
+
export FI_EFA_TX_MIN_CREDITS=64
|
34 |
+
export NCCL_TREE_THRESHOLD=0
|
35 |
+
|
36 |
+
|
37 |
+
#export NCCL_P2P_DISABLE=1
|
38 |
+
#export NCCL_IBEXT_DISABLE=1
|
39 |
+
#export NCCL_SOCKET_IFNAME="eth0,en,eth,em,bond"
|
40 |
+
|
41 |
+
# sent to sub script
|
42 |
+
export HOSTNAMES=`scontrol show hostnames "$SLURM_JOB_NODELIST"`
|
43 |
+
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
|
44 |
+
export MASTER_PORT=12802
|
45 |
+
export COUNT_NODE=`scontrol show hostnames "$SLURM_JOB_NODELIST" | wc -l`
|
46 |
+
|
47 |
+
echo go $COUNT_NODE
|
48 |
+
echo $HOSTNAMES
|
49 |
+
|
50 |
+
source /fsx/yusong/clap/bin/activate
|
51 |
+
cd /fsx/yusong/CLAP/src
|
52 |
+
export TRANSFORMERS_CACHE=/fsx/yusong/transformers_cache
|
53 |
+
|
54 |
+
srun --comment clap --cpu_bind=v --accel-bind=gn python -m training.main \
|
55 |
+
--save-frequency 5 \
|
56 |
+
--save-top-performance 3 \
|
57 |
+
--save-most-recent \
|
58 |
+
--dataset-type="webdataset" \
|
59 |
+
--precision="fp32" \
|
60 |
+
--batch-size=96 \
|
61 |
+
--lr=1e-4 \
|
62 |
+
--wd=0.0 \
|
63 |
+
--epochs=45 \
|
64 |
+
--workers=6 \
|
65 |
+
--use-bn-sync \
|
66 |
+
--amodel HTSAT-tiny \
|
67 |
+
--tmodel roberta \
|
68 |
+
--warmup 3200 \
|
69 |
+
--report-to "wandb" \
|
70 |
+
--wandb-notes "10.16-clap-dataset-2#-htsat-roberta-fusion" \
|
71 |
+
--datasetnames "Clotho" "audiocaps" "BBCSoundEffects" "free_to_use_sounds" "paramount_motion" "sonniss_game_effects" "wesoundeffects" "freesound_no_overlap_noesc50" "audiostock" "epidemic_sound_effects" "fsd50k_class_label" "MACS" "WavText5K" \
|
72 |
+
--full-train-dataset "BBCSoundEffects" "free_to_use_sounds" "paramount_motion" "sonniss_game_effects" "wesoundeffects" "audiostock" "epidemic_sound_effects" "fsd50k_class_label" \
|
73 |
+
--exclude-eval-dataset "freesound_no_overlap_noesc50" "MACS" "WavText5K" "fsd50k_class_label" \
|
74 |
+
--datasetinfos "train" "unbalanced_train" \
|
75 |
+
--top-k-checkpoint-select-dataset="Clotho-test" \
|
76 |
+
--top-k-checkpoint-select-metric="mAP@10" \
|
77 |
+
--openai-model-cache-dir /fsx/yusong/transformers_cache \
|
78 |
+
--logs /fsx/clap_logs \
|
79 |
+
--seed 3407 \
|
80 |
+
--remotedata \
|
81 |
+
--gather-with-grad \
|
82 |
+
--optimizer "adam" \
|
83 |
+
--data-filling "repeatpad" \
|
84 |
+
--data-truncating "fusion" \
|
85 |
+
--enable-fusion \
|
86 |
+
--fusion-type "aff_2d" \
|
87 |
+
--pretrained-audio /fsx/yusong/audio_pretrained_model/HTSAT-fullset-imagenet-map=0.467.ckpt
|
requirements.txt
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
soundfile
|
2 |
+
librosa
|
3 |
+
torchlibrosa
|
4 |
+
ftfy
|
5 |
+
braceexpand
|
6 |
+
webdataset
|
7 |
+
wget
|
8 |
+
wandb
|
9 |
+
llvmlite
|
10 |
+
scipy
|
11 |
+
scikit-learn
|
12 |
+
pandas
|
13 |
+
h5py
|
14 |
+
tqdm
|
15 |
+
regex
|
16 |
+
transformers
|
test_tars.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import webdataset as wds
|
2 |
+
import soundfile as sf
|
3 |
+
import io
|
4 |
+
import os
|
5 |
+
import random
|
6 |
+
import copy
|
7 |
+
from tqdm import tqdm
|
8 |
+
import shutil
|
9 |
+
import argparse
|
10 |
+
import traceback
|
11 |
+
import logging
|
12 |
+
import json
|
13 |
+
from open_clip import tokenize
|
14 |
+
|
15 |
+
|
16 |
+
def parse_args():
|
17 |
+
parser = argparse.ArgumentParser()
|
18 |
+
parser.add_argument(
|
19 |
+
"--tar-path",
|
20 |
+
type=str,
|
21 |
+
default=None,
|
22 |
+
help="Path to the tars",
|
23 |
+
)
|
24 |
+
parser.add_argument(
|
25 |
+
"--start",
|
26 |
+
type=int,
|
27 |
+
default=0,
|
28 |
+
help="start from tar-path + start",
|
29 |
+
)
|
30 |
+
parser.add_argument(
|
31 |
+
"--end",
|
32 |
+
type=int,
|
33 |
+
default=99999,
|
34 |
+
help="end with tar-path + end",
|
35 |
+
)
|
36 |
+
parser.add_argument(
|
37 |
+
"--exclude",
|
38 |
+
nargs='+',
|
39 |
+
default=None,
|
40 |
+
help="exclude tar-path + exclude",
|
41 |
+
)
|
42 |
+
parser.add_argument(
|
43 |
+
"--batch-size",
|
44 |
+
type=int,
|
45 |
+
default=1,
|
46 |
+
)
|
47 |
+
parser.add_argument(
|
48 |
+
"--order",
|
49 |
+
default=False,
|
50 |
+
action='store_true',
|
51 |
+
help="if keep the search order accendingly",
|
52 |
+
)
|
53 |
+
args = parser.parse_args()
|
54 |
+
return args
|
55 |
+
|
56 |
+
def log_and_continue(exn):
|
57 |
+
"""Call in an exception handler to ignore any exception, isssue a warning, and continue."""
|
58 |
+
logging.warning(f"Handling webdataset error ({repr(exn)}). Ignoring.")
|
59 |
+
return True
|
60 |
+
|
61 |
+
def preprocess(
|
62 |
+
sample,
|
63 |
+
):
|
64 |
+
"""
|
65 |
+
Preprocess a single sample for wdsdataloader.
|
66 |
+
"""
|
67 |
+
audio_ext = "flac"
|
68 |
+
text_ext = "json"
|
69 |
+
audio_data, orig_sr = sf.read(io.BytesIO(sample[audio_ext]))
|
70 |
+
json_dict_raw = json.loads(sample[text_ext].decode("utf-8"))
|
71 |
+
sample["waveform"] = audio_data
|
72 |
+
texts = json_dict_raw["text"]
|
73 |
+
if isinstance(texts, list) and isinstance(texts[0], str) and len(texts) > 1:
|
74 |
+
texts = random.choice(texts)
|
75 |
+
sample["raw_text"] = texts
|
76 |
+
sample["text"] = tokenize(texts)
|
77 |
+
return sample
|
78 |
+
|
79 |
+
if __name__ == "__main__":
|
80 |
+
args = parse_args()
|
81 |
+
tar_path = args.tar_path
|
82 |
+
idx_list = list(range(args.start, args.end))
|
83 |
+
if args.exclude != None:
|
84 |
+
for x in args.exclude:
|
85 |
+
idx_list.remove(x)
|
86 |
+
if not args.order:
|
87 |
+
random.shuffle(idx_list)
|
88 |
+
if "aws" in tar_path:
|
89 |
+
args.local = False
|
90 |
+
if args.local:
|
91 |
+
input_shards = [os.path.join(args.tar_path, str(i)+".tar") for i in idx_list]
|
92 |
+
else:
|
93 |
+
input_shards = [os.path.join(args.tar_path, str(i)+".tar -") for i in idx_list]
|
94 |
+
pipeline = [wds.SimpleShardList(input_shards)]
|
95 |
+
pipeline.extend(
|
96 |
+
[
|
97 |
+
wds.split_by_node,
|
98 |
+
wds.split_by_worker,
|
99 |
+
wds.tarfile_to_samples(handler=log_and_continue),
|
100 |
+
wds.map(preprocess),
|
101 |
+
wds.to_tuple("__url__", "__key__", "waveform"),
|
102 |
+
wds.batched(1),
|
103 |
+
]
|
104 |
+
)
|
105 |
+
dataset = wds.DataPipeline(*pipeline)
|
106 |
+
dataloader = wds.WebLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=0)
|
107 |
+
old_k = 0
|
108 |
+
old_batch = None
|
109 |
+
try:
|
110 |
+
for k, batch in tqdm(enumerate(dataloader)):
|
111 |
+
print("k:", k)
|
112 |
+
print("batch:", batch)
|
113 |
+
old_k = k
|
114 |
+
old_batch = copy.deepcopy(batch)
|
115 |
+
except:
|
116 |
+
with open("check_tar_log.txt","a") as file:
|
117 |
+
traceback.print_exc(file = file)
|
118 |
+
print("old_k:", old_k)
|
119 |
+
print("old_batch:", old_batch)
|
120 |
+
pass
|
train-htsat-roberta.sh
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
#SBATCH --comment clap
|
3 |
+
#SBATCH --partition=gpu
|
4 |
+
#SBATCH --job-name=mclap
|
5 |
+
#SBATCH --nodes 1
|
6 |
+
#SBATCH --ntasks-per-node 8
|
7 |
+
#SBATCH --cpus-per-gpu=6
|
8 |
+
#SBATCH --gres=gpu:8
|
9 |
+
#SBATCH --output=%x_%j.out
|
10 |
+
#SBATCH --exclude gpu-st-p4d-24xlarge-[23,30,31,108,115,134,135,183,185,186,187,188,275,277,374]
|
11 |
+
|
12 |
+
module load intelmpi
|
13 |
+
source /opt/intel/mpi/latest/env/vars.sh
|
14 |
+
export LD_LIBRARY_PATH=/opt/aws-ofi-nccl/lib:/opt/amazon/efa/lib64:/usr/local/cuda-11.0/efa/lib:/usr/local/cuda-11.0/lib:/usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0:/opt/nccl/build/lib:/opt/aws-ofi-nccl-install/lib:/opt/aws-ofi-nccl/lib:$LD_LIBRARY_PATH
|
15 |
+
export NCCL_PROTO=simple
|
16 |
+
export PATH=/opt/amazon/efa/bin:$PATH
|
17 |
+
export LD_PRELOAD="/opt/nccl/build/lib/libnccl.so"
|
18 |
+
|
19 |
+
export FI_EFA_FORK_SAFE=1
|
20 |
+
export FI_LOG_LEVEL=1
|
21 |
+
export FI_EFA_USE_DEVICE_RDMA=1 # use for p4dn
|
22 |
+
|
23 |
+
#export NCCL_ALGO=ring
|
24 |
+
export NCCL_DEBUG=info
|
25 |
+
#export NCCL_DEBUG_SUBSYS=INIT,ENV,GRAPH,COLL
|
26 |
+
|
27 |
+
export PYTHONFAULTHANDLER=1
|
28 |
+
|
29 |
+
export CUDA_LAUNCH_BLOCKING=0
|
30 |
+
export OMPI_MCA_mtl_base_verbose=1
|
31 |
+
export FI_EFA_ENABLE_SHM_TRANSFER=0
|
32 |
+
export FI_PROVIDER=efa
|
33 |
+
export FI_EFA_TX_MIN_CREDITS=64
|
34 |
+
export NCCL_TREE_THRESHOLD=0
|
35 |
+
|
36 |
+
|
37 |
+
#export NCCL_P2P_DISABLE=1
|
38 |
+
#export NCCL_IBEXT_DISABLE=1
|
39 |
+
#export NCCL_SOCKET_IFNAME="eth0,en,eth,em,bond"
|
40 |
+
|
41 |
+
# sent to sub script
|
42 |
+
export HOSTNAMES=`scontrol show hostnames "$SLURM_JOB_NODELIST"`
|
43 |
+
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
|
44 |
+
export MASTER_PORT=12802
|
45 |
+
export COUNT_NODE=`scontrol show hostnames "$SLURM_JOB_NODELIST" | wc -l`
|
46 |
+
|
47 |
+
echo go $COUNT_NODE
|
48 |
+
echo $HOSTNAMES
|
49 |
+
|
50 |
+
source /fsx/yusong/clap/bin/activate
|
51 |
+
cd /fsx/yusong/CLAP/src
|
52 |
+
export TRANSFORMERS_CACHE=/fsx/yusong/transformers_cache
|
53 |
+
|
54 |
+
srun --comment clap --cpu_bind=v --accel-bind=gn python -m training.main \
|
55 |
+
--save-frequency 5 \
|
56 |
+
--save-top-performance 3 \
|
57 |
+
--save-most-recent \
|
58 |
+
--dataset-type="webdataset" \
|
59 |
+
--precision="fp32" \
|
60 |
+
--batch-size=96 \
|
61 |
+
--lr=1e-4 \
|
62 |
+
--wd=0.0 \
|
63 |
+
--epochs=45 \
|
64 |
+
--workers=6 \
|
65 |
+
--use-bn-sync \
|
66 |
+
--amodel HTSAT-tiny \
|
67 |
+
--tmodel roberta \
|
68 |
+
--warmup 3200 \
|
69 |
+
--report-to "wandb" \
|
70 |
+
--wandb-notes "10.16-clap-dataset-1#-htsat-roberta" \
|
71 |
+
--datasetnames "Clotho" "audiocaps" \
|
72 |
+
--datasetinfos "train" "unbalanced_train" \
|
73 |
+
--top-k-checkpoint-select-dataset="Clotho-test" \
|
74 |
+
--top-k-checkpoint-select-metric="mAP@10" \
|
75 |
+
--openai-model-cache-dir /fsx/yusong/transformers_cache \
|
76 |
+
--logs /fsx/clap_logs \
|
77 |
+
--seed 3407 \
|
78 |
+
--remotedata \
|
79 |
+
--gather-with-grad \
|
80 |
+
--optimizer "adam" \
|
81 |
+
--data-filling "repeatpad" \
|
82 |
+
--data-truncating "rand_trunc" \
|
83 |
+
--pretrained-audio /fsx/yusong/audio_pretrained_model/HTSAT-fullset-imagenet-map=0.467.ckpt
|
train-pann-roberta.sh
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
#SBATCH --comment clap
|
3 |
+
#SBATCH --partition=gpu
|
4 |
+
#SBATCH --job-name=mclap
|
5 |
+
#SBATCH --nodes 1
|
6 |
+
#SBATCH --ntasks-per-node 8
|
7 |
+
#SBATCH --cpus-per-gpu=6
|
8 |
+
#SBATCH --gres=gpu:8
|
9 |
+
#SBATCH --output=%x_%j.out
|
10 |
+
#SBATCH --exclude gpu-st-p4d-24xlarge-[23,30,31,108,115,134,135,183,185,186,187,188,275,277,374]
|
11 |
+
|
12 |
+
module load intelmpi
|
13 |
+
source /opt/intel/mpi/latest/env/vars.sh
|
14 |
+
export LD_LIBRARY_PATH=/opt/aws-ofi-nccl/lib:/opt/amazon/efa/lib64:/usr/local/cuda-11.0/efa/lib:/usr/local/cuda-11.0/lib:/usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0:/opt/nccl/build/lib:/opt/aws-ofi-nccl-install/lib:/opt/aws-ofi-nccl/lib:$LD_LIBRARY_PATH
|
15 |
+
export NCCL_PROTO=simple
|
16 |
+
export PATH=/opt/amazon/efa/bin:$PATH
|
17 |
+
export LD_PRELOAD="/opt/nccl/build/lib/libnccl.so"
|
18 |
+
|
19 |
+
export FI_EFA_FORK_SAFE=1
|
20 |
+
export FI_LOG_LEVEL=1
|
21 |
+
export FI_EFA_USE_DEVICE_RDMA=1 # use for p4dn
|
22 |
+
|
23 |
+
#export NCCL_ALGO=ring
|
24 |
+
export NCCL_DEBUG=info
|
25 |
+
#export NCCL_DEBUG_SUBSYS=INIT,ENV,GRAPH,COLL
|
26 |
+
|
27 |
+
export PYTHONFAULTHANDLER=1
|
28 |
+
|
29 |
+
export CUDA_LAUNCH_BLOCKING=0
|
30 |
+
export OMPI_MCA_mtl_base_verbose=1
|
31 |
+
export FI_EFA_ENABLE_SHM_TRANSFER=0
|
32 |
+
export FI_PROVIDER=efa
|
33 |
+
export FI_EFA_TX_MIN_CREDITS=64
|
34 |
+
export NCCL_TREE_THRESHOLD=0
|
35 |
+
|
36 |
+
|
37 |
+
#export NCCL_P2P_DISABLE=1
|
38 |
+
#export NCCL_IBEXT_DISABLE=1
|
39 |
+
#export NCCL_SOCKET_IFNAME="eth0,en,eth,em,bond"
|
40 |
+
|
41 |
+
# sent to sub script
|
42 |
+
export HOSTNAMES=`scontrol show hostnames "$SLURM_JOB_NODELIST"`
|
43 |
+
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
|
44 |
+
export MASTER_PORT=12802
|
45 |
+
export COUNT_NODE=`scontrol show hostnames "$SLURM_JOB_NODELIST" | wc -l`
|
46 |
+
|
47 |
+
echo go $COUNT_NODE
|
48 |
+
echo $HOSTNAMES
|
49 |
+
|
50 |
+
source /fsx/yusong/clap/bin/activate
|
51 |
+
cd /fsx/yusong/CLAP/src
|
52 |
+
export TRANSFORMERS_CACHE=/fsx/yusong/transformers_cache
|
53 |
+
|
54 |
+
srun --comment clap --cpu_bind=v --accel-bind=gn python -m training.main \
|
55 |
+
--save-frequency 5 \
|
56 |
+
--save-top-performance 3 \
|
57 |
+
--save-most-recent \
|
58 |
+
--dataset-type="webdataset" \
|
59 |
+
--precision="fp32" \
|
60 |
+
--batch-size=96 \
|
61 |
+
--lr=1e-4 \
|
62 |
+
--wd=0.0 \
|
63 |
+
--epochs=45 \
|
64 |
+
--workers=6 \
|
65 |
+
--use-bn-sync \
|
66 |
+
--amodel PANN-14 \
|
67 |
+
--tmodel roberta \
|
68 |
+
--warmup 500 \
|
69 |
+
--report-to "wandb" \
|
70 |
+
--wandb-notes "10.16-clap-dataset-1#-pann-roberta" \
|
71 |
+
--datasetnames "Clotho" "audiocaps" \
|
72 |
+
--datasetinfos "train" "unbalanced_train" \
|
73 |
+
--top-k-checkpoint-select-dataset="Clotho-test" \
|
74 |
+
--top-k-checkpoint-select-metric="mAP@10" \
|
75 |
+
--openai-model-cache-dir /fsx/yusong/transformers_cache \
|
76 |
+
--logs /fsx/clap_logs \
|
77 |
+
--seed 3407 \
|
78 |
+
--remotedata \
|
79 |
+
--gather-with-grad \
|
80 |
+
--optimizer "adam" \
|
81 |
+
--data-filling "repeatpad" \
|
82 |
+
--data-truncating "rand_trunc" \
|
83 |
+
--pretrained-audio /fsx/yusong/audio_pretrained_model/PANN-fullset-map=0.439.ckpt
|
zeroshot_esc50.sh
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
#SBATCH --comment clap
|
3 |
+
#SBATCH --partition=gpu
|
4 |
+
#SBATCH --job-name=mclap
|
5 |
+
#SBATCH --nodes 1
|
6 |
+
#SBATCH --ntasks-per-node 1
|
7 |
+
#SBATCH --cpus-per-gpu=6
|
8 |
+
#SBATCH --gres=gpu:1
|
9 |
+
#SBATCH --output=%x_%j.out
|
10 |
+
#SBATCH --exclude gpu-st-p4d-24xlarge-[23,30,31,108,115,134,135,183,185,186,187,188,275,277,290,374]
|
11 |
+
|
12 |
+
|
13 |
+
module load intelmpi
|
14 |
+
source /opt/intel/mpi/latest/env/vars.sh
|
15 |
+
export LD_LIBRARY_PATH=/opt/aws-ofi-nccl/lib:/opt/amazon/efa/lib64:/usr/local/cuda-11.0/efa/lib:/usr/local/cuda-11.0/lib:/usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0:/opt/nccl/build/lib:/opt/aws-ofi-nccl-install/lib:/opt/aws-ofi-nccl/lib:$LD_LIBRARY_PATH
|
16 |
+
export NCCL_PROTO=simple
|
17 |
+
export PATH=/opt/amazon/efa/bin:$PATH
|
18 |
+
export LD_PRELOAD="/opt/nccl/build/lib/libnccl.so"
|
19 |
+
|
20 |
+
export FI_EFA_FORK_SAFE=1
|
21 |
+
export FI_LOG_LEVEL=1
|
22 |
+
export FI_EFA_USE_DEVICE_RDMA=1 # use for p4dn
|
23 |
+
|
24 |
+
#export NCCL_ALGO=ring
|
25 |
+
export NCCL_DEBUG=info
|
26 |
+
#export NCCL_DEBUG_SUBSYS=INIT,ENV,GRAPH,COLL
|
27 |
+
|
28 |
+
export PYTHONFAULTHANDLER=1
|
29 |
+
|
30 |
+
export CUDA_LAUNCH_BLOCKING=0
|
31 |
+
export OMPI_MCA_mtl_base_verbose=1
|
32 |
+
export FI_EFA_ENABLE_SHM_TRANSFER=0
|
33 |
+
export FI_PROVIDER=efa
|
34 |
+
export FI_EFA_TX_MIN_CREDITS=64
|
35 |
+
export NCCL_TREE_THRESHOLD=0
|
36 |
+
|
37 |
+
|
38 |
+
#export NCCL_P2P_DISABLE=1
|
39 |
+
#export NCCL_IBEXT_DISABLE=1
|
40 |
+
#export NCCL_SOCKET_IFNAME="eth0,en,eth,em,bond"
|
41 |
+
|
42 |
+
# sent to sub script
|
43 |
+
export HOSTNAMES=`scontrol show hostnames "$SLURM_JOB_NODELIST"`
|
44 |
+
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
|
45 |
+
export MASTER_PORT=12802
|
46 |
+
export COUNT_NODE=`scontrol show hostnames "$SLURM_JOB_NODELIST" | wc -l`
|
47 |
+
|
48 |
+
echo go $COUNT_NODE
|
49 |
+
echo $HOSTNAMES
|
50 |
+
|
51 |
+
source /fsx/yusong/clap/bin/activate
|
52 |
+
cd /fsx/yusong/CLAP/src
|
53 |
+
export TRANSFORMERS_CACHE=/fsx/yusong/transformers_cache
|
54 |
+
|
55 |
+
srun --comment clap --cpu_bind=v --accel-bind=gn python -m evaluate.eval_retrieval_main \
|
56 |
+
--save-frequency 5 \
|
57 |
+
--save-top-performance 3 \
|
58 |
+
--save-most-recent \
|
59 |
+
--dataset-type="webdataset" \
|
60 |
+
--precision="fp32" \
|
61 |
+
--warmup 0 \
|
62 |
+
--batch-size=512 \
|
63 |
+
--wd=0.0 \
|
64 |
+
--epochs=50 \
|
65 |
+
--workers=6 \
|
66 |
+
--use-bn-sync \
|
67 |
+
--freeze-text \
|
68 |
+
--amodel HTSAT-tiny \
|
69 |
+
--tmodel roberta \
|
70 |
+
--report-to "wandb" \
|
71 |
+
--wandb-notes "10.17-zeroshot-esc50-dataset-4#" \
|
72 |
+
--datasetnames "esc50" \
|
73 |
+
--datasetinfos "train" \
|
74 |
+
--seed 3407 \
|
75 |
+
--remotedata \
|
76 |
+
--logs /fsx/clap_logs \
|
77 |
+
--gather-with-grad \
|
78 |
+
--openai-model-cache-dir /fsx/yusong/transformers_cache \
|
79 |
+
--data-filling "repeatpad" \
|
80 |
+
--data-truncating "rand_trunc" \
|
81 |
+
--class-label-path="../class_labels/ESC50_class_labels_indices_space.json" \
|
82 |
+
--pretrained="/fsx/clap_logs/2022_10_17-02_08_21-model_HTSAT-tiny-lr_0.0001-b_96-j_6-p_fp32/checkpoints"
|