diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..63c2577aaa14a2828a83659a61694b6e19fd1432 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +miku.png filter=lfs diff=lfs merge=lfs -text +uvr5_models/Demucs/04573f0d-f3cf25b2.th filter=lfs diff=lfs merge=lfs -text +uvr5_models/Demucs/92cfc3b6-ef3bcb9c.th filter=lfs diff=lfs merge=lfs -text +uvr5_models/Demucs/955717e8-8726e21a.th filter=lfs diff=lfs merge=lfs -text +uvr5_models/Demucs/d12395a8-e57c48e6.th filter=lfs diff=lfs merge=lfs -text +uvr5_models/Demucs/f7e0c4bc-ba3fe64a.th filter=lfs diff=lfs merge=lfs -text diff --git a/miku.png b/miku.png new file mode 100644 index 0000000000000000000000000000000000000000..1ca9305da9d1cd4735a5f08aee98b07d2c705689 --- /dev/null +++ b/miku.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf5c63ffc2d83d6c81b9f12b19c0dd1a1629f998fb082dbbf7c2b5f5b333ae43 +size 2325722 diff --git a/uvr5_models/Demucs/04573f0d-f3cf25b2.th b/uvr5_models/Demucs/04573f0d-f3cf25b2.th new file mode 100644 index 0000000000000000000000000000000000000000..87e7befdc8b254d6cd7bfc8c2739e405783b7c3b --- /dev/null +++ b/uvr5_models/Demucs/04573f0d-f3cf25b2.th @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3cf25b222c4eed7cd49dd8b2c9597d50c18bd154090f7b919cfa5f93cf22c49 +size 84141271 diff --git a/uvr5_models/Demucs/92cfc3b6-ef3bcb9c.th b/uvr5_models/Demucs/92cfc3b6-ef3bcb9c.th new file mode 100644 index 0000000000000000000000000000000000000000..dc5aeb5ce23edf05faa98f84e7f8df6e3d121f1f --- /dev/null +++ b/uvr5_models/Demucs/92cfc3b6-ef3bcb9c.th @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef3bcb9c8b40d14ae5d51b6db2587339cc12c6b77c0be151ce6d69002e087bf2 +size 84141271 diff --git a/uvr5_models/Demucs/955717e8-8726e21a.th b/uvr5_models/Demucs/955717e8-8726e21a.th new file mode 100644 index 0000000000000000000000000000000000000000..94c5ce7eda92bb105307197a7be1d9635b417a1f --- /dev/null +++ b/uvr5_models/Demucs/955717e8-8726e21a.th @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8726e21a993978c7ba086d3872e7608d7d5bfca646ca4aca459ffda844faa8b4 +size 84141911 diff --git a/uvr5_models/Demucs/d12395a8-e57c48e6.th b/uvr5_models/Demucs/d12395a8-e57c48e6.th new file mode 100644 index 0000000000000000000000000000000000000000..b2516f0ba249a05c119eba7e3c52addc11785d41 --- /dev/null +++ b/uvr5_models/Demucs/d12395a8-e57c48e6.th @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57c48e6b0e38af4f7118d7bd08c49f0a0c0edf7d09143bdd902ea0d237303e6 +size 84141271 diff --git a/uvr5_models/Demucs/f7e0c4bc-ba3fe64a.th b/uvr5_models/Demucs/f7e0c4bc-ba3fe64a.th new file mode 100644 index 0000000000000000000000000000000000000000..1d2f2cebac207dbb924b9011860f22621184d741 --- /dev/null +++ b/uvr5_models/Demucs/f7e0c4bc-ba3fe64a.th @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3fe64ae8ef66ac9a4857222ce48efbdc5eb3ad375cb79dd13debee5aaa4066 +size 84141271 diff --git a/uvr5_models/Demucs/hdemucs_mmi.yaml b/uvr5_models/Demucs/hdemucs_mmi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ea089139bfbef4a1126ab25e93c3dc380a90b46 --- /dev/null +++ b/uvr5_models/Demucs/hdemucs_mmi.yaml @@ -0,0 +1,2 @@ +models: ['75fc33f5'] +segment: 44 diff --git a/uvr5_models/Demucs/htdemucs.yaml b/uvr5_models/Demucs/htdemucs.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d5f2089fa3e1a0335d93de070f6802598cd4a4d --- /dev/null +++ b/uvr5_models/Demucs/htdemucs.yaml @@ -0,0 +1 @@ +models: ['955717e8'] diff --git a/uvr5_models/Demucs/htdemucs_6s.yaml b/uvr5_models/Demucs/htdemucs_6s.yaml new file mode 100644 index 0000000000000000000000000000000000000000..651a0fa536038a3e6d650f7b2bcc0b50ff7a4be9 --- /dev/null +++ b/uvr5_models/Demucs/htdemucs_6s.yaml @@ -0,0 +1 @@ +models: ['5c90dfd2'] diff --git a/uvr5_models/Demucs/htdemucs_ft.yaml b/uvr5_models/Demucs/htdemucs_ft.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba5c69c272770f5e5db3dd5fcda75b94ba523250 --- /dev/null +++ b/uvr5_models/Demucs/htdemucs_ft.yaml @@ -0,0 +1,7 @@ +models: ['f7e0c4bc', 'd12395a8', '92cfc3b6', '04573f0d'] +weights: [ + [1., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 1., 0.], + [0., 0., 0., 1.], +] \ No newline at end of file diff --git a/uvr5_models/MDX/Kim_Inst.onnx b/uvr5_models/MDX/Kim_Inst.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a1cfbc17713f1caef9d11696002bb78aae781ff2 --- /dev/null +++ b/uvr5_models/MDX/Kim_Inst.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b1940e7122fbdd2beadc65507cbff6c352d79012a8a7e60d56db98532af5f7 +size 66759214 diff --git a/uvr5_models/MDX/Kim_Vocal_1.onnx b/uvr5_models/MDX/Kim_Vocal_1.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4d6e31ac4c3e53021d67bf71f7aa576bbff391b5 --- /dev/null +++ b/uvr5_models/MDX/Kim_Vocal_1.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f313140ef8fecc3041881b60ecb993d985a0281a138b2fb634aa8901aebc38cb +size 66759214 diff --git a/uvr5_models/MDX/Kim_Vocal_2.onnx b/uvr5_models/MDX/Kim_Vocal_2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..89d344b3926e4d0d7ee41b78e7f387558ec2acdc --- /dev/null +++ b/uvr5_models/MDX/Kim_Vocal_2.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce74ef3b6a6024ce44211a07be9cf8bc6d87728cc852a68ab34eb8e58cde9c8b +size 66759214 diff --git a/uvr5_models/MDX/Reverb_HQ_By_FoxJoy.onnx b/uvr5_models/MDX/Reverb_HQ_By_FoxJoy.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0af4086572259731e9237a7d5acc2254f2d30cf5 --- /dev/null +++ b/uvr5_models/MDX/Reverb_HQ_By_FoxJoy.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233bb5c6aaa365e568659a0a81211746fa881f8f47f82d9e864fce1f7692db80 +size 66780123 diff --git a/uvr5_models/MDX/UVR-MDX-NET-Inst_1.onnx b/uvr5_models/MDX/UVR-MDX-NET-Inst_1.onnx new file mode 100644 index 0000000000000000000000000000000000000000..86732ec554ea5624474d9d109f6a72b3570fdb34 --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET-Inst_1.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca53f94b7a0cbb04fcfcc8f3ea5ec1ae22cd8ad044f5e673588859f83976f5e +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET-Inst_2.onnx b/uvr5_models/MDX/UVR-MDX-NET-Inst_2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3d7d5ea74c2caf49634f847217378bb626e2e77a --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET-Inst_2.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3a96a664d28b52db9def0a9cae9a16dbb524d8325bfe8f0ac64ac5d231456bc +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET-Inst_3.onnx b/uvr5_models/MDX/UVR-MDX-NET-Inst_3.onnx new file mode 100644 index 0000000000000000000000000000000000000000..548cacd28b73e2f76dde5f4281f1e0d0dcf537e5 --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET-Inst_3.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b7834e2972158d8c9864e7376e3a7d084079c80a23f38dc31c4b0a4e901a1cb +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_1.onnx b/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_1.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3302ace57202067fd1e9c709d76bc8d347658f5d --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_1.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38a045c4ded87e3bf97b609ec5be7910e8a7cecec455f507227ab12b5e29f7f9 +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_2.onnx b/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1340d10933420baf427d06cbf9a51ac7903b5398 --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_2.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197f8ab296df850f961e68c595f6649acb7d9e621b5600b460f3458967299112 +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_3.onnx b/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_3.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f2b6b241d9246f392067e0717c8b252857022e6c --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_3.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:317554b07fe1ea5279a77f2b1520a41ea4b93432560c4ffd08792c30fddf9adc +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_4.onnx b/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_4.onnx new file mode 100644 index 0000000000000000000000000000000000000000..45b11285dea638eeecfe4487dfc39775da075b14 --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_4.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c4b5b9b05090fdf238f38ba5046813982d50e2a652e9cb3324ea79720c3c9c8 +size 59074342 diff --git a/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_5.onnx b/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_5.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e674c464504ee1cddb1371129a4314eacabbca64 --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_5.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:811cb24095d865763752310848b7ec86aeede0626cb05749ab35350e46897000 +size 59074342 diff --git a/uvr5_models/MDX/UVR-MDX-NET-Inst_Main.onnx b/uvr5_models/MDX/UVR-MDX-NET-Inst_Main.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3c1ab77a4f5874c013ff0269cc926fcc9a7ab466 --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET-Inst_Main.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab401dfe4a548b87deb64f975294bd56ff946aa32903f53b4b24bb13b2cce1e +size 52786726 diff --git a/uvr5_models/MDX/UVR-MDX-NET-Inst_full_292.onnx b/uvr5_models/MDX/UVR-MDX-NET-Inst_full_292.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9f6c60def7873f03a26391afabb238103fc5c318 --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET-Inst_full_292.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:020f6b65fa219fb7c285e4f3fc2863bf22daf03c4c93e547b6d13d5f2757a7ec +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET-Voc_FT.onnx b/uvr5_models/MDX/UVR-MDX-NET-Voc_FT.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3048949a6f427d212f310e9a13494306da6e00ec --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET-Voc_FT.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534b2070fcc7df514b13ef660dc8cbb328679c2374d04354a5c42bb14ecce111 +size 66762490 diff --git a/uvr5_models/MDX/UVR-MDX-NET_Crowd_HQ_1.onnx b/uvr5_models/MDX/UVR-MDX-NET_Crowd_HQ_1.onnx new file mode 100644 index 0000000000000000000000000000000000000000..466c3fa69b05f5b27c19cc11eb23c99909d2a4d0 --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET_Crowd_HQ_1.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:313b7bf869c411fdafe005cf0d5a635c405cb3d0df137178a64091952d75225c +size 59074342 diff --git a/uvr5_models/MDX/UVR-MDX-NET_Inst_187_beta.onnx b/uvr5_models/MDX/UVR-MDX-NET_Inst_187_beta.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b2ff188d7a8ea6ed25bcf1916359853b8fd0cb8f --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET_Inst_187_beta.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c74566f3c3033cacba996328b2ee90bf77ef79ea6c35b7841df183b7906f54a5 +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET_Inst_82_beta.onnx b/uvr5_models/MDX/UVR-MDX-NET_Inst_82_beta.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f91e15a00d9d805623d70dc7d95b69e2e8329ba2 --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET_Inst_82_beta.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c268302f09ab53687072618e056a611272a7e2c3fd9b3b59164da152f3588e +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET_Inst_90_beta.onnx b/uvr5_models/MDX/UVR-MDX-NET_Inst_90_beta.onnx new file mode 100644 index 0000000000000000000000000000000000000000..db0d80d538ff6ecf3a15cabaa0c84500ff0b5ccc --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET_Inst_90_beta.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d902868a46575aea6ee2335736ff3b53faf497a6bdaa1b864e0fd84eb1b42a5 +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET_Main_340.onnx b/uvr5_models/MDX/UVR-MDX-NET_Main_340.onnx new file mode 100644 index 0000000000000000000000000000000000000000..75ef0024acd46a2900ea78e948325d044ec7580a --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET_Main_340.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78792633b4007755af12ecde20f709b4f0b99563b1d25fe0a501ed2122aff218 +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET_Main_390.onnx b/uvr5_models/MDX/UVR-MDX-NET_Main_390.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a752391e7cfb5b134ebef388734cd1da9eb5dfb5 --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET_Main_390.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286c4f0847ca837e2c3f4c4058f756d5f150cbf080506aa6f33a2847aba92e8c +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET_Main_406.onnx b/uvr5_models/MDX/UVR-MDX-NET_Main_406.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c0f58423f248611332f6a3c33382d0d426c67449 --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET_Main_406.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f158816a44eef1f0ba0f48b813cbfcf460ed1c70a754af3609ade44aaf7d1b23 +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET_Main_427.onnx b/uvr5_models/MDX/UVR-MDX-NET_Main_427.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f6731f9dcc99c5b1ad0af85dd845280e833acf46 --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET_Main_427.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95275802a27801b97e3c0552b6eaa69f9bb3bd7df53cdf0536cce0a753f702cc +size 66759214 diff --git a/uvr5_models/MDX/UVR-MDX-NET_Main_438.onnx b/uvr5_models/MDX/UVR-MDX-NET_Main_438.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a6a17015453da90871426317c547193196bd6f1e --- /dev/null +++ b/uvr5_models/MDX/UVR-MDX-NET_Main_438.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e1ad93587a163a0987a0168b99a2ad875c0d9bfc3afb596b7c36b09c7f5c26 +size 66759214 diff --git a/uvr5_models/MDX/UVR_MDXNET_1_9703.onnx b/uvr5_models/MDX/UVR_MDXNET_1_9703.onnx new file mode 100644 index 0000000000000000000000000000000000000000..50d00b7e34e7763954283b9fc13f2d903072be03 --- /dev/null +++ b/uvr5_models/MDX/UVR_MDXNET_1_9703.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:229ad3bb96a037e89d8ed86732d6d3675856e6a07c3e3f02896eac01ec7ee4be +size 29704436 diff --git a/uvr5_models/MDX/UVR_MDXNET_2_9682.onnx b/uvr5_models/MDX/UVR_MDXNET_2_9682.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1d4c430b6d6a8a7d8abea9bccac1959e4ea69ab0 --- /dev/null +++ b/uvr5_models/MDX/UVR_MDXNET_2_9682.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1deb7295acd3206bc9582a5d92f1b0a74bf3f41c7c1fb78a0ac0123cde4372db +size 29704436 diff --git a/uvr5_models/MDX/UVR_MDXNET_3_9662.onnx b/uvr5_models/MDX/UVR_MDXNET_3_9662.onnx new file mode 100644 index 0000000000000000000000000000000000000000..140b8e3eb273df75384c691462998774b3928a52 --- /dev/null +++ b/uvr5_models/MDX/UVR_MDXNET_3_9662.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e02220e80d8253f4c2209f8924298b2b686bbdf2868b788ff5500fb9bd94aadc +size 29704436 diff --git a/uvr5_models/MDX/UVR_MDXNET_9482.onnx b/uvr5_models/MDX/UVR_MDXNET_9482.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c40c9257378536385f625a75f7bc74cb35c1eaa5 --- /dev/null +++ b/uvr5_models/MDX/UVR_MDXNET_9482.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4f365207c56deb115bceedff3ad8fe98a751c745f9e370cecec6226b8b47184 +size 29704436 diff --git a/uvr5_models/MDX/UVR_MDXNET_KARA.onnx b/uvr5_models/MDX/UVR_MDXNET_KARA.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4bfb51872cc19150179df558fb7212e5359427ea --- /dev/null +++ b/uvr5_models/MDX/UVR_MDXNET_KARA.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3167c87333a48548413e972a286bf40bf5694001d2853861eb1435953f02d63 +size 29704436 diff --git a/uvr5_models/MDX/UVR_MDXNET_KARA_2.onnx b/uvr5_models/MDX/UVR_MDXNET_KARA_2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..18aeb6a8d05b84fa74fdd0de7ed917f6df273ebd --- /dev/null +++ b/uvr5_models/MDX/UVR_MDXNET_KARA_2.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf32e15105a09c0f7dddd2b67346146334d6f3ecb399ed7638eba2ab07cbf5f4 +size 52786726 diff --git a/uvr5_models/MDX/UVR_MDXNET_Main.onnx b/uvr5_models/MDX/UVR_MDXNET_Main.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d9645fd239fd211619aabeb431d3093a45ed8b85 --- /dev/null +++ b/uvr5_models/MDX/UVR_MDXNET_Main.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8289784cda38543ff431add4070662813311a8cccfc0112ca82f76d9dba2b4ca +size 66759214 diff --git a/uvr5_models/mdx_c_configs/config_melbandroformer_inst.yaml b/uvr5_models/mdx_c_configs/config_melbandroformer_inst.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b1395e978d64cb1c37d3015adc2feeb0805e3b94 --- /dev/null +++ b/uvr5_models/mdx_c_configs/config_melbandroformer_inst.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 1101 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - Instrumental + - Vocals + target_instrument: Instrumental + use_amp: True + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 2 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/config_melbandroformer_inst_v2.yaml b/uvr5_models/mdx_c_configs/config_melbandroformer_inst_v2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4297c088f7b8bd2f28308d8a8d1e0694cdec967 --- /dev/null +++ b/uvr5_models/mdx_c_configs/config_melbandroformer_inst_v2.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 1101 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 3 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - Instrumental + - Vocals + target_instrument: Instrumental + use_amp: True + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 2 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/config_melbandroformer_instvoc_duality.yaml b/uvr5_models/mdx_c_configs/config_melbandroformer_instvoc_duality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b93e721853f4d90efa7f0bead82f6a1b791fc19f --- /dev/null +++ b/uvr5_models/mdx_c_configs/config_melbandroformer_instvoc_duality.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 256 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 2 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - Vocals + - Instrumental + target_instrument: null + use_amp: True + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 2 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/config_vocals_mel_band_roformer_kim.yaml b/uvr5_models/mdx_c_configs/config_vocals_mel_band_roformer_kim.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8130c9958eead0d2efd27f27f4f39ea5ca051a26 --- /dev/null +++ b/uvr5_models/mdx_c_configs/config_vocals_mel_band_roformer_kim.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 256 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - Vocals + - Instrumental + target_instrument: Vocals + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 1 + chunk_size: 352800 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/model1.yaml b/uvr5_models/mdx_c_configs/model1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b241f526051277fe7d8c1275a60300468fb88d30 --- /dev/null +++ b/uvr5_models/mdx_c_configs/model1.yaml @@ -0,0 +1,34 @@ +audio: + chunk_size: 260096 + dim_f: 4096 + dim_t: 128 + hop_length: 2048 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 +model: + act: gelu + bottleneck_factor: 4 + growth: 64 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 8 + grad_clip: 0 + instruments: + - Vocals + - Drums + - Bass + - Other + lr: 5.0e-05 + target_instrument: null +inference: + batch_size: 1 + dim_t: 1024 + num_overlap: 8 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/model2.yaml b/uvr5_models/mdx_c_configs/model2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..249e2c9e43cf746de9e709352219d418853ced5d --- /dev/null +++ b/uvr5_models/mdx_c_configs/model2.yaml @@ -0,0 +1,34 @@ +audio: + chunk_size: 260096 + dim_f: 4096 + dim_t: 128 + hop_length: 2048 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 +model: + act: gelu + bottleneck_factor: 4 + growth: 64 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 256 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 8 + grad_clip: 0 + instruments: + - Vocals + - Drums + - Bass + - Other + lr: 3.0e-05 + target_instrument: null +inference: + batch_size: 1 + dim_t: 1024 + num_overlap: 8 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/model3.yaml b/uvr5_models/mdx_c_configs/model3.yaml new file mode 100644 index 0000000000000000000000000000000000000000..565a0b2204a4861d315e76f7d42f7d1281b161e8 --- /dev/null +++ b/uvr5_models/mdx_c_configs/model3.yaml @@ -0,0 +1,34 @@ +audio: + chunk_size: 260096 + dim_f: 4096 + dim_t: 128 + hop_length: 2048 + n_fft: 12288 + num_channels: 2 + sample_rate: 44100 +model: + act: gelu + bottleneck_factor: 4 + growth: 64 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 8 + grad_clip: 0 + instruments: + - Vocals + - Drums + - Bass + - Other + lr: 5.0e-05 + target_instrument: Vocals +inference: + batch_size: 1 + dim_t: 1024 + num_overlap: 8 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/modelA.yaml b/uvr5_models/mdx_c_configs/modelA.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5f7cd3a85f7b1fd169a229b4b0c3d94090b0f8d --- /dev/null +++ b/uvr5_models/mdx_c_configs/modelA.yaml @@ -0,0 +1,39 @@ +audio: + chunk_size: 261120 + dim_f: 4096 + dim_t: 256 + hop_length: 1024 + min_mean_abs: 0.01 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 +model: + act: gelu + bottleneck_factor: 4 + growth: 64 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 64 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 6 + coarse_loss_clip: true + ema_momentum: 0.999 + grad_clip: null + instruments: + - Vocals + - Drums + - Bass + - Other + lr: 0.0001 + num_steps: 100000 + q: 0.4 + target_instrument: null +inference: + batch_size: 2 + dim_t: 1024 + num_overlap: 8 diff --git a/uvr5_models/mdx_c_configs/modelB.yaml b/uvr5_models/mdx_c_configs/modelB.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd2f00394249c7d60b0a228634fb1e1727a05c3c --- /dev/null +++ b/uvr5_models/mdx_c_configs/modelB.yaml @@ -0,0 +1,41 @@ +audio: + chunk_size: 261120 + dim_f: 4096 + dim_t: 256 + hop_length: 1024 + min_mean_abs: 0.01 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 +model: + act: gelu + bottleneck_factor: 4 + growth: 64 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 64 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 6 + coarse_loss_clip: false + datasets: + - ../data/moises/bleeding + ema_momentum: 0.999 + grad_clip: null + instruments: + - Vocals + - Drums + - Bass + - Other + lr: 0.0001 + num_steps: 150000 + q: 0.93 + target_instrument: null +inference: + batch_size: 2 + dim_t: 1024 + num_overlap: 8 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/model_2_stem_061321.yaml b/uvr5_models/mdx_c_configs/model_2_stem_061321.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5412e0e16ea287b59ef6c84435fd81169d81d53 --- /dev/null +++ b/uvr5_models/mdx_c_configs/model_2_stem_061321.yaml @@ -0,0 +1,36 @@ +audio: + chunk_size: 260096 + dim_f: 4096 + dim_t: 256 + hop_length: 2048 + n_fft: 12288 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 64 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 + name: epoch_10.ckpt +training: + batch_size: 16 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 5.0e-05 + target_instrument: null + num_epochs: 100 + num_steps: 1000 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 8 diff --git a/uvr5_models/mdx_c_configs/model_2_stem_full_band.yaml b/uvr5_models/mdx_c_configs/model_2_stem_full_band.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af253ed08b65dbdecdde87b498601faf82a0ade8 --- /dev/null +++ b/uvr5_models/mdx_c_configs/model_2_stem_full_band.yaml @@ -0,0 +1,36 @@ +audio: + chunk_size: 260096 + dim_f: 6144 + dim_t: 128 + hop_length: 2048 + n_fft: 12288 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 64 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 6 + scale: + - 2 + - 2 +training: + batch_size: 14 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 3.0e-05 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + augmentation: 1 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 8 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/model_2_stem_full_band_2.yaml b/uvr5_models/mdx_c_configs/model_2_stem_full_band_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d8dfb77bcb021a2433f377e59afc0d3b53248ea --- /dev/null +++ b/uvr5_models/mdx_c_configs/model_2_stem_full_band_2.yaml @@ -0,0 +1,36 @@ +audio: + chunk_size: 260096 + dim_f: 6144 + dim_t: 128 + hop_length: 2048 + n_fft: 12288 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 6 + scale: + - 2 + - 2 +training: + batch_size: 14 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 2.0e-05 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + augmentation: 1 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 8 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/model_2_stem_full_band_3.yaml b/uvr5_models/mdx_c_configs/model_2_stem_full_band_3.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1394742ba5a5dff875405ca057988c35ca583ab --- /dev/null +++ b/uvr5_models/mdx_c_configs/model_2_stem_full_band_3.yaml @@ -0,0 +1,39 @@ +audio: + chunk_size: 261120 + dim_f: 6144 + dim_t: 256 + hop_length: 1024 + n_fft: 12288 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 6 + scale: + - 2 + - 2 +training: + batch_size: 6 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 1.0e-05 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + augmentation: 1 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 8 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/model_2_stem_full_band_8k.yaml b/uvr5_models/mdx_c_configs/model_2_stem_full_band_8k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..207aa712b561221c136576db6a6e7d6e35915ba4 --- /dev/null +++ b/uvr5_models/mdx_c_configs/model_2_stem_full_band_8k.yaml @@ -0,0 +1,43 @@ +audio: + chunk_size: 261120 + dim_f: 4096 + dim_t: 256 + hop_length: 1024 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 6 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 1.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + augmentation: 1 + augmentation_type: simple1 + augmentation_mix: true + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 8 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/model_bs_roformer_ep_317_sdr_12.9755.yaml b/uvr5_models/mdx_c_configs/model_bs_roformer_ep_317_sdr_12.9755.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4a3d323322d75af7d981e9de2ef3fa29e786812 --- /dev/null +++ b/uvr5_models/mdx_c_configs/model_bs_roformer_ep_317_sdr_12.9755.yaml @@ -0,0 +1,133 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 # don't work (use in model) + hop_length: 441 # don't work (use in model) + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 512 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + freqs_per_bands: !!python/tuple + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 128 + - 129 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: true + dim_freqs_in: 1025 + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: false + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 16 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 5.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: Vocals + num_epochs: 1000 + num_steps: 1000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: simple1 + use_mp3_compress: false # Deprecated + augmentation_mix: true # Mix several stems of the same type with some probability + augmentation_loudness: true # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0.5 + augmentation_loudness_max: 1.5 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 4 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/model_bs_roformer_ep_368_sdr_12.9628.yaml b/uvr5_models/mdx_c_configs/model_bs_roformer_ep_368_sdr_12.9628.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe893b1a68b8ae8ea8bb5a7ac2b7f12e0c53a826 --- /dev/null +++ b/uvr5_models/mdx_c_configs/model_bs_roformer_ep_368_sdr_12.9628.yaml @@ -0,0 +1,133 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 # don't work (use in model) + hop_length: 441 # don't work (use in model) + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 512 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + freqs_per_bands: !!python/tuple + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 128 + - 129 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: true + dim_freqs_in: 1025 + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: false + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 16 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 5.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: Vocals + num_epochs: 1000 + num_steps: 1000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: simple1 + use_mp3_compress: false # Deprecated + augmentation_mix: true # Mix several stems of the same type with some probability + augmentation_loudness: true # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0.5 + augmentation_loudness_max: 1.5 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 4 diff --git a/uvr5_models/mdx_c_configs/model_bs_roformer_ep_937_sdr_10.5309.yaml b/uvr5_models/mdx_c_configs/model_bs_roformer_ep_937_sdr_10.5309.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f623832cc06ebc5fa8a049fad6b1319c6038336d --- /dev/null +++ b/uvr5_models/mdx_c_configs/model_bs_roformer_ep_937_sdr_10.5309.yaml @@ -0,0 +1,138 @@ +audio: + chunk_size: 131584 + dim_f: 1024 + dim_t: 256 + hop_length: 512 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 384 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + linear_transformer_depth: 0 + freqs_per_bands: !!python/tuple + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 128 + - 129 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: true + dim_freqs_in: 1025 + stft_n_fft: 2048 + stft_hop_length: 512 + stft_win_length: 2048 + stft_normalized: false + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 4 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - No Drum-Bass + - Drum-Bass + lr: 5.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: No Drum-Bass + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + +inference: + batch_size: 1 + dim_t: 512 + num_overlap: 4 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml b/uvr5_models/mdx_c_configs/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c906f2931cbae3cf64551c231e285ca10097fe5 --- /dev/null +++ b/uvr5_models/mdx_c_configs/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml @@ -0,0 +1,72 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 # don't work (use in model) + hop_length: 441 # don't work (use in model) + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 384 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 9 + gradient_accumulation_steps: 8 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 4.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: Vocals + num_epochs: 1000 + num_steps: 1000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: simple1 + use_mp3_compress: false # Deprecated + augmentation_mix: true # Mix several stems of the same type with some probability + augmentation_loudness: true # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0.5 + augmentation_loudness_max: 1.5 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 4 \ No newline at end of file diff --git a/uvr5_models/mdx_c_configs/sndfx.yaml b/uvr5_models/mdx_c_configs/sndfx.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75fccad058b0c882b64a24afd6b185f5980683ba --- /dev/null +++ b/uvr5_models/mdx_c_configs/sndfx.yaml @@ -0,0 +1,41 @@ +audio: + chunk_size: 261120 + dim_f: 1024 + dim_t: 256 + hop_length: 1024 + min_mean_abs: 0.01 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + stereo_prob: 0.7 +model: + act: gelu + bottleneck_factor: 4 + growth: 64 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 64 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 8 + ema_momentum: 0.999 + grad_clip: null + instruments: + - Music + - Speech + - SFX + lr: 0.0001 + num_steps: 30000 + target_instrument: null +inference: + batch_size: 8 + dim_t: 256 + instruments: + - Music + - Dialog + - Effect + num_overlap: 8 diff --git a/uvr5_models/model_data_new.json b/uvr5_models/model_data_new.json new file mode 100644 index 0000000000000000000000000000000000000000..cd12b0a94026d7e0159baab9ee1e517c0904fb49 --- /dev/null +++ b/uvr5_models/model_data_new.json @@ -0,0 +1,415 @@ +{ + "0ddfc0eb5792638ad5dc27850236c246": { + "compensate": 1.035, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Vocals" + }, + "26d308f91f3423a67dc69a6d12a8793d": { + "compensate": 1.035, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 9, + "mdx_n_fft_scale_set": 8192, + "primary_stem": "Other" + }, + "2cdd429caac38f0194b133884160f2c6": { + "compensate": 1.045, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Instrumental" + }, + "2f5501189a2f6db6349916fabe8c90de": { + "compensate": 1.035, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Vocals", + "is_karaoke": true + }, + "398580b6d5d973af3120df54cee6759d": { + "compensate": 1.75, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Vocals" + }, + "488b3e6f8bd3717d9d7c428476be2d75": { + "compensate": 1.035, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Instrumental" + }, + "4910e7827f335048bdac11fa967772f9": { + "compensate": 1.035, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 7, + "mdx_n_fft_scale_set": 4096, + "primary_stem": "Drums" + }, + "53c4baf4d12c3e6c3831bb8f5b532b93": { + "compensate": 1.043, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Vocals" + }, + "5d343409ef0df48c7d78cce9f0106781": { + "compensate": 1.075, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Vocals" + }, + "5f6483271e1efb9bfb59e4a3e6d4d098": { + "compensate": 1.035, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 9, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Vocals" + }, + "65ab5919372a128e4167f5e01a8fda85": { + "compensate": 1.035, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 8192, + "primary_stem": "Other" + }, + "6703e39f36f18aa7855ee1047765621d": { + "compensate": 1.035, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 9, + "mdx_n_fft_scale_set": 16384, + "primary_stem": "Bass" + }, + "6b31de20e84392859a3d09d43f089515": { + "compensate": 1.035, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Vocals" + }, + "867595e9de46f6ab699008295df62798": { + "compensate": 1.03, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Vocals" + }, + "a3cd63058945e777505c01d2507daf37": { + "compensate": 1.03, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Vocals" + }, + "b33d9b3950b6cbf5fe90a32608924700": { + "compensate": 1.03, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Vocals" + }, + "c3b29bdce8c4fa17ec609e16220330ab": { + "compensate": 1.035, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 16384, + "primary_stem": "Bass" + }, + "ceed671467c1f64ebdfac8a2490d0d52": { + "compensate": 1.035, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Instrumental" + }, + "d2a1376f310e4f7fa37fb9b5774eb701": { + "compensate": 1.035, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Instrumental" + }, + "d7bff498db9324db933d913388cba6be": { + "compensate": 1.035, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Vocals" + }, + "d94058f8c7f1fae4164868ae8ae66b20": { + "compensate": 1.035, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Vocals" + }, + "dc41ede5961d50f277eb846db17f5319": { + "compensate": 1.035, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 9, + "mdx_n_fft_scale_set": 4096, + "primary_stem": "Drums" + }, + "e5572e58abf111f80d8241d2e44e7fa4": { + "compensate": 1.028, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Instrumental" + }, + "e7324c873b1f615c35c1967f912db92a": { + "compensate": 1.03, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Vocals" + }, + "1c56ec0224f1d559c42fd6fd2a67b154": { + "compensate": 1.025, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 5120, + "primary_stem": "Instrumental" + }, + "f2df6d6863d8f435436d8b561594ff49": { + "compensate": 1.035, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Instrumental" + }, + "b06327a00d5e5fbc7d96e1781bbdb596": { + "compensate": 1.035, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Instrumental" + }, + "94ff780b977d3ca07c7a343dab2e25dd": { + "compensate": 1.039, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Instrumental" + }, + "73492b58195c3b52d34590d5474452f6": { + "compensate": 1.043, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Vocals" + }, + "970b3f9492014d18fefeedfe4773cb42": { + "compensate": 1.009, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Vocals" + }, + "1d64a6d2c30f709b8c9b4ce1366d96ee": { + "compensate": 1.065, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 5120, + "primary_stem": "Instrumental", + "is_karaoke": true + }, + "203f2a3955221b64df85a41af87cf8f0": { + "compensate": 1.035, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Instrumental" + }, + "291c2049608edb52648b96e27eb80e95": { + "compensate": 1.035, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Instrumental" + }, + "ead8d05dab12ec571d67549b3aab03fc": { + "compensate": 1.035, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Instrumental" + }, + "cc63408db3d80b4d85b0287d1d7c9632": { + "compensate": 1.033, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Instrumental" + }, + "cd5b2989ad863f116c855db1dfe24e39": { + "compensate": 1.035, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 9, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Reverb" + }, + "55657dd70583b0fedfba5f67df11d711": { + "compensate": 1.022, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 6144, + "primary_stem": "Instrumental" + }, + "b6bccda408a436db8500083ef3491e8b": { + "compensate": 1.02, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Instrumental" + }, + "8a88db95c7fb5dbe6a095ff2ffb428b1": { + "compensate": 1.026, + "mdx_dim_f_set": 2048, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 5120, + "primary_stem": "Instrumental" + }, + "b78da4afc6512f98e4756f5977f5c6b9": { + "compensate": 1.021, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Instrumental" + }, + "77d07b2667ddf05b9e3175941b4454a0": { + "compensate": 1.021, + "mdx_dim_f_set": 3072, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 7680, + "primary_stem": "Vocals" + }, + "0f2a6bc5b49d87d64728ee40e23bceb1": { + "compensate": 1.019, + "mdx_dim_f_set": 2560, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 5120, + "primary_stem": "Instrumental" + }, + "cb790d0c913647ced70fc6b38f5bea1a": { + "compensate": 1.010, + "mdx_dim_f_set": 2560, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 5120, + "primary_stem": "Instrumental" + }, + "b02be2d198d4968a121030cf8950b492": { + "compensate": 1.020, + "mdx_dim_f_set": 2560, + "mdx_dim_t_set": 8, + "mdx_n_fft_scale_set": 5120, + "primary_stem": "No Crowd" + }, + "2154254ee89b2945b97a7efed6e88820": { + "config_yaml": "model_2_stem_061321.yaml" + }, + "063aadd735d58150722926dcbf5852a9": { + "config_yaml": "model_2_stem_061321.yaml" + }, + "c09f714d978b41d718facfe3427e6001": { + "config_yaml": "model_2_stem_061321.yaml" + }, + "fe96801369f6a148df2720f5ced88c19": { + "config_yaml": "model3.yaml" + }, + "02e8b226f85fb566e5db894b9931c640": { + "config_yaml": "model2.yaml" + }, + "e3de6d861635ab9c1d766149edd680d6": { + "config_yaml": "model1.yaml" + }, + "3f2936c554ab73ce2e396d54636bd373": { + "config_yaml": "modelB.yaml" + }, + "890d0f6f82d7574bca741a9e8bcb8168": { + "config_yaml": "modelB.yaml" + }, + "63a3cb8c37c474681049be4ad1ba8815": { + "config_yaml": "modelB.yaml" + }, + "a7fc5d719743c7fd6b61bd2b4d48b9f0": { + "config_yaml": "modelA.yaml" + }, + "3567f3dee6e77bf366fcb1c7b8bc3745": { + "config_yaml": "modelA.yaml" + }, + "a28f4d717bd0d34cd2ff7a3b0a3d065e": { + "config_yaml": "modelA.yaml" + }, + "c9971a18da20911822593dc81caa8be9": { + "config_yaml": "sndfx.yaml" + }, + "57d94d5ed705460d21c75a5ac829a605": { + "config_yaml": "sndfx.yaml" + }, + "e7a25f8764f25a52c1b96c4946e66ba2": { + "config_yaml": "sndfx.yaml" + }, + "104081d24e37217086ce5fde09147ee1": { + "config_yaml": "model_2_stem_061321.yaml" + }, + "1e6165b601539f38d0a9330f3facffeb": { + "config_yaml": "model_2_stem_061321.yaml" + }, + "fe0108464ce0d8271be5ab810891bd7c": { + "config_yaml": "model_2_stem_full_band.yaml" + }, + "e9b82ec90ee56c507a3a982f1555714c": { + "config_yaml": "model_2_stem_full_band_2.yaml" + }, + "99b6ceaae542265a3b6d657bf9fde79f": { + "config_yaml": "model_2_stem_full_band_8k.yaml" + }, + "116f6f9dabb907b53d847ed9f7a9475f": { + "config_yaml": "model_2_stem_full_band_8k.yaml" + }, + "53f707017bfcbb56f5e1bfac420d6732": { + "config_yaml": "model_bs_roformer_ep_317_sdr_12.9755.yaml", + "is_roformer": true + }, + "63e41acc264bf681a73aa9f7e5f606cc": { + "config_yaml": "model_mel_band_roformer_ep_3005_sdr_11.4360.yaml", + "is_roformer": true + }, + "e733736763234047587931fc35322fd9": { + "config_yaml": "model_bs_roformer_ep_937_sdr_10.5309.yaml", + "is_roformer": true + }, + "d7a256bee3e7c620f554bceaab2f68f6": { + "config_yaml": "config_melbandroformer_inst.yaml", + "is_roformer": true + }, + "365ccfa0e04b31ac2e24bbb935142a81": { + "config_yaml": "config_melbandroformer_inst.yaml", + "is_roformer": true + }, + "3c15abf122d8eccc4a0eb97bf84a3e58": { + "config_yaml": "config_melbandroformer_instvoc_duality.yaml", + "is_roformer": true + }, + "9fb197af219c5172ea38703a33aceb79": { + "config_yaml": "config_melbandroformer_instvoc_duality.yaml", + "is_roformer": true + }, + "d789065adfd747d6f585b27b495bcdae": { + "config_yaml": "model_bs_roformer_ep_368_sdr_12.9628.yaml", + "is_roformer": true + }, + "e4ca75912fcff3224a19058e55facfbf": { + "config_yaml": "config_vocals_mel_band_roformer_kim.yaml", + "is_roformer": true + }, + "951f8ef420a941a395a9919f5d55cce9": { + "config_yaml": "config_melbandroformer_inst_v2.yaml", + "is_roformer": true + } +}