ASesYusuf1 committed
Commit 5de8611 · verified · 1 Parent(s): 30bacd0

Upload 146 files
.github/FUNDING.yml ADDED
@@ -0,0 +1 @@
1
+ github: beveradb
.github/ISSUE_TEMPLATE/BUG_REPORT.yml ADDED
@@ -0,0 +1,60 @@
1
+ name: Bug report
2
+ description: Report a problem you encountered
3
+ title: "[Bug]: "
4
+ labels: ["bug"]
5
+ body:
6
+ - type: textarea
7
+ id: bug-description
8
+ attributes:
9
+ label: Describe the bug
10
+ description: Please provide a concise description of the bug.
11
+ placeholder: Bug description
12
+ validations:
13
+ required: true
14
+ - type: checkboxes
15
+ attributes:
16
+ label: Have you searched for existing issues? 🔎
17
+ description: Please search to see if there is already an issue for the problem you encountered.
18
+ options:
19
+ - label: I have searched and found no existing issues.
20
+ required: true
21
+ - type: markdown
22
+ attributes:
23
+ value: "---"
24
+ - type: textarea
25
+ id: screenshots
26
+ attributes:
27
+ label: Screenshots or Videos
28
+ description: Add screenshots, gifs, or videos to help explain your problem.
29
+ placeholder: Upload screenshots, gifs, and videos here.
30
+ validations:
31
+ required: false
32
+ - type: textarea
33
+ id: logs
34
+ attributes:
35
+ label: Logs
36
+ description: Please include the full stack trace of the errors you encounter.
37
+ render: shell
38
+ - type: markdown
39
+ attributes:
40
+ value: "---"
41
+ - type: textarea
42
+ id: system-info
43
+ attributes:
44
+ label: System Info
45
+ description: Provide information about your system.
46
+ value: |
47
+ Operating System:
48
+ Python version:
49
+ Other...
50
+ render: shell
51
+ validations:
52
+ required: true
53
+ - type: textarea
54
+ id: additional
55
+ attributes:
56
+ label: Additional Information
57
+ description: Add any other useful information about the problem here.
58
+ placeholder: Is there any additional helpful information you can share?
59
+ validations:
60
+ required: false
.github/ISSUE_TEMPLATE/FEATURE_REQUEST.yml ADDED
@@ -0,0 +1,13 @@
1
+ name: Feature request
2
+ description: Suggest an idea for this project
3
+ title: "[Feature]: "
4
+ labels: ["enhancement", "feature"]
5
+ body:
6
+ - type: textarea
7
+ id: description
8
+ attributes:
9
+ label: Description
10
+ description: Clearly and concisely describe what you would like to change, add, or implement.
11
+ placeholder: Tell us your idea.
12
+ validations:
13
+ required: true
.github/workflows/github-sponsors.yml ADDED
@@ -0,0 +1,25 @@
1
+ name: Generate Sponsors README
2
+ on:
3
+ workflow_dispatch:
4
+ schedule:
5
+ - cron: 30 15 * * 0-6
6
+ permissions:
7
+ contents: write
8
+ jobs:
9
+ deploy:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - name: Checkout 🛎️
13
+ uses: actions/checkout@v4
14
+
15
+ - name: Generate Sponsors 💖
16
+ uses: JamesIves/github-sponsors-readme-action@v1
17
+ with:
18
+ token: ${{ secrets.SPONSORS_WORKFLOW_PAT }}
19
+ file: 'README.md'
20
+
21
+ - name: Deploy to GitHub Pages 🚀
22
+ uses: JamesIves/github-pages-deploy-action@v4
23
+ with:
24
+ branch: main
25
+ folder: '.'
.github/workflows/publish-to-docker.yml ADDED
@@ -0,0 +1,98 @@
1
+ name: publish-to-docker
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths:
8
+ - pyproject.toml
9
+ - Dockerfile.cpu
10
+ - Dockerfile.gpu
11
+ - Dockerfile.runpod
12
+ workflow_dispatch:
13
+
14
+ jobs:
15
+ build-and-push-docker:
16
+ runs-on: ubuntu-latest
17
+ steps:
18
+ - name: Delete huge unnecessary tools folder
19
+ run: rm -rf /opt/hostedtoolcache
20
+
21
+ - name: Checkout
22
+ uses: actions/checkout@v4
23
+
24
+ - name: Set up Python
25
+ uses: actions/setup-python@v5
26
+ with:
27
+ python-version: '3.x'
28
+
29
+ - name: Install TOML
30
+ run: pip install toml
31
+
32
+ - name: Get version from pyproject.toml
33
+ run: |
34
+ VERSION=$(python -c "import toml; print(toml.load('pyproject.toml')['tool']['poetry']['version'])")
35
+ echo "VERSION=$VERSION" >> $GITHUB_ENV
36
+
37
+ - name: Set up QEMU
38
+ uses: docker/setup-qemu-action@v3
39
+
40
+ - name: Set up Docker Buildx
41
+ uses: docker/setup-buildx-action@v3
42
+
43
+ - name: Login to Docker Hub
44
+ uses: docker/login-action@v3
45
+ with:
46
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
47
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
48
+
49
+ - name: Build and push Docker image for CPU
50
+ if: ${{ github.ref == 'refs/heads/main' }}
51
+ uses: docker/build-push-action@v5
52
+ with:
53
+ file: Dockerfile.cpu
54
+ context: .
55
+ platforms: linux/amd64,linux/arm64
56
+ push: true
57
+ tags: |
58
+ beveradb/audio-separator:cpu-${{ env.VERSION }}
59
+ beveradb/audio-separator:cpu
60
+ beveradb/audio-separator:latest
61
+
62
+ - name: Build and push Docker image for CUDA 11 GPU
63
+ if: ${{ github.ref == 'refs/heads/main' }}
64
+ uses: docker/build-push-action@v5
65
+ with:
66
+ file: Dockerfile.cuda11
67
+ context: .
68
+ platforms: linux/amd64
69
+ push: true
70
+ tags: |
71
+ beveradb/audio-separator:gpu-${{ env.VERSION }}
72
+ beveradb/audio-separator:gpu
73
+
74
+ - name: Build and push Docker image for CUDA 12 GPU
75
+ if: ${{ github.ref == 'refs/heads/main' }}
76
+ uses: docker/build-push-action@v5
77
+ with:
78
+ file: Dockerfile.cuda12
79
+ context: .
80
+ platforms: linux/amd64
81
+ push: true
82
+ tags: |
83
+ beveradb/audio-separator:cuda12-${{ env.VERSION }}
84
+ beveradb/audio-separator:cuda12
85
+
86
+ # Deliberately commented out because Github CI can't build this runpod image due to disk space limits
87
+ # Instead, I build this (17GB) image locally and push it to Docker Hub manually.
88
+ # - name: Build and push Docker image for Runpod
89
+ # if: ${{ github.ref == 'refs/heads/main' }}
90
+ # uses: docker/build-push-action@v5
91
+ # with:
92
+ # file: Dockerfile.runpod
93
+ # context: .
94
+ # platforms: linux/amd64
95
+ # push: true
96
+ # tags: |
97
+ # beveradb/audio-separator:runpod-${{ env.VERSION }}
98
+ # beveradb/audio-separator:runpod
.github/workflows/publish-to-pypi.yml ADDED
@@ -0,0 +1,25 @@
1
+ name: publish-to-pypi
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths:
8
+ - pyproject.toml
9
+ workflow_dispatch:
10
+
11
+ jobs:
12
+ # Auto-publish when version is increased
13
+ publish-pypi:
14
+ # Only publish on `main` branch
15
+ if: github.ref == 'refs/heads/main'
16
+ runs-on: ubuntu-latest
17
+ permissions: # Don't forget permissions
18
+ contents: write
19
+
20
+ steps:
21
+ - uses: etils-actions/pypi-auto-publish@v1
22
+ with:
23
+ pypi-token: ${{ secrets.PYPI_API_TOKEN }}
24
+ gh-token: ${{ secrets.GITHUB_TOKEN }}
25
+ parse-changelog: false
.github/workflows/run-integration-tests.yaml ADDED
@@ -0,0 +1,78 @@
1
+ name: run-integration-tests
2
+
3
+ on:
4
+ pull_request:
5
+
6
+ jobs:
7
+ integration-test:
8
+ runs-on: self-hosted
9
+ env:
10
+ AUDIO_SEPARATOR_MODEL_DIR: ${{ github.workspace }}/models
11
+
12
+ steps:
13
+ - name: Checkout project
14
+ uses: actions/checkout@v4
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: '3.13'
20
+
21
+ - name: Install pipx
22
+ run: python -m pip install --user pipx && python -m pipx ensurepath
23
+
24
+ - name: Install poetry
25
+ run: python -m pipx install poetry
26
+
27
+ - name: Setup PATH
28
+ run: echo "/root/.local/bin" >> $GITHUB_PATH
29
+
30
+ - name: Install system dependencies
31
+ run: |
32
+ apt-get update
33
+ apt-get install -y ffmpeg libsamplerate0 libsamplerate-dev
34
+
35
+ - name: Set up Python
36
+ uses: actions/setup-python@v5
37
+ with:
38
+ python-version: '3.13'
39
+ cache: poetry
40
+
41
+ - name: Create models directory
42
+ run: mkdir -p $AUDIO_SEPARATOR_MODEL_DIR
43
+
44
+ - name: Cache models directory
45
+ uses: actions/cache@v3
46
+ id: model-cache
47
+ with:
48
+ path: ${{ env.AUDIO_SEPARATOR_MODEL_DIR }}
49
+ key: model-cache-${{ hashFiles('tests/integration/test_cli_integration.py') }}
50
+ restore-keys: model-cache-
51
+
52
+ - name: Install Poetry dependencies
53
+ run: poetry install -E cpu
54
+
55
+ - name: Display model cache status
56
+ run: |
57
+ echo "Model cache hit: ${{ steps.model-cache.outputs.cache-hit == 'true' }}"
58
+ echo "Models directory contents:"
59
+ ls -la $AUDIO_SEPARATOR_MODEL_DIR || echo "Directory empty or doesn't exist"
60
+
61
+ - name: Run integration tests
62
+ run: poetry run pytest -sv --cov=audio_separator --cov-report=xml tests/integration
63
+
64
+ - name: Upload coverage reports to Codecov
65
+ uses: codecov/codecov-action@v3
66
+ env:
67
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
68
+
69
+ - name: Upload test results
70
+ if: always()
71
+ uses: actions/upload-artifact@v4
72
+ with:
73
+ name: integration-test-results
74
+ path: |
75
+ *.flac
76
+ tests/*.flac
77
+ **/temp_images/**/*.png
78
+ tests/**/temp_images/**/*.png
.github/workflows/run-unit-tests.yaml ADDED
@@ -0,0 +1,84 @@
1
+ name: run-unit-tests
2
+
3
+ on:
4
+ pull_request:
5
+
6
+ jobs:
7
+ test-ubuntu:
8
+ runs-on: ubuntu-latest
9
+
10
+ strategy:
11
+ matrix:
12
+ python-version: ['3.10', '3.11', '3.12', '3.13']
13
+
14
+ steps:
15
+ - name: Checkout project
16
+ uses: actions/checkout@v4
17
+
18
+ - name: Install poetry
19
+ run: pipx install poetry
20
+
21
+ - name: Set up Python
22
+ uses: actions/setup-python@v5
23
+ with:
24
+ python-version: ${{ matrix.python-version }}
25
+ cache: poetry # caching dependencies from poetry.lock
26
+
27
+ - name: Install Poetry dependencies (CPU)
28
+ run: poetry install -E cpu
29
+
30
+ - name: Run unit tests with coverage
31
+ run: poetry run pytest tests/unit
32
+
33
+ test-macos:
34
+ runs-on: macos-latest
35
+
36
+ strategy:
37
+ matrix:
38
+ python-version: ['3.10', '3.11', '3.12', '3.13']
39
+
40
+ steps:
41
+ - name: Checkout project
42
+ uses: actions/checkout@v4
43
+
44
+ - name: Install poetry
45
+ run: pipx install poetry
46
+
47
+ - name: Set up Python
48
+ uses: actions/setup-python@v5
49
+ with:
50
+ python-version: ${{ matrix.python-version }}
51
+ cache: poetry # caching dependencies from poetry.lock
52
+
53
+ - name: Install Poetry dependencies (CPU)
54
+ run: poetry install -E cpu
55
+
56
+ - name: Run unit tests with coverage
57
+ run: |
58
+ poetry run pytest tests/unit
59
+
60
+ test-windows:
61
+ runs-on: windows-latest
62
+
63
+ strategy:
64
+ matrix:
65
+ python-version: ['3.10', '3.11', '3.12', '3.13']
66
+
67
+ steps:
68
+ - name: Checkout project
69
+ uses: actions/checkout@v4
70
+
71
+ - name: Install poetry
72
+ run: pipx install poetry
73
+
74
+ - name: Set up Python
75
+ uses: actions/setup-python@v5
76
+ with:
77
+ python-version: ${{ matrix.python-version }}
78
+ cache: poetry # caching dependencies from poetry.lock
79
+
80
+ - name: Install Poetry dependencies (CPU)
81
+ run: poetry install -E cpu
82
+
83
+ - name: Run unit tests with coverage
84
+ run: poetry run pytest tests/unit
.gitignore ADDED
@@ -0,0 +1,177 @@
1
+ # Andrew env
2
+ .DS_Store
3
+ .vscode
4
+
5
+ # Andrew functional adds
6
+ /tracks/
7
+ /lyrics/
8
+ /.cache/
9
+ *.onnx
10
+ *.pth
11
+ *.wav
12
+ /*.flac
13
+ *.mp3
14
+ tests/model-metrics/results
15
+ tests/model-metrics/datasets
16
+ temp_images
17
+
18
+ # Byte-compiled / optimized / DLL files
19
+ __pycache__/
20
+ *.py[cod]
21
+ *$py.class
22
+
23
+ # C extensions
24
+ *.so
25
+
26
+ # Distribution / packaging
27
+ .Python
28
+ build/
29
+ develop-eggs/
30
+ dist/
31
+ downloads/
32
+ eggs/
33
+ .eggs/
34
+ lib/
35
+ lib64/
36
+ parts/
37
+ sdist/
38
+ var/
39
+ wheels/
40
+ share/python-wheels/
41
+ *.egg-info/
42
+ .installed.cfg
43
+ *.egg
44
+ MANIFEST
45
+
46
+ # PyInstaller
47
+ # Usually these files are written by a python script from a template
48
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
49
+ *.manifest
50
+ *.spec
51
+
52
+ # Installer logs
53
+ pip-log.txt
54
+ pip-delete-this-directory.txt
55
+
56
+ # Unit test / coverage reports
57
+ htmlcov/
58
+ .tox/
59
+ .nox/
60
+ .coverage
61
+ .coverage.*
62
+ .cache
63
+ nosetests.xml
64
+ coverage.xml
65
+ *.cover
66
+ *.py,cover
67
+ .hypothesis/
68
+ .pytest_cache/
69
+ cover/
70
+
71
+ # Translations
72
+ *.mo
73
+ *.pot
74
+
75
+ # Django stuff:
76
+ *.log
77
+ local_settings.py
78
+ db.sqlite3
79
+ db.sqlite3-journal
80
+
81
+ # Flask stuff:
82
+ instance/
83
+ .webassets-cache
84
+
85
+ # Scrapy stuff:
86
+ .scrapy
87
+
88
+ # Sphinx documentation
89
+ docs/_build/
90
+
91
+ # PyBuilder
92
+ .pybuilder/
93
+ target/
94
+
95
+ # Jupyter Notebook
96
+ .ipynb_checkpoints
97
+
98
+ # IPython
99
+ profile_default/
100
+ ipython_config.py
101
+
102
+ # pyenv
103
+ # For a library or package, you might want to ignore these files since the code is
104
+ # intended to run in multiple environments; otherwise, check them in:
105
+ # .python-version
106
+
107
+ # pipenv
108
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
109
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
110
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
111
+ # install all needed dependencies.
112
+ #Pipfile.lock
113
+
114
+ # poetry
115
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
116
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
117
+ # commonly ignored for libraries.
118
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
119
+ #poetry.lock
120
+
121
+ # pdm
122
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
123
+ #pdm.lock
124
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
125
+ # in version control.
126
+ # https://pdm.fming.dev/#use-with-ide
127
+ .pdm.toml
128
+
129
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
130
+ __pypackages__/
131
+
132
+ # Celery stuff
133
+ celerybeat-schedule
134
+ celerybeat.pid
135
+
136
+ # SageMath parsed files
137
+ *.sage.py
138
+
139
+ # Environments
140
+ .env
141
+ .venv
142
+ env/
143
+ venv/
144
+ ENV/
145
+ env.bak/
146
+ venv.bak/
147
+
148
+ # Spyder project settings
149
+ .spyderproject
150
+ .spyproject
151
+
152
+ # Rope project settings
153
+ .ropeproject
154
+
155
+ # mkdocs documentation
156
+ /site
157
+
158
+ # mypy
159
+ .mypy_cache/
160
+ .dmypy.json
161
+ dmypy.json
162
+
163
+ # Pyre type checker
164
+ .pyre/
165
+
166
+ # pytype static type analyzer
167
+ .pytype/
168
+
169
+ # Cython debug symbols
170
+ cython_debug/
171
+
172
+ # PyCharm
173
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
174
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
175
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
176
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
177
+ #.idea/
LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 karaokenerds
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,562 @@
1
- ---
2
- title: dgfsfxc-tgsacxs-otyhrhs
3
- emoji: 🎵
4
- colorFrom: purple
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 5.29.1
8
- app_file: app.py
9
- pinned: false
10
- short_description: ry46dgf35
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
+ # Audio Separator 🎶
2
+
3
+ [![PyPI version](https://badge.fury.io/py/audio-separator.svg)](https://badge.fury.io/py/audio-separator)
4
+ [![Conda Version](https://img.shields.io/conda/vn/conda-forge/audio-separator.svg)](https://anaconda.org/conda-forge/audio-separator)
5
+ [![Docker pulls](https://img.shields.io/docker/pulls/beveradb/audio-separator.svg)](https://hub.docker.com/r/beveradb/audio-separator/tags)
6
+ [![codecov](https://codecov.io/gh/karaokenerds/python-audio-separator/graph/badge.svg?token=N7YK4ET5JP)](https://codecov.io/gh/karaokenerds/python-audio-separator)
7
+ [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1gSlmSmna7f7fH6OjsiMEDLl-aJ9kGPkY?usp=sharing)
8
+ [![Open In Huggingface](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-sm.svg)](https://huggingface.co/spaces/theneos/audio-separator)
9
+
10
+ **Summary:** Easy-to-use audio stem separation from the command line or as a dependency in your own Python project, using the amazing MDX-Net, VR Arch, Demucs and MDXC models available in UVR by @Anjok07 & @aufr33.
11
+
12
+ Audio Separator is a Python package that allows you to separate an audio file into various stems, using models trained by @Anjok07 for use with [Ultimate Vocal Remover](https://github.com/Anjok07/ultimatevocalremovergui).
13
+
14
+ The simplest (and probably most used) use case for this package is to separate an audio file into two stems, Instrumental and Vocals, which can be very useful for producing karaoke videos! However, the models available in UVR can separate audio into many more stems, such as Drums, Bass, Piano, and Guitar, and perform other audio processing tasks, such as denoising or removing echo/reverb.
15
+
16
+ ## Features
17
+
18
+ - Separate audio into multiple stems, e.g. instrumental and vocals.
19
+ - Supports all common audio formats (WAV, MP3, FLAC, M4A, etc.)
20
+ - Ability to run inference using a pre-trained model in PTH or ONNX format.
21
+ - CLI support for easy use in scripts and batch processing.
22
+ - Python API for integration into other projects.
23
+
24
+ ## Installation 🛠️
25
+
26
+ ### 🐳 Docker
27
+
28
+ If you're able to use docker, you don't actually need to _install_ anything - there are [images published on Docker Hub](https://hub.docker.com/r/beveradb/audio-separator/tags) for GPU (CUDA) and CPU inferencing, for both `amd64` and `arm64` platforms.
29
+
30
+ You probably want to volume-mount a folder containing whatever file you want to separate, which can then also be used as the output folder.
31
+
32
+ For instance, if your current directory has the file `input.wav`, you could execute `audio-separator` as shown below (see [usage](#usage-) section for more details):
33
+
34
+ ```sh
35
+ docker run -it -v `pwd`:/workdir beveradb/audio-separator input.wav
36
+ ```
37
+
38
+ If you're using a machine with a GPU, you'll want to use the GPU specific image and pass in the GPU device to the container, like this:
39
+
40
+ ```sh
41
+ docker run -it --gpus all -v `pwd`:/workdir beveradb/audio-separator:gpu input.wav
42
+ ```
43
+
44
+ If the GPU isn't being detected, make sure your docker runtime environment is passing through the GPU correctly - there are [various guides](https://www.celantur.com/blog/run-cuda-in-docker-on-linux/) online to help with that.
45
+
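As a quick sanity check that the container runtime can see the GPU at all (a generic Docker troubleshooting step, not part of audio-separator itself; the CUDA base image tag below is just an example), you can run:

```sh
# If GPU pass-through is working, this should print your GPU in the nvidia-smi table
docker run --rm --gpus all nvidia/cuda:12.2.0-base-ubuntu22.04 nvidia-smi
```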
46
+ ### 🎮 Nvidia GPU with CUDA or 🧪 Google Colab
47
+
48
+ **Supported CUDA Versions:** 11.8 and 12.2
49
+
50
+ 💬 If successfully configured, you should see this log message when running `audio-separator --env_info`:
51
+ `ONNXruntime has CUDAExecutionProvider available, enabling acceleration`
52
+
53
+ Conda:
54
+ ```sh
55
+ conda install pytorch=*=*cuda* onnxruntime=*=*cuda* audio-separator -c pytorch -c conda-forge
56
+ ```
57
+
58
+ Pip:
59
+ ```sh
60
+ pip install "audio-separator[gpu]"
61
+ ```
62
+
63
+ Docker:
64
+ ```sh
65
+ beveradb/audio-separator:gpu
66
+ ```
67
+
68
+ ###  Apple Silicon, macOS Sonoma+ with M1 or newer CPU (CoreML acceleration)
69
+
70
+ 💬 If successfully configured, you should see this log message when running `audio-separator --env_info`:
71
+ `ONNXruntime has CoreMLExecutionProvider available, enabling acceleration`
72
+
73
+ Pip:
74
+ ```sh
75
+ pip install "audio-separator[cpu]"
76
+ ```
77
+
78
+ ### 🐢 No hardware acceleration, CPU only
79
+
80
+ Conda:
81
+ ```sh
82
+ conda install audio-separator -c pytorch -c conda-forge
83
+ ```
84
+
85
+ Pip:
86
+ ```sh
87
+ pip install "audio-separator[cpu]"
88
+ ```
89
+
90
+ Docker:
91
+ ```sh
92
+ beveradb/audio-separator
93
+ ```
94
+
95
+ ### 🎥 FFmpeg dependency
96
+
97
+ 💬 To test if `audio-separator` has been successfully configured to use FFmpeg, run `audio-separator --env_info`. The log will show `FFmpeg installed`.
98
+
99
+ If you installed `audio-separator` using `conda` or `docker`, FFmpeg should already be available in your environment.
100
+
101
+ Otherwise, you may need to install FFmpeg separately. It should be easy to install on most platforms, e.g.:
102
+
103
+ 🐧 Debian/Ubuntu:
104
+ ```sh
105
+ apt-get update; apt-get install -y ffmpeg
106
+ ```
107
+
108
+  macOS:
109
+ ```sh
110
+ brew update; brew install ffmpeg
111
+ ```
112
+
113
+ ## GPU / CUDA specific installation steps with Pip
114
+
115
+ In theory, all you should need to do to get `audio-separator` working with a GPU is install it with the `[gpu]` extra as above.
116
+
117
+ However, sometimes getting both PyTorch and ONNX Runtime working with CUDA support can be a bit tricky, so it may not work that easily.
118
+
119
+ You may need to reinstall both packages directly, allowing pip to calculate the right versions for your platform, for example:
120
+
121
+ - `pip uninstall torch onnxruntime`
122
+ - `pip cache purge`
123
+ - `pip install --force-reinstall torch torchvision torchaudio`
124
+ - `pip install --force-reinstall onnxruntime-gpu`
125
+
126
+ I generally recommend installing the latest version of PyTorch for your environment using the command recommended by the wizard here:
127
+ <https://pytorch.org/get-started/locally/>
128
+
129
+ ### Multiple CUDA library versions may be needed
130
+
131
+ Depending on your CUDA version and environment, you may need to install specific version(s) of CUDA libraries for ONNX Runtime to use your GPU.
132
+
133
+ 🧪 Google Colab, for example, now uses CUDA 12 by default, but ONNX Runtime still needs CUDA 11 libraries to work.
134
+
135
+ If you see the error `Failed to load library` or `cannot open shared object file` when you run `audio-separator`, this is likely the issue.
136
+
137
+ You can install the CUDA 11 libraries _alongside_ CUDA 12 like so:
138
+ ```sh
139
+ apt update; apt install nvidia-cuda-toolkit
140
+ ```
141
+
142
+ If you encounter the following messages when running on Google Colab or in another environment:
143
+ ```
144
+ [E:onnxruntime:Default, provider_bridge_ort.cc:1862 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1539 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcudnn_adv.so.9: cannot open shared object file: No such file or directory
145
+
146
+ [W:onnxruntime:Default, onnxruntime_pybind_state.cc:993 CreateExecutionProviderInstance] Failed to create CUDAExecutionProvider. Require cuDNN 9.* and CUDA 12.*. Please install all dependencies as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported.
147
+ ```
148
+ You can resolve this by running the following command:
149
+ ```sh
150
+ python -m pip install ort-nightly-gpu --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ort-cuda-12-nightly/pypi/simple/
151
+ ```
152
+
153
+ > Note: if anyone knows how to make this cleaner so we can support both different platform-specific dependencies for hardware acceleration without a separate installation process for each, please let me know or raise a PR!
154
+
155
+ ## Usage 🚀
156
+
157
+ ### Command Line Interface (CLI)
158
+
159
+ You can use Audio Separator via the command line, for example:
160
+
161
+ ```sh
162
+ audio-separator /path/to/your/input/audio.wav --model_filename UVR-MDX-NET-Inst_HQ_3.onnx
163
+ ```
164
+
165
+ This command will download the specified model file, process the `audio.wav` input audio and generate two new files in the current directory, one containing the vocals and one containing the instrumental.
166
+
167
+ **Note:** You do not need to download any files yourself - audio-separator does that automatically for you!
168
+
169
+ To see a list of supported models, run `audio-separator --list_models`
170
+
171
+ Any file listed in the list models output can be specified (with file extension) with the model_filename parameter (e.g. `--model_filename UVR_MDXNET_KARA_2.onnx`) and it will be automatically downloaded to the `--model_file_dir` (default: `/tmp/audio-separator-models/`) folder on first usage.
172
+
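For example, to pre-download a model into a custom cache directory without separating anything (combining the `--download_model_only`, `--model_filename` and `--model_file_dir` options described in the CLI reference below), a command along these lines should work:

```sh
# Fetch the model into /app/models ahead of time; no audio is processed
audio-separator --download_model_only --model_filename UVR_MDXNET_KARA_2.onnx --model_file_dir=/app/models
```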
173
+ ### Listing and Filtering Available Models
174
+
175
+ You can view all available models using the `--list_models` (or `-l`) flag:
176
+
177
+ ```sh
178
+ audio-separator --list_models
179
+ ```
180
+
181
+ The output shows a table with the following columns:
182
+ - Model Filename: The filename to use with `--model_filename`
183
+ - Arch: The model architecture (MDX, MDXC, Demucs, etc.)
184
+ - Output Stems (SDR): The stems this model can separate, with Signal-to-Distortion Ratio scores where available
185
+ - Friendly Name: A human-readable name describing the model
186
+
187
+ #### Filtering Models
188
+
189
+ You can filter and sort the model list by stem type using `--list_filter`. For example, to find models that can separate drums:
190
+
191
+ ```sh
192
+ audio-separator -l --list_filter=drums
193
+ ```
194
+
195
+ Example output:
196
+ ```
197
+ -----------------------------------------------------------------------------------------------------------------------------------
198
+ Model Filename Arch Output Stems (SDR) Friendly Name
199
+ -----------------------------------------------------------------------------------------------------------------------------------
200
+ htdemucs_ft.yaml Demucs vocals (10.8), drums (10.1), bass (11.9), other Demucs v4: htdemucs_ft
201
+ hdemucs_mmi.yaml Demucs vocals (10.3), drums (9.7), bass (12.0), other Demucs v4: hdemucs_mmi
202
+ htdemucs.yaml Demucs vocals (10.0), drums (9.4), bass (11.3), other Demucs v4: htdemucs
203
+ htdemucs_6s.yaml Demucs vocals (9.7), drums (8.5), bass (10.0), guitar, piano, other Demucs v4: htdemucs_6s
204
+ ```
205
+
206
+ #### Limiting Results
207
+
208
+ You can limit the number of results shown using `--list_limit`. This is useful for finding the best performing models for a particular stem. For example, to see the top 5 vocal separation models:
209
+
210
+ ```sh
211
+ audio-separator -l --list_filter=vocals --list_limit=5
212
+ ```
213
+
214
+ Example output:
215
+ ```
216
+ --------------------------------------------------------------------------------------------------------------------------------------------------------------
217
+ Model Filename Arch Output Stems (SDR) Friendly Name
218
+ --------------------------------------------------------------------------------------------------------------------------------------------------------------
219
+ model_bs_roformer_ep_317_sdr_12.9755.ckpt MDXC vocals* (12.9), instrumental (17.0) Roformer Model: BS-Roformer-Viperx-1297
220
+ model_bs_roformer_ep_368_sdr_12.9628.ckpt MDXC vocals* (12.9), instrumental (17.0) Roformer Model: BS-Roformer-Viperx-1296
221
+ vocals_mel_band_roformer.ckpt MDXC vocals* (12.6), other Roformer Model: MelBand Roformer | Vocals by Kimberley Jensen
222
+ melband_roformer_big_beta4.ckpt MDXC vocals* (12.5), other Roformer Model: MelBand Roformer Kim | Big Beta 4 FT by unwa
223
+ mel_band_roformer_kim_ft_unwa.ckpt MDXC vocals* (12.4), other Roformer Model: MelBand Roformer Kim | FT by unwa
224
+ ```
225
+
226
+ #### JSON Output
227
+
228
+ For programmatic use, you can output the model list in JSON format:
229
+
230
+ ```sh
231
+ audio-separator -l --list_format=json
232
+ ```
233
+
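As a minimal sketch of consuming that output from Python (the exact JSON schema isn't documented here, so this only loads the data and reports how many entries were returned, without assuming specific keys):

```python
import json
import subprocess

# Run the CLI and capture its JSON model listing
result = subprocess.run(
    ["audio-separator", "-l", "--list_format=json"],
    capture_output=True, text=True, check=True,
)

# Parse the JSON; inspect the structure before relying on particular fields
models = json.loads(result.stdout)
print(f"Parsed model list with {len(models)} top-level entries")
```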
234
+ ### Full command-line interface options
235
+
236
+ ```sh
237
+ usage: audio-separator [-h] [-v] [-d] [-e] [-l] [--log_level LOG_LEVEL] [--list_filter LIST_FILTER] [--list_limit LIST_LIMIT] [--list_format {pretty,json}] [-m MODEL_FILENAME] [--output_format OUTPUT_FORMAT]
238
+ [--output_bitrate OUTPUT_BITRATE] [--output_dir OUTPUT_DIR] [--model_file_dir MODEL_FILE_DIR] [--download_model_only] [--invert_spect] [--normalization NORMALIZATION]
239
+ [--amplification AMPLIFICATION] [--single_stem SINGLE_STEM] [--sample_rate SAMPLE_RATE] [--use_soundfile] [--use_autocast] [--custom_output_names CUSTOM_OUTPUT_NAMES]
240
+ [--mdx_segment_size MDX_SEGMENT_SIZE] [--mdx_overlap MDX_OVERLAP] [--mdx_batch_size MDX_BATCH_SIZE] [--mdx_hop_length MDX_HOP_LENGTH] [--mdx_enable_denoise] [--vr_batch_size VR_BATCH_SIZE]
241
+ [--vr_window_size VR_WINDOW_SIZE] [--vr_aggression VR_AGGRESSION] [--vr_enable_tta] [--vr_high_end_process] [--vr_enable_post_process]
242
+ [--vr_post_process_threshold VR_POST_PROCESS_THRESHOLD] [--demucs_segment_size DEMUCS_SEGMENT_SIZE] [--demucs_shifts DEMUCS_SHIFTS] [--demucs_overlap DEMUCS_OVERLAP]
243
+ [--demucs_segments_enabled DEMUCS_SEGMENTS_ENABLED] [--mdxc_segment_size MDXC_SEGMENT_SIZE] [--mdxc_override_model_segment_size] [--mdxc_overlap MDXC_OVERLAP]
244
+ [--mdxc_batch_size MDXC_BATCH_SIZE] [--mdxc_pitch_shift MDXC_PITCH_SHIFT]
245
+ [audio_files ...]
246
+
247
+ Separate audio file into different stems.
248
+
249
+ positional arguments:
250
+ audio_files The audio file paths or directory to separate, in any common format.
251
+
252
+ options:
253
+ -h, --help show this help message and exit
254
+
255
+ Info and Debugging:
256
+ -v, --version Show the program's version number and exit.
257
+ -d, --debug Enable debug logging, equivalent to --log_level=debug.
258
+ -e, --env_info Print environment information and exit.
259
+ -l, --list_models List all supported models and exit. Use --list_filter to filter/sort the list and --list_limit to show only top N results.
260
+ --log_level LOG_LEVEL Log level, e.g. info, debug, warning (default: info).
261
+ --list_filter LIST_FILTER Filter and sort the model list by 'name', 'filename', or any stem e.g. vocals, instrumental, drums
262
+ --list_limit LIST_LIMIT Limit the number of models shown
263
+ --list_format {pretty,json} Format for listing models: 'pretty' for formatted output, 'json' for raw JSON dump
264
+
265
+ Separation I/O Params:
266
+ -m MODEL_FILENAME, --model_filename MODEL_FILENAME Model to use for separation (default: model_bs_roformer_ep_317_sdr_12.9755.yaml). Example: -m 2_HP-UVR.pth
267
+ --output_format OUTPUT_FORMAT Output format for separated files, any common format (default: FLAC). Example: --output_format=MP3
268
+ --output_bitrate OUTPUT_BITRATE Output bitrate for separated files, any ffmpeg-compatible bitrate (default: None). Example: --output_bitrate=320k
269
+ --output_dir OUTPUT_DIR Directory to write output files (default: <current dir>). Example: --output_dir=/app/separated
270
+ --model_file_dir MODEL_FILE_DIR Model files directory (default: /tmp/audio-separator-models/). Example: --model_file_dir=/app/models
271
+ --download_model_only Download a single model file only, without performing separation.
272
+
273
+ Common Separation Parameters:
274
+ --invert_spect Invert secondary stem using spectrogram (default: False). Example: --invert_spect
275
+ --normalization NORMALIZATION Max peak amplitude to normalize input and output audio to (default: 0.9). Example: --normalization=0.7
276
+ --amplification AMPLIFICATION Min peak amplitude to amplify input and output audio to (default: 0.0). Example: --amplification=0.4
277
+ --single_stem SINGLE_STEM Output only single stem, e.g. Instrumental, Vocals, Drums, Bass, Guitar, Piano, Other. Example: --single_stem=Instrumental
278
+ --sample_rate SAMPLE_RATE Modify the sample rate of the output audio (default: 44100). Example: --sample_rate=44100
279
+ --use_soundfile Use soundfile to write audio output (default: False). Example: --use_soundfile
280
+ --use_autocast Use PyTorch autocast for faster inference (default: False). Do not use for CPU inference. Example: --use_autocast
281
+ --custom_output_names CUSTOM_OUTPUT_NAMES Custom names for all output files in JSON format (default: None). Example: --custom_output_names='{"Vocals": "vocals_output", "Drums": "drums_output"}'
282
+
283
+ MDX Architecture Parameters:
284
+ --mdx_segment_size MDX_SEGMENT_SIZE Larger consumes more resources, but may give better results (default: 256). Example: --mdx_segment_size=256
285
+ --mdx_overlap MDX_OVERLAP Amount of overlap between prediction windows, 0.001-0.999. Higher is better but slower (default: 0.25). Example: --mdx_overlap=0.25
286
+ --mdx_batch_size MDX_BATCH_SIZE Larger consumes more RAM but may process slightly faster (default: 1). Example: --mdx_batch_size=4
287
+ --mdx_hop_length MDX_HOP_LENGTH Usually called stride in neural networks, only change if you know what you're doing (default: 1024). Example: --mdx_hop_length=1024
288
+ --mdx_enable_denoise Enable denoising during separation (default: False). Example: --mdx_enable_denoise
289
+
290
+ VR Architecture Parameters:
291
+ --vr_batch_size VR_BATCH_SIZE Number of batches to process at a time. Higher = more RAM, slightly faster processing (default: 1). Example: --vr_batch_size=16
292
+ --vr_window_size VR_WINDOW_SIZE Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality. (default: 512). Example: --vr_window_size=320
293
+ --vr_aggression VR_AGGRESSION Intensity of primary stem extraction, -100 - 100. Typically, 5 for vocals & instrumentals (default: 5). Example: --vr_aggression=2
294
+ --vr_enable_tta Enable Test-Time-Augmentation; slow but improves quality (default: False). Example: --vr_enable_tta
295
+ --vr_high_end_process Mirror the missing frequency range of the output (default: False). Example: --vr_high_end_process
296
+ --vr_enable_post_process Identify leftover artifacts within vocal output; may improve separation for some songs (default: False). Example: --vr_enable_post_process
297
+ --vr_post_process_threshold VR_POST_PROCESS_THRESHOLD Threshold for post_process feature: 0.1-0.3 (default: 0.2). Example: --vr_post_process_threshold=0.1
298
+
299
+ Demucs Architecture Parameters:
300
+ --demucs_segment_size DEMUCS_SEGMENT_SIZE Size of segments into which the audio is split, 1-100. Higher = slower but better quality (default: Default). Example: --demucs_segment_size=256
301
+ --demucs_shifts DEMUCS_SHIFTS Number of predictions with random shifts, higher = slower but better quality (default: 2). Example: --demucs_shifts=4
302
+ --demucs_overlap DEMUCS_OVERLAP Overlap between prediction windows, 0.001-0.999. Higher = slower but better quality (default: 0.25). Example: --demucs_overlap=0.25
303
+ --demucs_segments_enabled DEMUCS_SEGMENTS_ENABLED Enable segment-wise processing (default: True). Example: --demucs_segments_enabled=False
304
+
305
+ MDXC Architecture Parameters:
306
+ --mdxc_segment_size MDXC_SEGMENT_SIZE Larger consumes more resources, but may give better results (default: 256). Example: --mdxc_segment_size=256
307
+ --mdxc_override_model_segment_size Override model default segment size instead of using the model default value. Example: --mdxc_override_model_segment_size
308
+ --mdxc_overlap MDXC_OVERLAP Amount of overlap between prediction windows, 2-50. Higher is better but slower (default: 8). Example: --mdxc_overlap=8
309
+ --mdxc_batch_size MDXC_BATCH_SIZE Larger consumes more RAM but may process slightly faster (default: 1). Example: --mdxc_batch_size=4
310
+ --mdxc_pitch_shift MDXC_PITCH_SHIFT Shift audio pitch by a number of semitones while processing. May improve output for deep/high vocals. (default: 0). Example: --mdxc_pitch_shift=2
311
+ ```
312
+
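To illustrate how several of these options combine in practice (an example composed from the flags documented above, not a prescribed invocation):

```sh
# Separate song.flac with a specific MDX model, denoising enabled,
# writing 320k MP3 stems into ./separated
audio-separator song.flac \
  --model_filename UVR-MDX-NET-Inst_HQ_3.onnx \
  --output_format=MP3 \
  --output_bitrate=320k \
  --output_dir=./separated \
  --mdx_enable_denoise
```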
313
+ ### As a Dependency in a Python Project
314
+
315
+ You can use Audio Separator in your own Python project. Here's a minimal example using the default two stem (Instrumental and Vocals) model:
316
+
317
+ ```python
318
+ from audio_separator.separator import Separator
319
+
320
+ # Initialize the Separator class (with optional configuration properties, below)
321
+ separator = Separator()
322
+
323
+ # Load a machine learning model (if unspecified, defaults to 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt')
324
+ separator.load_model()
325
+
326
+ # Perform the separation on specific audio files without reloading the model
327
+ output_files = separator.separate('audio1.wav')
328
+
329
+ print(f"Separation complete! Output file(s): {' '.join(output_files)}")
330
+ ```
331
+
332
+ #### Batch processing and processing with multiple models
333
+
334
+ You can process multiple files without reloading the model to save time and memory.
335
+
336
+ You only need to load a model when choosing or changing models. See example below:
337
+
338
+ ```python
339
+ from audio_separator.separator import Separator
340
+
341
+ # Initialize the Separator class (with optional configuration properties, below)
342
+ separator = Separator()
343
+
344
+ # Load a model
345
+ separator.load_model(model_filename='UVR-MDX-NET-Inst_HQ_3.onnx')
346
+
347
+ # Separate multiple audio files without reloading the model
348
+ output_files = separator.separate(['audio1.wav', 'audio2.wav', 'audio3.wav'])
349
+
350
+ # Load a different model
351
+ separator.load_model(model_filename='UVR_MDXNET_KARA_2.onnx')
352
+
353
+ # Separate the same files with the new model
354
+ output_files = separator.separate(['audio1.wav', 'audio2.wav', 'audio3.wav'])
355
+ ```
356
+
357
+ You can also specify the path to a folder containing audio files instead of listing the full paths to each of them:
358
+ ```python
359
+ from audio_separator.separator import Separator
360
+
361
+ # Initialize the Separator class (with optional configuration properties, below)
362
+ separator = Separator()
363
+
364
+ # Load a model
365
+ separator.load_model(model_filename='UVR-MDX-NET-Inst_HQ_3.onnx')
366
+
367
+ # Separate all audio files located in a folder
368
+ output_files = separator.separate('path/to/audio_directory')
369
+ ```
370
+
371
+ #### Renaming Stems
372
+
373
+ You can rename the output files by specifying the desired names. For example:
374
+ ```python
375
+ output_names = {
376
+ "Vocals": "vocals_output",
377
+ "Instrumental": "instrumental_output",
378
+ }
379
+ output_files = separator.separate('audio1.wav', output_names)
380
+ ```
381
+ In this case, the output file names will be: `vocals_output.wav` and `instrumental_output.wav`.
382
+
383
+ You can also rename specific stems:
384
+
385
+ - To rename the Vocals stem:
386
+ ```python
387
+ output_names = {
388
+ "Vocals": "vocals_output",
389
+ }
390
+ output_files = separator.separate('audio1.wav', output_names)
391
+ ```
392
+ > The output files will be named: `vocals_output.wav` and `audio1_(Instrumental)_model_mel_band_roformer_ep_3005_sdr_11.wav`
393
+ - To rename the Instrumental stem:
394
+ ```python
395
+ output_names = {
396
+ "Instrumental": "instrumental_output",
397
+ }
398
+ output_files = separator.separate('audio1.wav', output_names)
399
+ ```
400
+ > The output files will be named: `audio1_(Vocals)_model_mel_band_roformer_ep_3005_sdr_11.wav` and `instrumental_output.wav`
401
+ - List of stems for Demucs models:
402
+ - htdemucs_6s.yaml
403
+ ```python
404
+ output_names = {
405
+ "Vocals": "vocals_output",
406
+ "Drums": "drums_output",
407
+ "Bass": "bass_output",
408
+ "Other": "other_output",
409
+ "Guitar": "guitar_output",
410
+ "Piano": "piano_output",
411
+ }
412
+ ```
413
+ - Other Demucs models
414
+ ```python
415
+ output_names = {
416
+ "Vocals": "vocals_output",
417
+ "Drums": "drums_output",
418
+ "Bass": "bass_output",
419
+ "Other": "other_output",
420
+ }
421
+ ```
422
+
423
+ ## Parameters for the Separator class
424
+
425
+ - **`log_level`:** (Optional) Logging level, e.g., INFO, DEBUG, WARNING. `Default: logging.INFO`
426
+ - **`log_formatter`:** (Optional) The log format. Default: None, which falls back to '%(asctime)s - %(levelname)s - %(module)s - %(message)s'
427
+ - **`model_file_dir`:** (Optional) Directory to cache model files in. `Default: /tmp/audio-separator-models/`
428
+ - **`output_dir`:** (Optional) Directory where the separated files will be saved. If not specified, uses the current directory.
429
+ - **`output_format`:** (Optional) Format to encode output files, any common format (WAV, MP3, FLAC, M4A, etc.). `Default: WAV`
430
+ - **`normalization_threshold`:** (Optional) The amount by which the amplitude of the output audio will be multiplied. `Default: 0.9`
431
+ - **`amplification_threshold`:** (Optional) The minimum amplitude level at which the waveform will be amplified. If the peak amplitude of the audio is below this threshold, the waveform will be scaled up to meet it. `Default: 0.0`
432
+ - **`output_single_stem`:** (Optional) Output only a single stem, such as 'Instrumental' and 'Vocals'. `Default: None`
433
+ - **`invert_using_spec`:** (Optional) Flag to invert using spectrogram. `Default: False`
434
+ - **`sample_rate`:** (Optional) Set the sample rate of the output audio. `Default: 44100`
435
+ - **`use_soundfile`:** (Optional) Use soundfile for output writing, can solve OOM issues, especially on longer audio.
436
+ - **`use_autocast`:** (Optional) Flag to use PyTorch autocast for faster inference. Do not use for CPU inference. `Default: False`
437
+ - **`mdx_params`:** (Optional) MDX Architecture Specific Attributes & Defaults. `Default: {"hop_length": 1024, "segment_size": 256, "overlap": 0.25, "batch_size": 1, "enable_denoise": False}`
438
+ - **`vr_params`:** (Optional) VR Architecture Specific Attributes & Defaults. `Default: {"batch_size": 1, "window_size": 512, "aggression": 5, "enable_tta": False, "enable_post_process": False, "post_process_threshold": 0.2, "high_end_process": False}`
439
+ - **`demucs_params`:** (Optional) Demucs Architecture Specific Attributes & Defaults. `Default: {"segment_size": "Default", "shifts": 2, "overlap": 0.25, "segments_enabled": True}`
440
+ - **`mdxc_params`:** (Optional) MDXC Architecture Specific Attributes & Defaults. `Default: {"segment_size": 256, "override_model_segment_size": False, "batch_size": 1, "overlap": 8, "pitch_shift": 0}`
441
+
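Putting a few of the parameters above together, a configured `Separator` might look like the following minimal sketch (the keyword arguments mirror the list above; the specific values are illustrative, not recommendations):

```python
import logging

from audio_separator.separator import Separator

# Each keyword argument corresponds to a parameter documented above;
# the values here are arbitrary examples.
separator = Separator(
    log_level=logging.DEBUG,
    model_file_dir="/tmp/audio-separator-models/",
    output_dir="./separated",
    output_format="MP3",
    normalization_threshold=0.9,
    output_single_stem="Vocals",
    mdx_params={"hop_length": 1024, "segment_size": 256, "overlap": 0.25, "batch_size": 1, "enable_denoise": False},
)

separator.load_model(model_filename="UVR-MDX-NET-Inst_HQ_3.onnx")
output_files = separator.separate("audio1.wav")
```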
442
+ ## Requirements 📋
443
+
444
+ Python >= 3.10
445
+
446
+ Libraries: torch, onnx, onnxruntime, numpy, librosa, requests, six, tqdm, pydub
447
+
448
+ ## Developing Locally
449
+
450
+ This project uses Poetry for dependency management and packaging. Follow these steps to set up a local development environment:
451
+
452
+ ### Prerequisites
453
+
454
+ - Make sure you have Python 3.10 or newer installed on your machine.
455
+ - Install Conda (I recommend Miniforge: [Miniforge GitHub](https://github.com/conda-forge/miniforge)) to manage your Python virtual environments
456
+
457
+ ### Clone the Repository
458
+
459
+ Clone the repository to your local machine:
460
+
461
+ ```sh
462
+ git clone https://github.com/YOUR_USERNAME/audio-separator.git
463
+ cd audio-separator
464
+ ```
465
+
466
+ Replace `YOUR_USERNAME` with your GitHub username if you've forked the repository, or use the main repository URL if you have the permissions.
467
+
468
+ ### Create and activate the Conda Environment
469
+
470
+ To create and activate the conda environment, use the following commands:
471
+
472
+ ```sh
473
+ conda env create
474
+ conda activate audio-separator-dev
475
+ ```
476
+
477
+ ### Install Dependencies
478
+
479
+ Once you're inside the conda env, run the following command to install the project dependencies:
480
+
481
+ ```sh
482
+ poetry install
483
+ ```
484
+
485
+ Install extra dependencies depending on whether you're running with a GPU or CPU.
486
+ ```sh
487
+ poetry install --extras "cpu"
488
+ ```
489
+ or
490
+ ```sh
491
+ poetry install --extras "gpu"
492
+ ```
493
+ or
494
+ ```sh
495
+ poetry install --extras "dml"
496
+ ```
497
+
498
+ ### Running the Command-Line Interface Locally
499
+
500
+ You can run the CLI command directly within the virtual environment. For example:
501
+
502
+ ```sh
503
+ audio-separator path/to/your/audio-file.wav
504
+ ```
505
+
506
+ ### Deactivate the Virtual Environment
507
+
508
+ Once you are done with your development work, you can exit the virtual environment by simply typing:
509
+
510
+ ```sh
511
+ conda deactivate
512
+ ```
513
+
514
+ ### Building the Package
515
+
516
+ To build the package for distribution, use the following command:
517
+
518
+ ```sh
519
+ poetry build
520
+ ```
521
+
522
+ This will generate the distribution packages in the dist directory - but for now only @beveradb will be able to publish to PyPI.
523
+
524
+
525
+
526
+
527
+ ## Contributing 🤝
528
+
529
+ Contributions are very much welcome! Please fork the repository and submit a pull request with your changes, and I'll try to review, merge and publish promptly!
530
+
531
+ - This project is 100% open-source and free for anyone to use and modify as they wish.
532
+ - If the maintenance workload for this repo somehow becomes too much for me, I'll ask for volunteers to share maintainership of the repo, though I don't think that is very likely.
533
+ - Development and support for the MDX-Net separation models is part of the main [UVR project](https://github.com/Anjok07/ultimatevocalremovergui), this repo is just a CLI/Python package wrapper to simplify running those models programmatically. So, if you want to try and improve the actual models, please get involved in the UVR project and look for guidance there!
534
+
535
+ ## License 📄
536
+
537
+ This project is licensed under the MIT [License](LICENSE).
538
+
539
+ - **Please Note:** If you choose to integrate this project into some other project using the default model or any other model trained as part of the [UVR](https://github.com/Anjok07/ultimatevocalremovergui) project, please honor the MIT license by providing credit to UVR and its developers!
540
+
541
+ ## Credits 🙏
542
+
543
+ - [Anjok07](https://github.com/Anjok07) - Author of [Ultimate Vocal Remover GUI](https://github.com/Anjok07/ultimatevocalremovergui), which almost all of the code in this repo was copied from! Definitely deserving of credit for anything good from this project. Thank you!
544
+ - [DilanBoskan](https://github.com/DilanBoskan) - Your contributions at the start of this project were essential to the success of UVR. Thank you!
545
+ - [Kuielab & Woosung Choi](https://github.com/kuielab) - Developed the original MDX-Net AI code.
546
+ - [KimberleyJSN](https://github.com/KimberleyJensen) - Advised and aided the implementation of the training scripts for MDX-Net and Demucs. Thank you!
547
+ - [Hv](https://github.com/NaJeongMo/Colab-for-MDX_B) - Helped implement chunks into the MDX-Net AI code. Thank you!
548
+ - [zhzhongshi](https://github.com/zhzhongshi) - Helped add support for the MDXC models in `audio-separator`. Thank you!
549
+
550
+ ## Contact 💌
551
+
552
+ For questions or feedback, please raise an issue or reach out to @beveradb ([Andrew Beveridge](mailto:[email protected])) directly.
553
+
554
+ ## Sponsors
555
+
556
+ <!-- sponsors --><!-- sponsors -->
557
+
558
+ ## Thanks to all contributors for their efforts
559
+
560
+ <a href="https://github.com/nomadkaraoke/python-audio-separator/graphs/contributors">
561
+ <img src="https://contrib.rocks/image?repo=nomadkaraoke/python-audio-separator" />
562
+ </a>
TODO.md ADDED
@@ -0,0 +1,20 @@
1
+ # Audio-Separator TO-DO list
2
+
3
+ If you see something here, Andrew is aware it needs to be done, and he will hopefully get to it soon but can't make any promises!
4
+ This isn't his full-time job, and he's doing his best to keep up with everything.
5
+
6
+ If you'd like something to be done sooner, please consider trying to work on it yourself and submitting a pull request!
7
+ If you don't know how to code, please consider learning - it's free, and anyone can do it! https://www.freecodecamp.org/learn/scientific-computing-with-python/
8
+
9
+ ## TODO:
10
+
11
+ - Add unit tests to all uvr lib functions to ensure no obvious errors are missed
12
+ - Add end-to-end tests which download all models and test separation with a very short input file for speed
13
+ - Add ability for user to download all models ahead of time
14
+ - Add tests for Windows, Linux and macOS separately
15
+ - Add tests for Python 3.10, 3.11
16
+ - Add support for MDXC models
17
+ - Add support for Demucs models
18
+ - Add support for Ensemble mode
19
+ - Add support for Chaining multiple models
20
+ - Add support for Splitting multiple stems from a single input file by running different models
audio_separator/separator/separator.py CHANGED
@@ -956,4 +956,4 @@ class Separator:
956
 
957
  return dict(sorted(filtered_list.items(), key=sort_key, reverse=True))
958
 
959
- return simplified_list
959
+ return simplified_list
ensemble.py CHANGED
@@ -5,27 +5,23 @@ import os
5
  import librosa
6
  import soundfile as sf
7
  import numpy as np
8
- import argparse
9
- import logging
10
  import gc
11
 
12
- logging.basicConfig(level=logging.INFO)
13
- logger = logging.getLogger(__name__)
14
-
15
  def stft(wave, nfft, hl):
16
- wave_left = np.ascontiguousarray(wave[0])
17
- wave_right = np.ascontiguousarray(wave[1])
18
- spec_left = librosa.stft(wave_left, n_fft=nfft, hop_length=hl)
19
- spec_right = librosa.stft(wave_right, n_fft=nfft, hop_length=hl)
20
- spec = np.stack([spec_left, spec_right])
21
  return spec
22
 
23
  def istft(spec, hl, length):
24
- spec_left = np.ascontiguousarray(spec[0])
25
- spec_right = np.ascontiguousarray(spec[1])
26
- wave_left = librosa.istft(spec_left, hop_length=hl, length=length)
27
- wave_right = librosa.istft(spec_right, hop_length=hl, length=length)
28
- wave = np.stack([wave_left, wave_right])
29
  return wave
30
 
31
  def absmax(a, *, axis):
@@ -68,17 +64,11 @@ def lambda_min(arr, axis=None, key=None, keepdims=False):
68
  else:
69
  return arr.flatten()[idxs]
70
 
71
- def average_waveforms(pred_track, weights, algorithm):
72
- """
73
- :param pred_track: shape = (num, channels, length)
74
- :param weights: shape = (num, )
75
- :param algorithm: One of avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft
76
- :return: averaged waveform in shape (channels, length)
77
- """
78
- pred_track = np.asarray(pred_track) # NumPy 2.0+ compatibility
79
- final_length = pred_track.shape[-1]
80
-
81
  mod_track = []
 
82
  for i in range(pred_track.shape[0]):
83
  if algorithm == 'avg_wave':
84
  mod_track.append(pred_track[i] * weights[i])
@@ -90,87 +80,113 @@ def average_waveforms(pred_track, weights, algorithm):
90
  mod_track.append(spec * weights[i])
91
  else:
92
  mod_track.append(spec)
93
- del spec
94
- gc.collect()
95
- mod_track = np.asarray(mod_track) # NumPy 2.0+ compatibility
96
 
97
  if algorithm == 'avg_wave':
98
- result = mod_track.sum(axis=0) / np.sum(weights)
 
99
  elif algorithm == 'median_wave':
100
- result = np.median(mod_track, axis=0)
101
  elif algorithm == 'min_wave':
102
- result = lambda_min(mod_track, axis=0, key=np.abs)
103
  elif algorithm == 'max_wave':
104
- result = lambda_max(mod_track, axis=0, key=np.abs)
105
  elif algorithm == 'avg_fft':
106
- result = mod_track.sum(axis=0) / np.sum(weights)
107
- result = istft(result, 1024, final_length)
 
108
  elif algorithm == 'min_fft':
109
- result = lambda_min(mod_track, axis=0, key=np.abs)
110
- result = istft(result, 1024, final_length)
111
  elif algorithm == 'max_fft':
112
- result = absmax(mod_track, axis=0)
113
- result = istft(result, 1024, final_length)
114
  elif algorithm == 'median_fft':
115
- result = np.median(mod_track, axis=0)
116
- result = istft(result, 1024, final_length)
117
-
118
- gc.collect()
119
- return result
120
 
121
  def ensemble_files(args):
122
- parser = argparse.ArgumentParser(description="Ensemble audio files")
123
- parser.add_argument('--files', nargs='+', required=True, help="Input audio files")
124
- parser.add_argument('--type', required=True, choices=['avg_wave', 'median_wave', 'max_wave', 'min_wave', 'avg_fft', 'median_fft', 'max_fft', 'min_fft'], help="Ensemble type")
125
- parser.add_argument('--weights', nargs='+', type=float, default=None, help="Weights for each file")
126
- parser.add_argument('--output', required=True, help="Output file path")
127
-
128
- args = parser.parse_args(args) if isinstance(args, list) else args
129
-
130
- logger.info(f"Ensemble type: {args.type}")
131
- logger.info(f"Number of input files: {len(args.files)}")
132
- weights = args.weights if args.weights else [1.0] * len(args.files)
133
- if len(weights) != len(args.files):
134
- logger.error("Number of weights must match number of audio files")
135
- raise ValueError("Number of weights must match number of audio files")
136
- logger.info(f"Weights: {weights}")
137
- logger.info(f"Output file: {args.output}")
138
-
139
- data = []
140
- sr = None
141
- for f in args.files:
142
- if not os.path.isfile(f):
143
- logger.error(f"Cannot find file: {f}")
144
- raise FileNotFoundError(f"Cannot find file: {f}")
145
- logger.info(f"Reading file: {f}")
146
- try:
147
- wav, curr_sr = librosa.load(f, sr=None, mono=False)
148
- if sr is None:
149
- sr = curr_sr
150
- elif sr != curr_sr:
151
- logger.error("All audio files must have the same sample rate")
152
- raise ValueError("All audio files must have the same sample rate")
153
- logger.info(f"Waveform shape: {wav.shape} sample rate: {sr}")
154
- data.append(wav)
155
- del wav
            gc.collect()
-         except Exception as e:
-             logger.error(f"Error reading audio file {f}: {str(e)}")
-             raise RuntimeError(f"Error reading audio file {f}: {str(e)}")
-
-     try:
-         data = np.asarray(data)  # NumPy 2.0+ compatibility
-         res = average_waveforms(data, weights, args.type)
-         logger.info(f"Result shape: {res.shape}")
-         os.makedirs(os.path.dirname(args.output), exist_ok=True)
-         sf.write(args.output, res.T, sr, 'FLOAT')
-         logger.info(f"Output written to: {args.output}")
-         return args.output
-     except Exception as e:
-         logger.error(f"Error during ensemble processing: {str(e)}")
-         raise RuntimeError(f"Error during ensemble processing: {str(e)}")
-     finally:
-         gc.collect()
-

if __name__ == "__main__":
-     ensemble_files(sys.argv[1:])
 
import librosa
import soundfile as sf
import numpy as np
+ import argparse  # Add this line
import gc

def stft(wave, nfft, hl):
+     wave_left = np.asfortranarray(wave[0])
+     wave_right = np.asfortranarray(wave[1])
+     spec_left = librosa.stft(wave_left, n_fft=nfft, hop_length=hl, window='hann')
+     spec_right = librosa.stft(wave_right, n_fft=nfft, hop_length=hl, window='hann')
+     spec = np.asfortranarray([spec_left, spec_right])
    return spec

def istft(spec, hl, length):
+     spec_left = np.asfortranarray(spec[0])
+     spec_right = np.asfortranarray(spec[1])
+     wave_left = librosa.istft(spec_left, hop_length=hl, length=length, window='hann')
+     wave_right = librosa.istft(spec_right, hop_length=hl, length=length, window='hann')
+     wave = np.asfortranarray([wave_left, wave_right])
    return wave
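The stft/istft helpers added above simply run librosa's STFT and inverse STFT once per stereo channel. Below is a stand-alone round-trip sketch of the same idea at the hop length used later in this file (1024); the FFT size and the test signal are invented for the example and are not taken from the repository:

```python
import numpy as np
import librosa

hop = 1024
nfft = 4096  # assumed analysis size for this sketch; only the hop length appears in the diff
wave = np.random.randn(2, 10 * hop).astype(np.float32)  # synthetic stereo test signal

# Forward transform per channel, then invert with length= to restore the original sample count
spec = np.array([librosa.stft(ch, n_fft=nfft, hop_length=hop, window='hann') for ch in wave])
back = np.array([librosa.istft(ch, hop_length=hop, length=wave.shape[1], window='hann') for ch in spec])

assert back.shape == wave.shape  # length= pads/trims the reconstruction to the original length
```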

def absmax(a, *, axis):
# ... lines 28-63 unchanged (collapsed in the diff view) ...
    else:
        return arr.flatten()[idxs]

+ def average_waveforms(pred_track, weights, algorithm, chunk_length):
+     pred_track = np.array(pred_track)
+     pred_track = np.array([p[:, :chunk_length] if p.shape[1] > chunk_length else np.pad(p, ((0, 0), (0, chunk_length - p.shape[1])), 'constant') for p in pred_track])
    mod_track = []
+
    for i in range(pred_track.shape[0]):
        if algorithm == 'avg_wave':
            mod_track.append(pred_track[i] * weights[i])
# ... lines 75-79 unchanged (collapsed in the diff view) ...
            mod_track.append(spec * weights[i])
        else:
            mod_track.append(spec)
+     pred_track = np.array(mod_track)

    if algorithm == 'avg_wave':
+         pred_track = pred_track.sum(axis=0)
+         pred_track /= np.array(weights).sum()
    elif algorithm == 'median_wave':
+         pred_track = np.median(pred_track, axis=0)
    elif algorithm == 'min_wave':
+         pred_track = lambda_min(pred_track, axis=0, key=np.abs)
    elif algorithm == 'max_wave':
+         pred_track = lambda_max(pred_track, axis=0, key=np.abs)
    elif algorithm == 'avg_fft':
+         pred_track = pred_track.sum(axis=0)
+         pred_track /= np.array(weights).sum()
+         pred_track = istft(pred_track, 1024, chunk_length)
    elif algorithm == 'min_fft':
+         pred_track = lambda_min(pred_track, axis=0, key=np.abs)
+         pred_track = istft(pred_track, 1024, chunk_length)
    elif algorithm == 'max_fft':
+         pred_track = absmax(pred_track, axis=0)
+         pred_track = istft(pred_track, 1024, chunk_length)
    elif algorithm == 'median_fft':
+         pred_track = np.median(pred_track, axis=0)
+         pred_track = istft(pred_track, 1024, chunk_length)
+
+     return pred_track
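Because `average_waveforms` now receives 30-second chunks rather than whole tracks, every prediction is first trimmed or zero-padded to exactly `chunk_length` samples before the reduction runs. A small sketch of that normalisation step with made-up shapes; the helper name `fit_to_length` is hypothetical and not from the repository:

```python
import numpy as np

chunk_length = 8
preds = [np.ones((2, 10)), np.ones((2, 6))]  # one chunk too long, one too short

def fit_to_length(p, n):
    # Trim anything longer than n samples, zero-pad anything shorter
    return p[:, :n] if p.shape[1] > n else np.pad(p, ((0, 0), (0, n - p.shape[1])), 'constant')

pred_track = np.array([fit_to_length(p, chunk_length) for p in preds])
print(pred_track.shape)  # (2, 2, 8): every prediction now has the same length
```

This matters mainly for the last chunk of a file, which is usually shorter than the aligned `chunk_samples`.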
 

def ensemble_files(args):
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--files", type=str, required=True, nargs='+', help="Path to all audio-files to ensemble")
+     parser.add_argument("--type", type=str, default='avg_wave', help="One of avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft")
+     parser.add_argument("--weights", type=float, nargs='+', help="Weights to create ensemble. Number of weights must be equal to number of files")
+     parser.add_argument("--output", default="res.wav", type=str, help="Path to wav file where ensemble result will be stored")
+     if args is None:
+         args = parser.parse_args()
+     else:
+         args = parser.parse_args(args)
+
+     print('Ensemble type: {}'.format(args.type))
+     print('Number of input files: {}'.format(len(args.files)))
+     if args.weights is not None:
+         weights = np.array(args.weights)
+     else:
+         weights = np.ones(len(args.files))
+     print('Weights: {}'.format(weights))
+     print('Output file: {}'.format(args.output))
+
+     durations = [librosa.get_duration(filename=f) for f in args.files]
+     if not all(d == durations[0] for d in durations):
+         raise ValueError("All files must have the same duration")
+
+     total_duration = durations[0]
+     sr = librosa.get_samplerate(args.files[0])
+     chunk_duration = 30  # 30-second chunks
+     overlap_duration = 0.1  # 100 ms overlap
+     chunk_samples = int(chunk_duration * sr)
+     overlap_samples = int(overlap_duration * sr)
+     step_samples = chunk_samples - overlap_samples  # Step size reduced by overlap
+     total_samples = int(total_duration * sr)
+
+     # Align chunk length with hop_length
+     hop_length = 1024
+     chunk_samples = ((chunk_samples + hop_length - 1) // hop_length) * hop_length
+     step_samples = chunk_samples - overlap_samples
+
+     prev_chunk_tail = None  # To store the tail of the previous chunk for crossfading
+
+     with sf.SoundFile(args.output, 'w', sr, channels=2, subtype='FLOAT') as outfile:
+         for start in range(0, total_samples, step_samples):
+             end = min(start + chunk_samples, total_samples)
+             chunk_length = end - start
+             data = []
+
+             for f in args.files:
+                 if not os.path.isfile(f):
+                     print('Error. Can\'t find file: {}. Check paths.'.format(f))
+                     exit()
+                 # print(f'Reading chunk from file: {f} (start: {start/sr}s, duration: {(end-start)/sr}s)')
+                 wav, _ = librosa.load(f, sr=sr, mono=False, offset=start/sr, duration=(end-start)/sr)
+                 data.append(wav)
+
+             res = average_waveforms(data, weights, args.type, chunk_length)
+             res = res.astype(np.float32)
+             # print(f'Chunk result shape: {res.shape}')
+
+             # Crossfade with the previous chunk's tail
+             if start > 0 and prev_chunk_tail is not None:
+                 new_data = res[:, :overlap_samples]
+                 fade_out = np.linspace(1, 0, overlap_samples)
+                 fade_in = np.linspace(0, 1, overlap_samples)
+                 blended = prev_chunk_tail * fade_out + new_data * fade_in
+                 outfile.write(blended.T)
+                 outfile.write(res[:, overlap_samples:].T)
+             else:
+                 outfile.write(res.T)
+
+             # Store the tail of the current chunk for the next iteration
+             if chunk_length > overlap_samples:
+                 prev_chunk_tail = res[:, -overlap_samples:]
+             else:
+                 prev_chunk_tail = res[:, :]
+
+             del data
+             del res
            gc.collect()
+
+     print(f'Ensemble completed. Output saved to: {args.output}')
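Consecutive chunks overlap by 100 ms, and the writer blends that overlap with a linear crossfade: the previous tail fades out while the new head fades in, so the two gains always sum to 1. A stand-alone sketch on made-up arrays (not code from the repository):

```python
import numpy as np

overlap = 5
prev_tail = np.ones((2, overlap))   # tail of the previous chunk (stereo)
new_head = np.zeros((2, overlap))   # start of the current chunk

fade_out = np.linspace(1, 0, overlap)  # previous chunk fades out...
fade_in = np.linspace(0, 1, overlap)   # ...while the new chunk fades in
blended = prev_tail * fade_out + new_head * fade_in

print(blended[0])  # a linear ramp from 1 down to 0 across the seam
```

A linear fade keeps correlated material at constant level across the seam; an equal-power (cosine) fade is the usual alternative when the overlapping signals are uncorrelated.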

if __name__ == "__main__":
+     ensemble_files(None)
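With `args=None` the parser falls back to `sys.argv`, so the script works both from the command line and as a library call. A hypothetical programmatic invocation (the module name and file paths are placeholders, not from the repository):

```python
from ensemble import ensemble_files  # assumes the script is importable as "ensemble"

ensemble_files([
    "--files", "vocals_a.wav", "vocals_b.wav",
    "--type", "avg_wave",
    "--weights", "2", "1",
    "--output", "vocals_ensembled.wav",
])
```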
models/te.txt ADDED
@@ -0,0 +1 @@
 
 
+
pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "audio-separator"
- version = "0.33.0"
+ version = "0.34.0"
description = "Easy to use audio stem separation, using various models from UVR trained primarily by @Anjok07"
authors = ["Andrew Beveridge <[email protected]>"]
license = "MIT"
requirements.txt CHANGED
@@ -1,9 +1,8 @@
- audio-separator[gpu]==0.34.0
- gradio==4.44.0
+ audio-separator[gpu]==0.33.0
+ scipy
+ gradio
yt_dlp
pypresence
+ youtube_dl
validators
matchering==2.0.6
- spaces
- gdown
- scipy