Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
							See raw diff
- .flake8 +3 -0
- .gitattributes +1 -0
- .github/ISSUE_TEMPLATE/bug_report.md +37 -0
- .github/workflows/stale.yml +27 -0
- .gitignore +14 -0
- LICENSE +661 -0
- README.md +96 -8
- __pycache__/settings.cpython-310.pyc +0 -0
- clip/__init__.py +1 -0
- clip/bpe_simple_vocab_16e6.txt.gz +3 -0
- clip/clip.py +245 -0
- clip/clipseg.py +538 -0
- clip/model.py +436 -0
- clip/simple_tokenizer.py +132 -0
- clip/vitseg.py +286 -0
- config.yaml +15 -0
- docs/screenshot.png +3 -0
- installer/installer.py +83 -0
- installer/windows_run.bat +80 -0
- models/CLIP/rd64-uni-refined.pth +3 -0
- models/CodeFormer/CodeFormerv0.1.onnx +3 -0
- models/DMDNet.pth +3 -0
- models/GFPGANv1.4.onnx +3 -0
- models/GPEN-BFR-512.onnx +3 -0
- models/inswapper_128.onnx +3 -0
- mypy.ini +7 -0
- requirements.txt +21 -0
- roop-unleashed.ipynb +184 -0
- roop/FaceSet.py +20 -0
- roop/ProcessEntry.py +7 -0
- roop/ProcessMgr.py +457 -0
- roop/ProcessOptions.py +9 -0
- roop/__init__.py +0 -0
- roop/__pycache__/FaceSet.cpython-310.pyc +0 -0
- roop/__pycache__/ProcessEntry.cpython-310.pyc +0 -0
- roop/__pycache__/ProcessMgr.cpython-310.pyc +0 -0
- roop/__pycache__/ProcessOptions.cpython-310.pyc +0 -0
- roop/__pycache__/__init__.cpython-310.pyc +0 -0
- roop/__pycache__/capturer.cpython-310.pyc +0 -0
- roop/__pycache__/core.cpython-310.pyc +0 -0
- roop/__pycache__/face_util.cpython-310.pyc +0 -0
- roop/__pycache__/ffmpeg_writer.cpython-310.pyc +0 -0
- roop/__pycache__/globals.cpython-310.pyc +0 -0
- roop/__pycache__/metadata.cpython-310.pyc +0 -0
- roop/__pycache__/template_parser.cpython-310.pyc +0 -0
- roop/__pycache__/typing.cpython-310.pyc +0 -0
- roop/__pycache__/util_ffmpeg.cpython-310.pyc +0 -0
- roop/__pycache__/utilities.cpython-310.pyc +0 -0
- roop/capturer.py +30 -0
- roop/core.py +360 -0
    	
        .flake8
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            [flake8]
         | 
| 2 | 
            +
            select = E3, E4, F
         | 
| 3 | 
            +
            per-file-ignores = roop/core.py:E402
         | 
    	
        .gitattributes
    CHANGED
    
    | @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
|  | 
|  | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
| 36 | 
            +
            docs/screenshot.png filter=lfs diff=lfs merge=lfs -text
         | 
    	
        .github/ISSUE_TEMPLATE/bug_report.md
    ADDED
    
    | @@ -0,0 +1,37 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            name: Bug report
         | 
| 3 | 
            +
            about: Create a report to help us improve
         | 
| 4 | 
            +
            title: ''
         | 
| 5 | 
            +
            labels: ''
         | 
| 6 | 
            +
            assignees: ''
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            ---
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            **Describe the bug**
         | 
| 11 | 
            +
            A clear and concise description of what the bug is.
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            **To Reproduce**
         | 
| 14 | 
            +
            Steps to reproduce the behavior:
         | 
| 15 | 
            +
            1. Go to '...'
         | 
| 16 | 
            +
            2. Click on '....'
         | 
| 17 | 
            +
            3. Scroll down to '....'
         | 
| 18 | 
            +
            4. See error
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            **Details**
         | 
| 21 | 
            +
            What OS are you using?
         | 
| 22 | 
            +
            - [ ] Linux
         | 
| 23 | 
            +
            - [ ] Linux in WSL
         | 
| 24 | 
            +
            - [ ] Windows 
         | 
| 25 | 
            +
            - [ ] Mac
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            Are you using a GPU?
         | 
| 28 | 
            +
            - [ ] No. CPU FTW
         | 
| 29 | 
            +
            - [ ] NVIDIA
         | 
| 30 | 
            +
            - [ ] AMD
         | 
| 31 | 
            +
            - [ ] Intel
         | 
| 32 | 
            +
            - [ ] Mac
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            **Which version of roop unleashed are you using?**
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            **Screenshots**
         | 
| 37 | 
            +
            If applicable, add screenshots to help explain your problem.
         | 
    	
        .github/workflows/stale.yml
    ADDED
    
    | @@ -0,0 +1,27 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time.
         | 
| 2 | 
            +
            #
         | 
| 3 | 
            +
            # You can adjust the behavior by modifying this file.
         | 
| 4 | 
            +
            # For more information, see:
         | 
| 5 | 
            +
            # https://github.com/actions/stale
         | 
| 6 | 
            +
            name: Mark stale issues and pull requests
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            on:
         | 
| 9 | 
            +
              schedule:
         | 
| 10 | 
            +
              - cron: '32 0 * * *'
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            jobs:
         | 
| 13 | 
            +
              stale:
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                runs-on: ubuntu-latest
         | 
| 16 | 
            +
                permissions:
         | 
| 17 | 
            +
                  issues: write
         | 
| 18 | 
            +
                  pull-requests: write
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                steps:
         | 
| 21 | 
            +
                - uses: actions/stale@v5
         | 
| 22 | 
            +
                  with:
         | 
| 23 | 
            +
                    repo-token: ${{ secrets.GITHUB_TOKEN }}
         | 
| 24 | 
            +
                    stale-issue-message: 'Stale issue message'
         | 
| 25 | 
            +
                    stale-pr-message: 'Stale pull request message'
         | 
| 26 | 
            +
                    stale-issue-label: 'no-issue-activity'
         | 
| 27 | 
            +
                    stale-pr-label: 'no-pr-activity'
         | 
    	
        .gitignore
    ADDED
    
    | @@ -0,0 +1,14 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            .vs
         | 
| 2 | 
            +
            .idea
         | 
| 3 | 
            +
            models
         | 
| 4 | 
            +
            temp
         | 
| 5 | 
            +
            __pycache__
         | 
| 6 | 
            +
            *.pth
         | 
| 7 | 
            +
            /start.bat
         | 
| 8 | 
            +
            /env
         | 
| 9 | 
            +
            .vscode
         | 
| 10 | 
            +
            output
         | 
| 11 | 
            +
            temp
         | 
| 12 | 
            +
            config.yaml
         | 
| 13 | 
            +
            run.bat
         | 
| 14 | 
            +
            venv
         | 
    	
        LICENSE
    ADDED
    
    | @@ -0,0 +1,661 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
                                GNU AFFERO GENERAL PUBLIC LICENSE
         | 
| 2 | 
            +
                                   Version 3, 19 November 2007
         | 
| 3 | 
            +
             | 
| 4 | 
            +
             Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
         | 
| 5 | 
            +
             Everyone is permitted to copy and distribute verbatim copies
         | 
| 6 | 
            +
             of this license document, but changing it is not allowed.
         | 
| 7 | 
            +
             | 
| 8 | 
            +
                                        Preamble
         | 
| 9 | 
            +
             | 
| 10 | 
            +
              The GNU Affero General Public License is a free, copyleft license for
         | 
| 11 | 
            +
            software and other kinds of works, specifically designed to ensure
         | 
| 12 | 
            +
            cooperation with the community in the case of network server software.
         | 
| 13 | 
            +
             | 
| 14 | 
            +
              The licenses for most software and other practical works are designed
         | 
| 15 | 
            +
            to take away your freedom to share and change the works.  By contrast,
         | 
| 16 | 
            +
            our General Public Licenses are intended to guarantee your freedom to
         | 
| 17 | 
            +
            share and change all versions of a program--to make sure it remains free
         | 
| 18 | 
            +
            software for all its users.
         | 
| 19 | 
            +
             | 
| 20 | 
            +
              When we speak of free software, we are referring to freedom, not
         | 
| 21 | 
            +
            price.  Our General Public Licenses are designed to make sure that you
         | 
| 22 | 
            +
            have the freedom to distribute copies of free software (and charge for
         | 
| 23 | 
            +
            them if you wish), that you receive source code or can get it if you
         | 
| 24 | 
            +
            want it, that you can change the software or use pieces of it in new
         | 
| 25 | 
            +
            free programs, and that you know you can do these things.
         | 
| 26 | 
            +
             | 
| 27 | 
            +
              Developers that use our General Public Licenses protect your rights
         | 
| 28 | 
            +
            with two steps: (1) assert copyright on the software, and (2) offer
         | 
| 29 | 
            +
            you this License which gives you legal permission to copy, distribute
         | 
| 30 | 
            +
            and/or modify the software.
         | 
| 31 | 
            +
             | 
| 32 | 
            +
              A secondary benefit of defending all users' freedom is that
         | 
| 33 | 
            +
            improvements made in alternate versions of the program, if they
         | 
| 34 | 
            +
            receive widespread use, become available for other developers to
         | 
| 35 | 
            +
            incorporate.  Many developers of free software are heartened and
         | 
| 36 | 
            +
            encouraged by the resulting cooperation.  However, in the case of
         | 
| 37 | 
            +
            software used on network servers, this result may fail to come about.
         | 
| 38 | 
            +
            The GNU General Public License permits making a modified version and
         | 
| 39 | 
            +
            letting the public access it on a server without ever releasing its
         | 
| 40 | 
            +
            source code to the public.
         | 
| 41 | 
            +
             | 
| 42 | 
            +
              The GNU Affero General Public License is designed specifically to
         | 
| 43 | 
            +
            ensure that, in such cases, the modified source code becomes available
         | 
| 44 | 
            +
            to the community.  It requires the operator of a network server to
         | 
| 45 | 
            +
            provide the source code of the modified version running there to the
         | 
| 46 | 
            +
            users of that server.  Therefore, public use of a modified version, on
         | 
| 47 | 
            +
            a publicly accessible server, gives the public access to the source
         | 
| 48 | 
            +
            code of the modified version.
         | 
| 49 | 
            +
             | 
| 50 | 
            +
              An older license, called the Affero General Public License and
         | 
| 51 | 
            +
            published by Affero, was designed to accomplish similar goals.  This is
         | 
| 52 | 
            +
            a different license, not a version of the Affero GPL, but Affero has
         | 
| 53 | 
            +
            released a new version of the Affero GPL which permits relicensing under
         | 
| 54 | 
            +
            this license.
         | 
| 55 | 
            +
             | 
| 56 | 
            +
              The precise terms and conditions for copying, distribution and
         | 
| 57 | 
            +
            modification follow.
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                                   TERMS AND CONDITIONS
         | 
| 60 | 
            +
             | 
| 61 | 
            +
              0. Definitions.
         | 
| 62 | 
            +
             | 
| 63 | 
            +
              "This License" refers to version 3 of the GNU Affero General Public License.
         | 
| 64 | 
            +
             | 
| 65 | 
            +
              "Copyright" also means copyright-like laws that apply to other kinds of
         | 
| 66 | 
            +
            works, such as semiconductor masks.
         | 
| 67 | 
            +
             | 
| 68 | 
            +
              "The Program" refers to any copyrightable work licensed under this
         | 
| 69 | 
            +
            License.  Each licensee is addressed as "you".  "Licensees" and
         | 
| 70 | 
            +
            "recipients" may be individuals or organizations.
         | 
| 71 | 
            +
             | 
| 72 | 
            +
              To "modify" a work means to copy from or adapt all or part of the work
         | 
| 73 | 
            +
            in a fashion requiring copyright permission, other than the making of an
         | 
| 74 | 
            +
            exact copy.  The resulting work is called a "modified version" of the
         | 
| 75 | 
            +
            earlier work or a work "based on" the earlier work.
         | 
| 76 | 
            +
             | 
| 77 | 
            +
              A "covered work" means either the unmodified Program or a work based
         | 
| 78 | 
            +
            on the Program.
         | 
| 79 | 
            +
             | 
| 80 | 
            +
              To "propagate" a work means to do anything with it that, without
         | 
| 81 | 
            +
            permission, would make you directly or secondarily liable for
         | 
| 82 | 
            +
            infringement under applicable copyright law, except executing it on a
         | 
| 83 | 
            +
            computer or modifying a private copy.  Propagation includes copying,
         | 
| 84 | 
            +
            distribution (with or without modification), making available to the
         | 
| 85 | 
            +
            public, and in some countries other activities as well.
         | 
| 86 | 
            +
             | 
| 87 | 
            +
              To "convey" a work means any kind of propagation that enables other
         | 
| 88 | 
            +
            parties to make or receive copies.  Mere interaction with a user through
         | 
| 89 | 
            +
            a computer network, with no transfer of a copy, is not conveying.
         | 
| 90 | 
            +
             | 
| 91 | 
            +
              An interactive user interface displays "Appropriate Legal Notices"
         | 
| 92 | 
            +
            to the extent that it includes a convenient and prominently visible
         | 
| 93 | 
            +
            feature that (1) displays an appropriate copyright notice, and (2)
         | 
| 94 | 
            +
            tells the user that there is no warranty for the work (except to the
         | 
| 95 | 
            +
            extent that warranties are provided), that licensees may convey the
         | 
| 96 | 
            +
            work under this License, and how to view a copy of this License.  If
         | 
| 97 | 
            +
            the interface presents a list of user commands or options, such as a
         | 
| 98 | 
            +
            menu, a prominent item in the list meets this criterion.
         | 
| 99 | 
            +
             | 
| 100 | 
            +
              1. Source Code.
         | 
| 101 | 
            +
             | 
| 102 | 
            +
              The "source code" for a work means the preferred form of the work
         | 
| 103 | 
            +
            for making modifications to it.  "Object code" means any non-source
         | 
| 104 | 
            +
            form of a work.
         | 
| 105 | 
            +
             | 
| 106 | 
            +
              A "Standard Interface" means an interface that either is an official
         | 
| 107 | 
            +
            standard defined by a recognized standards body, or, in the case of
         | 
| 108 | 
            +
            interfaces specified for a particular programming language, one that
         | 
| 109 | 
            +
            is widely used among developers working in that language.
         | 
| 110 | 
            +
             | 
| 111 | 
            +
              The "System Libraries" of an executable work include anything, other
         | 
| 112 | 
            +
            than the work as a whole, that (a) is included in the normal form of
         | 
| 113 | 
            +
            packaging a Major Component, but which is not part of that Major
         | 
| 114 | 
            +
            Component, and (b) serves only to enable use of the work with that
         | 
| 115 | 
            +
            Major Component, or to implement a Standard Interface for which an
         | 
| 116 | 
            +
            implementation is available to the public in source code form.  A
         | 
| 117 | 
            +
            "Major Component", in this context, means a major essential component
         | 
| 118 | 
            +
            (kernel, window system, and so on) of the specific operating system
         | 
| 119 | 
            +
            (if any) on which the executable work runs, or a compiler used to
         | 
| 120 | 
            +
            produce the work, or an object code interpreter used to run it.
         | 
| 121 | 
            +
             | 
| 122 | 
            +
              The "Corresponding Source" for a work in object code form means all
         | 
| 123 | 
            +
            the source code needed to generate, install, and (for an executable
         | 
| 124 | 
            +
            work) run the object code and to modify the work, including scripts to
         | 
| 125 | 
            +
            control those activities.  However, it does not include the work's
         | 
| 126 | 
            +
            System Libraries, or general-purpose tools or generally available free
         | 
| 127 | 
            +
            programs which are used unmodified in performing those activities but
         | 
| 128 | 
            +
            which are not part of the work.  For example, Corresponding Source
         | 
| 129 | 
            +
            includes interface definition files associated with source files for
         | 
| 130 | 
            +
            the work, and the source code for shared libraries and dynamically
         | 
| 131 | 
            +
            linked subprograms that the work is specifically designed to require,
         | 
| 132 | 
            +
            such as by intimate data communication or control flow between those
         | 
| 133 | 
            +
            subprograms and other parts of the work.
         | 
| 134 | 
            +
             | 
| 135 | 
            +
              The Corresponding Source need not include anything that users
         | 
| 136 | 
            +
            can regenerate automatically from other parts of the Corresponding
         | 
| 137 | 
            +
            Source.
         | 
| 138 | 
            +
             | 
| 139 | 
            +
              The Corresponding Source for a work in source code form is that
         | 
| 140 | 
            +
            same work.
         | 
| 141 | 
            +
             | 
| 142 | 
            +
              2. Basic Permissions.
         | 
| 143 | 
            +
             | 
| 144 | 
            +
              All rights granted under this License are granted for the term of
         | 
| 145 | 
            +
            copyright on the Program, and are irrevocable provided the stated
         | 
| 146 | 
            +
            conditions are met.  This License explicitly affirms your unlimited
         | 
| 147 | 
            +
            permission to run the unmodified Program.  The output from running a
         | 
| 148 | 
            +
            covered work is covered by this License only if the output, given its
         | 
| 149 | 
            +
            content, constitutes a covered work.  This License acknowledges your
         | 
| 150 | 
            +
            rights of fair use or other equivalent, as provided by copyright law.
         | 
| 151 | 
            +
             | 
| 152 | 
            +
              You may make, run and propagate covered works that you do not
         | 
| 153 | 
            +
            convey, without conditions so long as your license otherwise remains
         | 
| 154 | 
            +
            in force.  You may convey covered works to others for the sole purpose
         | 
| 155 | 
            +
            of having them make modifications exclusively for you, or provide you
         | 
| 156 | 
            +
            with facilities for running those works, provided that you comply with
         | 
| 157 | 
            +
            the terms of this License in conveying all material for which you do
         | 
| 158 | 
            +
            not control copyright.  Those thus making or running the covered works
         | 
| 159 | 
            +
            for you must do so exclusively on your behalf, under your direction
         | 
| 160 | 
            +
            and control, on terms that prohibit them from making any copies of
         | 
| 161 | 
            +
            your copyrighted material outside their relationship with you.
         | 
| 162 | 
            +
             | 
| 163 | 
            +
              Conveying under any other circumstances is permitted solely under
         | 
| 164 | 
            +
            the conditions stated below.  Sublicensing is not allowed; section 10
         | 
| 165 | 
            +
            makes it unnecessary.
         | 
| 166 | 
            +
             | 
| 167 | 
            +
              3. Protecting Users' Legal Rights From Anti-Circumvention Law.
         | 
| 168 | 
            +
             | 
| 169 | 
            +
              No covered work shall be deemed part of an effective technological
         | 
| 170 | 
            +
            measure under any applicable law fulfilling obligations under article
         | 
| 171 | 
            +
            11 of the WIPO copyright treaty adopted on 20 December 1996, or
         | 
| 172 | 
            +
            similar laws prohibiting or restricting circumvention of such
         | 
| 173 | 
            +
            measures.
         | 
| 174 | 
            +
             | 
| 175 | 
            +
              When you convey a covered work, you waive any legal power to forbid
         | 
| 176 | 
            +
            circumvention of technological measures to the extent such circumvention
         | 
| 177 | 
            +
            is effected by exercising rights under this License with respect to
         | 
| 178 | 
            +
            the covered work, and you disclaim any intention to limit operation or
         | 
| 179 | 
            +
            modification of the work as a means of enforcing, against the work's
         | 
| 180 | 
            +
            users, your or third parties' legal rights to forbid circumvention of
         | 
| 181 | 
            +
            technological measures.
         | 
| 182 | 
            +
             | 
| 183 | 
            +
              4. Conveying Verbatim Copies.
         | 
| 184 | 
            +
             | 
| 185 | 
            +
              You may convey verbatim copies of the Program's source code as you
         | 
| 186 | 
            +
            receive it, in any medium, provided that you conspicuously and
         | 
| 187 | 
            +
            appropriately publish on each copy an appropriate copyright notice;
         | 
| 188 | 
            +
            keep intact all notices stating that this License and any
         | 
| 189 | 
            +
            non-permissive terms added in accord with section 7 apply to the code;
         | 
| 190 | 
            +
            keep intact all notices of the absence of any warranty; and give all
         | 
| 191 | 
            +
            recipients a copy of this License along with the Program.
         | 
| 192 | 
            +
             | 
| 193 | 
            +
              You may charge any price or no price for each copy that you convey,
         | 
| 194 | 
            +
            and you may offer support or warranty protection for a fee.
         | 
| 195 | 
            +
             | 
| 196 | 
            +
              5. Conveying Modified Source Versions.
         | 
| 197 | 
            +
             | 
| 198 | 
            +
              You may convey a work based on the Program, or the modifications to
         | 
| 199 | 
            +
            produce it from the Program, in the form of source code under the
         | 
| 200 | 
            +
            terms of section 4, provided that you also meet all of these conditions:
         | 
| 201 | 
            +
             | 
| 202 | 
            +
                a) The work must carry prominent notices stating that you modified
         | 
| 203 | 
            +
                it, and giving a relevant date.
         | 
| 204 | 
            +
             | 
| 205 | 
            +
                b) The work must carry prominent notices stating that it is
         | 
| 206 | 
            +
                released under this License and any conditions added under section
         | 
| 207 | 
            +
                7.  This requirement modifies the requirement in section 4 to
         | 
| 208 | 
            +
                "keep intact all notices".
         | 
| 209 | 
            +
             | 
| 210 | 
            +
                c) You must license the entire work, as a whole, under this
         | 
| 211 | 
            +
                License to anyone who comes into possession of a copy.  This
         | 
| 212 | 
            +
                License will therefore apply, along with any applicable section 7
         | 
| 213 | 
            +
                additional terms, to the whole of the work, and all its parts,
         | 
| 214 | 
            +
                regardless of how they are packaged.  This License gives no
         | 
| 215 | 
            +
                permission to license the work in any other way, but it does not
         | 
| 216 | 
            +
                invalidate such permission if you have separately received it.
         | 
| 217 | 
            +
             | 
| 218 | 
            +
                d) If the work has interactive user interfaces, each must display
         | 
| 219 | 
            +
                Appropriate Legal Notices; however, if the Program has interactive
         | 
| 220 | 
            +
                interfaces that do not display Appropriate Legal Notices, your
         | 
| 221 | 
            +
                work need not make them do so.
         | 
| 222 | 
            +
             | 
| 223 | 
            +
              A compilation of a covered work with other separate and independent
         | 
| 224 | 
            +
            works, which are not by their nature extensions of the covered work,
         | 
| 225 | 
            +
            and which are not combined with it such as to form a larger program,
         | 
| 226 | 
            +
            in or on a volume of a storage or distribution medium, is called an
         | 
| 227 | 
            +
            "aggregate" if the compilation and its resulting copyright are not
         | 
| 228 | 
            +
            used to limit the access or legal rights of the compilation's users
         | 
| 229 | 
            +
            beyond what the individual works permit.  Inclusion of a covered work
         | 
| 230 | 
            +
            in an aggregate does not cause this License to apply to the other
         | 
| 231 | 
            +
            parts of the aggregate.
         | 
| 232 | 
            +
             | 
| 233 | 
            +
              6. Conveying Non-Source Forms.
         | 
| 234 | 
            +
             | 
| 235 | 
            +
              You may convey a covered work in object code form under the terms
         | 
| 236 | 
            +
            of sections 4 and 5, provided that you also convey the
         | 
| 237 | 
            +
            machine-readable Corresponding Source under the terms of this License,
         | 
| 238 | 
            +
            in one of these ways:
         | 
| 239 | 
            +
             | 
| 240 | 
            +
                a) Convey the object code in, or embodied in, a physical product
         | 
| 241 | 
            +
                (including a physical distribution medium), accompanied by the
         | 
| 242 | 
            +
                Corresponding Source fixed on a durable physical medium
         | 
| 243 | 
            +
                customarily used for software interchange.
         | 
| 244 | 
            +
             | 
| 245 | 
            +
                b) Convey the object code in, or embodied in, a physical product
         | 
| 246 | 
            +
                (including a physical distribution medium), accompanied by a
         | 
| 247 | 
            +
                written offer, valid for at least three years and valid for as
         | 
| 248 | 
            +
                long as you offer spare parts or customer support for that product
         | 
| 249 | 
            +
                model, to give anyone who possesses the object code either (1) a
         | 
| 250 | 
            +
                copy of the Corresponding Source for all the software in the
         | 
| 251 | 
            +
                product that is covered by this License, on a durable physical
         | 
| 252 | 
            +
                medium customarily used for software interchange, for a price no
         | 
| 253 | 
            +
                more than your reasonable cost of physically performing this
         | 
| 254 | 
            +
                conveying of source, or (2) access to copy the
         | 
| 255 | 
            +
                Corresponding Source from a network server at no charge.
         | 
| 256 | 
            +
             | 
| 257 | 
            +
                c) Convey individual copies of the object code with a copy of the
         | 
| 258 | 
            +
                written offer to provide the Corresponding Source.  This
         | 
| 259 | 
            +
                alternative is allowed only occasionally and noncommercially, and
         | 
| 260 | 
            +
                only if you received the object code with such an offer, in accord
         | 
| 261 | 
            +
                with subsection 6b.
         | 
| 262 | 
            +
             | 
| 263 | 
            +
                d) Convey the object code by offering access from a designated
         | 
| 264 | 
            +
                place (gratis or for a charge), and offer equivalent access to the
         | 
| 265 | 
            +
                Corresponding Source in the same way through the same place at no
         | 
| 266 | 
            +
                further charge.  You need not require recipients to copy the
         | 
| 267 | 
            +
                Corresponding Source along with the object code.  If the place to
         | 
| 268 | 
            +
                copy the object code is a network server, the Corresponding Source
         | 
| 269 | 
            +
                may be on a different server (operated by you or a third party)
         | 
| 270 | 
            +
                that supports equivalent copying facilities, provided you maintain
         | 
| 271 | 
            +
                clear directions next to the object code saying where to find the
         | 
| 272 | 
            +
                Corresponding Source.  Regardless of what server hosts the
         | 
| 273 | 
            +
                Corresponding Source, you remain obligated to ensure that it is
         | 
| 274 | 
            +
                available for as long as needed to satisfy these requirements.
         | 
| 275 | 
            +
             | 
| 276 | 
            +
                e) Convey the object code using peer-to-peer transmission, provided
         | 
| 277 | 
            +
                you inform other peers where the object code and Corresponding
         | 
| 278 | 
            +
                Source of the work are being offered to the general public at no
         | 
| 279 | 
            +
                charge under subsection 6d.
         | 
| 280 | 
            +
             | 
| 281 | 
            +
              A separable portion of the object code, whose source code is excluded
         | 
| 282 | 
            +
            from the Corresponding Source as a System Library, need not be
         | 
| 283 | 
            +
            included in conveying the object code work.
         | 
| 284 | 
            +
             | 
| 285 | 
            +
              A "User Product" is either (1) a "consumer product", which means any
         | 
| 286 | 
            +
            tangible personal property which is normally used for personal, family,
         | 
| 287 | 
            +
            or household purposes, or (2) anything designed or sold for incorporation
         | 
| 288 | 
            +
            into a dwelling.  In determining whether a product is a consumer product,
         | 
| 289 | 
            +
            doubtful cases shall be resolved in favor of coverage.  For a particular
         | 
| 290 | 
            +
            product received by a particular user, "normally used" refers to a
         | 
| 291 | 
            +
            typical or common use of that class of product, regardless of the status
         | 
| 292 | 
            +
            of the particular user or of the way in which the particular user
         | 
| 293 | 
            +
            actually uses, or expects or is expected to use, the product.  A product
         | 
| 294 | 
            +
            is a consumer product regardless of whether the product has substantial
         | 
| 295 | 
            +
            commercial, industrial or non-consumer uses, unless such uses represent
         | 
| 296 | 
            +
            the only significant mode of use of the product.
         | 
| 297 | 
            +
             | 
| 298 | 
            +
              "Installation Information" for a User Product means any methods,
         | 
| 299 | 
            +
            procedures, authorization keys, or other information required to install
         | 
| 300 | 
            +
            and execute modified versions of a covered work in that User Product from
         | 
| 301 | 
            +
            a modified version of its Corresponding Source.  The information must
         | 
| 302 | 
            +
            suffice to ensure that the continued functioning of the modified object
         | 
| 303 | 
            +
            code is in no case prevented or interfered with solely because
         | 
| 304 | 
            +
            modification has been made.
         | 
| 305 | 
            +
             | 
| 306 | 
            +
              If you convey an object code work under this section in, or with, or
         | 
| 307 | 
            +
            specifically for use in, a User Product, and the conveying occurs as
         | 
| 308 | 
            +
            part of a transaction in which the right of possession and use of the
         | 
| 309 | 
            +
            User Product is transferred to the recipient in perpetuity or for a
         | 
| 310 | 
            +
            fixed term (regardless of how the transaction is characterized), the
         | 
| 311 | 
            +
            Corresponding Source conveyed under this section must be accompanied
         | 
| 312 | 
            +
            by the Installation Information.  But this requirement does not apply
         | 
| 313 | 
            +
            if neither you nor any third party retains the ability to install
         | 
| 314 | 
            +
            modified object code on the User Product (for example, the work has
         | 
| 315 | 
            +
            been installed in ROM).
         | 
| 316 | 
            +
             | 
| 317 | 
            +
              The requirement to provide Installation Information does not include a
         | 
| 318 | 
            +
            requirement to continue to provide support service, warranty, or updates
         | 
| 319 | 
            +
            for a work that has been modified or installed by the recipient, or for
         | 
| 320 | 
            +
            the User Product in which it has been modified or installed.  Access to a
         | 
| 321 | 
            +
            network may be denied when the modification itself materially and
         | 
| 322 | 
            +
            adversely affects the operation of the network or violates the rules and
         | 
| 323 | 
            +
            protocols for communication across the network.
         | 
| 324 | 
            +
             | 
| 325 | 
            +
              Corresponding Source conveyed, and Installation Information provided,
         | 
| 326 | 
            +
            in accord with this section must be in a format that is publicly
         | 
| 327 | 
            +
            documented (and with an implementation available to the public in
         | 
| 328 | 
            +
            source code form), and must require no special password or key for
         | 
| 329 | 
            +
            unpacking, reading or copying.
         | 
| 330 | 
            +
             | 
| 331 | 
            +
              7. Additional Terms.
         | 
| 332 | 
            +
             | 
| 333 | 
            +
              "Additional permissions" are terms that supplement the terms of this
         | 
| 334 | 
            +
            License by making exceptions from one or more of its conditions.
         | 
| 335 | 
            +
            Additional permissions that are applicable to the entire Program shall
         | 
| 336 | 
            +
            be treated as though they were included in this License, to the extent
         | 
| 337 | 
            +
            that they are valid under applicable law.  If additional permissions
         | 
| 338 | 
            +
            apply only to part of the Program, that part may be used separately
         | 
| 339 | 
            +
            under those permissions, but the entire Program remains governed by
         | 
| 340 | 
            +
            this License without regard to the additional permissions.
         | 
| 341 | 
            +
             | 
| 342 | 
            +
              When you convey a copy of a covered work, you may at your option
         | 
| 343 | 
            +
            remove any additional permissions from that copy, or from any part of
         | 
| 344 | 
            +
            it.  (Additional permissions may be written to require their own
         | 
| 345 | 
            +
            removal in certain cases when you modify the work.)  You may place
         | 
| 346 | 
            +
            additional permissions on material, added by you to a covered work,
         | 
| 347 | 
            +
            for which you have or can give appropriate copyright permission.
         | 
| 348 | 
            +
             | 
| 349 | 
            +
              Notwithstanding any other provision of this License, for material you
         | 
| 350 | 
            +
            add to a covered work, you may (if authorized by the copyright holders of
         | 
| 351 | 
            +
            that material) supplement the terms of this License with terms:
         | 
| 352 | 
            +
             | 
| 353 | 
            +
                a) Disclaiming warranty or limiting liability differently from the
         | 
| 354 | 
            +
                terms of sections 15 and 16 of this License; or
         | 
| 355 | 
            +
             | 
| 356 | 
            +
                b) Requiring preservation of specified reasonable legal notices or
         | 
| 357 | 
            +
                author attributions in that material or in the Appropriate Legal
         | 
| 358 | 
            +
                Notices displayed by works containing it; or
         | 
| 359 | 
            +
             | 
| 360 | 
            +
                c) Prohibiting misrepresentation of the origin of that material, or
         | 
| 361 | 
            +
                requiring that modified versions of such material be marked in
         | 
| 362 | 
            +
                reasonable ways as different from the original version; or
         | 
| 363 | 
            +
             | 
| 364 | 
            +
                d) Limiting the use for publicity purposes of names of licensors or
         | 
| 365 | 
            +
                authors of the material; or
         | 
| 366 | 
            +
             | 
| 367 | 
            +
                e) Declining to grant rights under trademark law for use of some
         | 
| 368 | 
            +
                trade names, trademarks, or service marks; or
         | 
| 369 | 
            +
             | 
| 370 | 
            +
                f) Requiring indemnification of licensors and authors of that
         | 
| 371 | 
            +
                material by anyone who conveys the material (or modified versions of
         | 
| 372 | 
            +
                it) with contractual assumptions of liability to the recipient, for
         | 
| 373 | 
            +
                any liability that these contractual assumptions directly impose on
         | 
| 374 | 
            +
                those licensors and authors.
         | 
| 375 | 
            +
             | 
| 376 | 
            +
              All other non-permissive additional terms are considered "further
         | 
| 377 | 
            +
            restrictions" within the meaning of section 10.  If the Program as you
         | 
| 378 | 
            +
            received it, or any part of it, contains a notice stating that it is
         | 
| 379 | 
            +
            governed by this License along with a term that is a further
         | 
| 380 | 
            +
            restriction, you may remove that term.  If a license document contains
         | 
| 381 | 
            +
            a further restriction but permits relicensing or conveying under this
         | 
| 382 | 
            +
            License, you may add to a covered work material governed by the terms
         | 
| 383 | 
            +
            of that license document, provided that the further restriction does
         | 
| 384 | 
            +
            not survive such relicensing or conveying.
         | 
| 385 | 
            +
             | 
| 386 | 
            +
              If you add terms to a covered work in accord with this section, you
         | 
| 387 | 
            +
            must place, in the relevant source files, a statement of the
         | 
| 388 | 
            +
            additional terms that apply to those files, or a notice indicating
         | 
| 389 | 
            +
            where to find the applicable terms.
         | 
| 390 | 
            +
             | 
| 391 | 
            +
              Additional terms, permissive or non-permissive, may be stated in the
         | 
| 392 | 
            +
            form of a separately written license, or stated as exceptions;
         | 
| 393 | 
            +
            the above requirements apply either way.
         | 
| 394 | 
            +
             | 
| 395 | 
            +
              8. Termination.
         | 
| 396 | 
            +
             | 
| 397 | 
            +
              You may not propagate or modify a covered work except as expressly
         | 
| 398 | 
            +
            provided under this License.  Any attempt otherwise to propagate or
         | 
| 399 | 
            +
            modify it is void, and will automatically terminate your rights under
         | 
| 400 | 
            +
            this License (including any patent licenses granted under the third
         | 
| 401 | 
            +
            paragraph of section 11).
         | 
| 402 | 
            +
             | 
| 403 | 
            +
              However, if you cease all violation of this License, then your
         | 
| 404 | 
            +
            license from a particular copyright holder is reinstated (a)
         | 
| 405 | 
            +
            provisionally, unless and until the copyright holder explicitly and
         | 
| 406 | 
            +
            finally terminates your license, and (b) permanently, if the copyright
         | 
| 407 | 
            +
            holder fails to notify you of the violation by some reasonable means
         | 
| 408 | 
            +
            prior to 60 days after the cessation.
         | 
| 409 | 
            +
             | 
| 410 | 
            +
              Moreover, your license from a particular copyright holder is
         | 
| 411 | 
            +
            reinstated permanently if the copyright holder notifies you of the
         | 
| 412 | 
            +
            violation by some reasonable means, this is the first time you have
         | 
| 413 | 
            +
            received notice of violation of this License (for any work) from that
         | 
| 414 | 
            +
            copyright holder, and you cure the violation prior to 30 days after
         | 
| 415 | 
            +
            your receipt of the notice.
         | 
| 416 | 
            +
             | 
| 417 | 
            +
              Termination of your rights under this section does not terminate the
         | 
| 418 | 
            +
            licenses of parties who have received copies or rights from you under
         | 
| 419 | 
            +
            this License.  If your rights have been terminated and not permanently
         | 
| 420 | 
            +
            reinstated, you do not qualify to receive new licenses for the same
         | 
| 421 | 
            +
            material under section 10.
         | 
| 422 | 
            +
             | 
| 423 | 
            +
              9. Acceptance Not Required for Having Copies.
         | 
| 424 | 
            +
             | 
| 425 | 
            +
              You are not required to accept this License in order to receive or
         | 
| 426 | 
            +
            run a copy of the Program.  Ancillary propagation of a covered work
         | 
| 427 | 
            +
            occurring solely as a consequence of using peer-to-peer transmission
         | 
| 428 | 
            +
            to receive a copy likewise does not require acceptance.  However,
         | 
| 429 | 
            +
            nothing other than this License grants you permission to propagate or
         | 
| 430 | 
            +
            modify any covered work.  These actions infringe copyright if you do
         | 
| 431 | 
            +
            not accept this License.  Therefore, by modifying or propagating a
         | 
| 432 | 
            +
            covered work, you indicate your acceptance of this License to do so.
         | 
| 433 | 
            +
             | 
| 434 | 
            +
              10. Automatic Licensing of Downstream Recipients.
         | 
| 435 | 
            +
             | 
| 436 | 
            +
              Each time you convey a covered work, the recipient automatically
         | 
| 437 | 
            +
            receives a license from the original licensors, to run, modify and
         | 
| 438 | 
            +
            propagate that work, subject to this License.  You are not responsible
         | 
| 439 | 
            +
            for enforcing compliance by third parties with this License.
         | 
| 440 | 
            +
             | 
| 441 | 
            +
              An "entity transaction" is a transaction transferring control of an
         | 
| 442 | 
            +
            organization, or substantially all assets of one, or subdividing an
         | 
| 443 | 
            +
            organization, or merging organizations.  If propagation of a covered
         | 
| 444 | 
            +
            work results from an entity transaction, each party to that
         | 
| 445 | 
            +
            transaction who receives a copy of the work also receives whatever
         | 
| 446 | 
            +
            licenses to the work the party's predecessor in interest had or could
         | 
| 447 | 
            +
            give under the previous paragraph, plus a right to possession of the
         | 
| 448 | 
            +
            Corresponding Source of the work from the predecessor in interest, if
         | 
| 449 | 
            +
            the predecessor has it or can get it with reasonable efforts.
         | 
| 450 | 
            +
             | 
| 451 | 
            +
              You may not impose any further restrictions on the exercise of the
         | 
| 452 | 
            +
            rights granted or affirmed under this License.  For example, you may
         | 
| 453 | 
            +
            not impose a license fee, royalty, or other charge for exercise of
         | 
| 454 | 
            +
            rights granted under this License, and you may not initiate litigation
         | 
| 455 | 
            +
            (including a cross-claim or counterclaim in a lawsuit) alleging that
         | 
| 456 | 
            +
            any patent claim is infringed by making, using, selling, offering for
         | 
| 457 | 
            +
            sale, or importing the Program or any portion of it.
         | 
| 458 | 
            +
             | 
| 459 | 
            +
              11. Patents.
         | 
| 460 | 
            +
             | 
| 461 | 
            +
              A "contributor" is a copyright holder who authorizes use under this
         | 
| 462 | 
            +
            License of the Program or a work on which the Program is based.  The
         | 
| 463 | 
            +
            work thus licensed is called the contributor's "contributor version".
         | 
| 464 | 
            +
             | 
| 465 | 
            +
              A contributor's "essential patent claims" are all patent claims
         | 
| 466 | 
            +
            owned or controlled by the contributor, whether already acquired or
         | 
| 467 | 
            +
            hereafter acquired, that would be infringed by some manner, permitted
         | 
| 468 | 
            +
            by this License, of making, using, or selling its contributor version,
         | 
| 469 | 
            +
            but do not include claims that would be infringed only as a
         | 
| 470 | 
            +
            consequence of further modification of the contributor version.  For
         | 
| 471 | 
            +
            purposes of this definition, "control" includes the right to grant
         | 
| 472 | 
            +
            patent sublicenses in a manner consistent with the requirements of
         | 
| 473 | 
            +
            this License.
         | 
| 474 | 
            +
             | 
| 475 | 
            +
              Each contributor grants you a non-exclusive, worldwide, royalty-free
         | 
| 476 | 
            +
            patent license under the contributor's essential patent claims, to
         | 
| 477 | 
            +
            make, use, sell, offer for sale, import and otherwise run, modify and
         | 
| 478 | 
            +
            propagate the contents of its contributor version.
         | 
| 479 | 
            +
             | 
| 480 | 
            +
              In the following three paragraphs, a "patent license" is any express
         | 
| 481 | 
            +
            agreement or commitment, however denominated, not to enforce a patent
         | 
| 482 | 
            +
            (such as an express permission to practice a patent or covenant not to
         | 
| 483 | 
            +
            sue for patent infringement).  To "grant" such a patent license to a
         | 
| 484 | 
            +
            party means to make such an agreement or commitment not to enforce a
         | 
| 485 | 
            +
            patent against the party.
         | 
| 486 | 
            +
             | 
| 487 | 
            +
              If you convey a covered work, knowingly relying on a patent license,
         | 
| 488 | 
            +
            and the Corresponding Source of the work is not available for anyone
         | 
| 489 | 
            +
            to copy, free of charge and under the terms of this License, through a
         | 
| 490 | 
            +
            publicly available network server or other readily accessible means,
         | 
| 491 | 
            +
            then you must either (1) cause the Corresponding Source to be so
         | 
| 492 | 
            +
            available, or (2) arrange to deprive yourself of the benefit of the
         | 
| 493 | 
            +
            patent license for this particular work, or (3) arrange, in a manner
         | 
| 494 | 
            +
            consistent with the requirements of this License, to extend the patent
         | 
| 495 | 
            +
            license to downstream recipients.  "Knowingly relying" means you have
         | 
| 496 | 
            +
            actual knowledge that, but for the patent license, your conveying the
         | 
| 497 | 
            +
            covered work in a country, or your recipient's use of the covered work
         | 
| 498 | 
            +
            in a country, would infringe one or more identifiable patents in that
         | 
| 499 | 
            +
            country that you have reason to believe are valid.
         | 
| 500 | 
            +
             | 
| 501 | 
            +
              If, pursuant to or in connection with a single transaction or
         | 
| 502 | 
            +
            arrangement, you convey, or propagate by procuring conveyance of, a
         | 
| 503 | 
            +
            covered work, and grant a patent license to some of the parties
         | 
| 504 | 
            +
            receiving the covered work authorizing them to use, propagate, modify
         | 
| 505 | 
            +
            or convey a specific copy of the covered work, then the patent license
         | 
| 506 | 
            +
            you grant is automatically extended to all recipients of the covered
         | 
| 507 | 
            +
            work and works based on it.
         | 
| 508 | 
            +
             | 
| 509 | 
            +
              A patent license is "discriminatory" if it does not include within
         | 
| 510 | 
            +
            the scope of its coverage, prohibits the exercise of, or is
         | 
| 511 | 
            +
            conditioned on the non-exercise of one or more of the rights that are
         | 
| 512 | 
            +
            specifically granted under this License.  You may not convey a covered
         | 
| 513 | 
            +
            work if you are a party to an arrangement with a third party that is
         | 
| 514 | 
            +
            in the business of distributing software, under which you make payment
         | 
| 515 | 
            +
            to the third party based on the extent of your activity of conveying
         | 
| 516 | 
            +
            the work, and under which the third party grants, to any of the
         | 
| 517 | 
            +
            parties who would receive the covered work from you, a discriminatory
         | 
| 518 | 
            +
            patent license (a) in connection with copies of the covered work
         | 
| 519 | 
            +
            conveyed by you (or copies made from those copies), or (b) primarily
         | 
| 520 | 
            +
            for and in connection with specific products or compilations that
         | 
| 521 | 
            +
            contain the covered work, unless you entered into that arrangement,
         | 
| 522 | 
            +
            or that patent license was granted, prior to 28 March 2007.
         | 
| 523 | 
            +
             | 
| 524 | 
            +
              Nothing in this License shall be construed as excluding or limiting
         | 
| 525 | 
            +
            any implied license or other defenses to infringement that may
         | 
| 526 | 
            +
            otherwise be available to you under applicable patent law.
         | 
| 527 | 
            +
             | 
| 528 | 
            +
              12. No Surrender of Others' Freedom.
         | 
| 529 | 
            +
             | 
| 530 | 
            +
              If conditions are imposed on you (whether by court order, agreement or
         | 
| 531 | 
            +
            otherwise) that contradict the conditions of this License, they do not
         | 
| 532 | 
            +
            excuse you from the conditions of this License.  If you cannot convey a
         | 
| 533 | 
            +
            covered work so as to satisfy simultaneously your obligations under this
         | 
| 534 | 
            +
            License and any other pertinent obligations, then as a consequence you may
         | 
| 535 | 
            +
            not convey it at all.  For example, if you agree to terms that obligate you
         | 
| 536 | 
            +
            to collect a royalty for further conveying from those to whom you convey
         | 
| 537 | 
            +
            the Program, the only way you could satisfy both those terms and this
         | 
| 538 | 
            +
            License would be to refrain entirely from conveying the Program.
         | 
| 539 | 
            +
             | 
| 540 | 
            +
              13. Remote Network Interaction; Use with the GNU General Public License.
         | 
| 541 | 
            +
             | 
| 542 | 
            +
              Notwithstanding any other provision of this License, if you modify the
         | 
| 543 | 
            +
            Program, your modified version must prominently offer all users
         | 
| 544 | 
            +
            interacting with it remotely through a computer network (if your version
         | 
| 545 | 
            +
            supports such interaction) an opportunity to receive the Corresponding
         | 
| 546 | 
            +
            Source of your version by providing access to the Corresponding Source
         | 
| 547 | 
            +
            from a network server at no charge, through some standard or customary
         | 
| 548 | 
            +
            means of facilitating copying of software.  This Corresponding Source
         | 
| 549 | 
            +
            shall include the Corresponding Source for any work covered by version 3
         | 
| 550 | 
            +
            of the GNU General Public License that is incorporated pursuant to the
         | 
| 551 | 
            +
            following paragraph.
         | 
| 552 | 
            +
             | 
| 553 | 
            +
              Notwithstanding any other provision of this License, you have
         | 
| 554 | 
            +
            permission to link or combine any covered work with a work licensed
         | 
| 555 | 
            +
            under version 3 of the GNU General Public License into a single
         | 
| 556 | 
            +
            combined work, and to convey the resulting work.  The terms of this
         | 
| 557 | 
            +
            License will continue to apply to the part which is the covered work,
         | 
| 558 | 
            +
            but the work with which it is combined will remain governed by version
         | 
| 559 | 
            +
            3 of the GNU General Public License.
         | 
| 560 | 
            +
             | 
| 561 | 
            +
              14. Revised Versions of this License.
         | 
| 562 | 
            +
             | 
| 563 | 
            +
              The Free Software Foundation may publish revised and/or new versions of
         | 
| 564 | 
            +
            the GNU Affero General Public License from time to time.  Such new versions
         | 
| 565 | 
            +
            will be similar in spirit to the present version, but may differ in detail to
         | 
| 566 | 
            +
            address new problems or concerns.
         | 
| 567 | 
            +
             | 
| 568 | 
            +
              Each version is given a distinguishing version number.  If the
         | 
| 569 | 
            +
            Program specifies that a certain numbered version of the GNU Affero General
         | 
| 570 | 
            +
            Public License "or any later version" applies to it, you have the
         | 
| 571 | 
            +
            option of following the terms and conditions either of that numbered
         | 
| 572 | 
            +
            version or of any later version published by the Free Software
         | 
| 573 | 
            +
            Foundation.  If the Program does not specify a version number of the
         | 
| 574 | 
            +
            GNU Affero General Public License, you may choose any version ever published
         | 
| 575 | 
            +
            by the Free Software Foundation.
         | 
| 576 | 
            +
             | 
| 577 | 
            +
              If the Program specifies that a proxy can decide which future
         | 
| 578 | 
            +
            versions of the GNU Affero General Public License can be used, that proxy's
         | 
| 579 | 
            +
            public statement of acceptance of a version permanently authorizes you
         | 
| 580 | 
            +
            to choose that version for the Program.
         | 
| 581 | 
            +
             | 
| 582 | 
            +
              Later license versions may give you additional or different
         | 
| 583 | 
            +
            permissions.  However, no additional obligations are imposed on any
         | 
| 584 | 
            +
            author or copyright holder as a result of your choosing to follow a
         | 
| 585 | 
            +
            later version.
         | 
| 586 | 
            +
             | 
| 587 | 
            +
              15. Disclaimer of Warranty.
         | 
| 588 | 
            +
             | 
| 589 | 
            +
              THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
         | 
| 590 | 
            +
            APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
         | 
| 591 | 
            +
            HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
         | 
| 592 | 
            +
            OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
         | 
| 593 | 
            +
            THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
         | 
| 594 | 
            +
            PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
         | 
| 595 | 
            +
            IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
         | 
| 596 | 
            +
            ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
         | 
| 597 | 
            +
             | 
| 598 | 
            +
              16. Limitation of Liability.
         | 
| 599 | 
            +
             | 
| 600 | 
            +
              IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
         | 
| 601 | 
            +
            WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
         | 
| 602 | 
            +
            THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
         | 
| 603 | 
            +
            GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
         | 
| 604 | 
            +
            USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
         | 
| 605 | 
            +
            DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
         | 
| 606 | 
            +
            PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
         | 
| 607 | 
            +
            EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
         | 
| 608 | 
            +
            SUCH DAMAGES.
         | 
| 609 | 
            +
             | 
| 610 | 
            +
              17. Interpretation of Sections 15 and 16.
         | 
| 611 | 
            +
             | 
| 612 | 
            +
              If the disclaimer of warranty and limitation of liability provided
         | 
| 613 | 
            +
            above cannot be given local legal effect according to their terms,
         | 
| 614 | 
            +
            reviewing courts shall apply local law that most closely approximates
         | 
| 615 | 
            +
            an absolute waiver of all civil liability in connection with the
         | 
| 616 | 
            +
            Program, unless a warranty or assumption of liability accompanies a
         | 
| 617 | 
            +
            copy of the Program in return for a fee.
         | 
| 618 | 
            +
             | 
| 619 | 
            +
                                 END OF TERMS AND CONDITIONS
         | 
| 620 | 
            +
             | 
| 621 | 
            +
                        How to Apply These Terms to Your New Programs
         | 
| 622 | 
            +
             | 
| 623 | 
            +
              If you develop a new program, and you want it to be of the greatest
         | 
| 624 | 
            +
            possible use to the public, the best way to achieve this is to make it
         | 
| 625 | 
            +
            free software which everyone can redistribute and change under these terms.
         | 
| 626 | 
            +
             | 
| 627 | 
            +
              To do so, attach the following notices to the program.  It is safest
         | 
| 628 | 
            +
            to attach them to the start of each source file to most effectively
         | 
| 629 | 
            +
            state the exclusion of warranty; and each file should have at least
         | 
| 630 | 
            +
            the "copyright" line and a pointer to where the full notice is found.
         | 
| 631 | 
            +
             | 
| 632 | 
            +
                <one line to give the program's name and a brief idea of what it does.>
         | 
| 633 | 
            +
                Copyright (C) <year>  <name of author>
         | 
| 634 | 
            +
             | 
| 635 | 
            +
                This program is free software: you can redistribute it and/or modify
         | 
| 636 | 
            +
                it under the terms of the GNU Affero General Public License as published
         | 
| 637 | 
            +
                by the Free Software Foundation, either version 3 of the License, or
         | 
| 638 | 
            +
                (at your option) any later version.
         | 
| 639 | 
            +
             | 
| 640 | 
            +
                This program is distributed in the hope that it will be useful,
         | 
| 641 | 
            +
                but WITHOUT ANY WARRANTY; without even the implied warranty of
         | 
| 642 | 
            +
                MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
         | 
| 643 | 
            +
                GNU Affero General Public License for more details.
         | 
| 644 | 
            +
             | 
| 645 | 
            +
                You should have received a copy of the GNU Affero General Public License
         | 
| 646 | 
            +
                along with this program.  If not, see <https://www.gnu.org/licenses/>.
         | 
| 647 | 
            +
             | 
| 648 | 
            +
            Also add information on how to contact you by electronic and paper mail.
         | 
| 649 | 
            +
             | 
| 650 | 
            +
              If your software can interact with users remotely through a computer
         | 
| 651 | 
            +
            network, you should also make sure that it provides a way for users to
         | 
| 652 | 
            +
            get its source.  For example, if your program is a web application, its
         | 
| 653 | 
            +
            interface could display a "Source" link that leads users to an archive
         | 
| 654 | 
            +
            of the code.  There are many ways you could offer source, and different
         | 
| 655 | 
            +
            solutions will be better for different programs; see section 13 for the
         | 
| 656 | 
            +
            specific requirements.
         | 
| 657 | 
            +
             | 
| 658 | 
            +
              You should also get your employer (if you work as a programmer) or school,
         | 
| 659 | 
            +
            if any, to sign a "copyright disclaimer" for the program, if necessary.
         | 
| 660 | 
            +
            For more information on this, and how to apply and follow the GNU AGPL, see
         | 
| 661 | 
            +
            <https://www.gnu.org/licenses/>.
         | 
    	
        README.md
    CHANGED
    
    | @@ -1,12 +1,100 @@ | |
| 1 | 
             
            ---
         | 
| 2 | 
            -
            title:  | 
| 3 | 
            -
             | 
| 4 | 
            -
            colorFrom: green
         | 
| 5 | 
            -
            colorTo: green
         | 
| 6 | 
             
            sdk: gradio
         | 
| 7 | 
            -
            sdk_version: 3. | 
| 8 | 
            -
            app_file: app.py
         | 
| 9 | 
            -
            pinned: false
         | 
| 10 | 
             
            ---
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 11 |  | 
| 12 | 
            -
            Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
         | 
|  | |
| 1 | 
             
            ---
         | 
| 2 | 
            +
            title: roop-unleashed
         | 
| 3 | 
            +
            app_file: run.py
         | 
|  | |
|  | |
| 4 | 
             
            sdk: gradio
         | 
| 5 | 
            +
            sdk_version: 3.44.2
         | 
|  | |
|  | |
| 6 | 
             
            ---
         | 
| 7 | 
            +
            # roop-unleashed
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            [Changelog](#changelog) • [Usage](#usage) • [Wiki](https://github.com/C0untFloyd/roop-unleashed/wiki)
         | 
| 10 | 
            +
             | 
| 11 | 
            +
             | 
| 12 | 
            +
            Uncensored Deepfakes for images and videos without training and an easy-to-use GUI.
         | 
| 13 | 
            +
             | 
| 14 | 
            +
             | 
| 15 | 
            +
            
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            ### Features
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            - Platform-independent Browser GUI
         | 
| 20 | 
            +
            - Selection of multiple input/output faces in one go
         | 
| 21 | 
            +
            - Many different swapping modes, first detected, face selections, by gender
         | 
| 22 | 
            +
            - Batch processing of images/videos
         | 
| 23 | 
            +
            - Masking of face occluders using text prompts
         | 
| 24 | 
            +
            - Optional Face Restoration using different enhancers
         | 
| 25 | 
            +
            - Preview swapping from different video frames
         | 
| 26 | 
            +
            - Live Fake Cam using your webcam
         | 
| 27 | 
            +
            - Extras Tab for cutting videos etc.
         | 
| 28 | 
            +
            - Settings - storing configuration for next session
         | 
| 29 | 
            +
            - Theme Support
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            and lots more...
         | 
| 32 | 
            +
             | 
| 33 | 
            +
             | 
| 34 | 
            +
            ## Disclaimer
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            This project is for technical and academic use only.
         | 
| 37 | 
            +
            Users of this software are expected to use it responsibly while abiding by local law. If the face of a real person is being used, users are advised to obtain consent from the person concerned and to clearly state that the result is a deepfake when posting content online. Developers of this software will not be responsible for the actions of end-users.
         | 
| 38 | 
            +
            **Please do not apply it to illegal and unethical scenarios.**
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            In the event of violation of the legal and ethical requirements of the user's country or region, this code repository is exempt from liability.
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            ### Installation
         | 
| 43 | 
            +
             | 
| 44 | 
            +
            Please refer to the Wiki.
         | 
| 45 | 
            +
             | 
| 46 | 
            +
             | 
| 47 | 
            +
             | 
| 48 | 
            +
             | 
| 49 | 
            +
            ### Usage
         | 
| 50 | 
            +
             | 
| 51 | 
            +
            - Windows: run the `windows_run.bat` from the Installer.
         | 
| 52 | 
            +
            - Linux: `python run.py`
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            <a target="_blank" href="https://colab.research.google.com/github/C0untFloyd/roop-unleashed/blob/main/roop-unleashed.ipynb">
         | 
| 55 | 
            +
              <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
         | 
| 56 | 
            +
            </a>
         | 
| 57 | 
            +
              
         | 
| 58 | 
            +
             | 
| 59 | 
            +
            Additional command-line arguments are currently unsupported; settings should be configured via the UI.
         | 
| 60 | 
            +
             | 
| 61 | 
            +
            > Note: When you run this program for the first time, it will download some models, roughly 2 GB in size.
         | 
| 62 | 
            +
             | 
| 63 | 
            +
             | 
| 64 | 
            +
             | 
| 65 | 
            +
             | 
| 66 | 
            +
            ### Changelog
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            **16.10.2023** v3.3.4
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            **11.8.2023** v2.7.0
         | 
| 71 | 
            +
             | 
| 72 | 
            +
            Initial Gradio Version - old TkInter Version now deprecated
         | 
| 73 | 
            +
             | 
| 74 | 
            +
            - Re-added unified padding to face enhancers
         | 
| 75 | 
            +
            - Fixed DMDNet for all resolutions
         | 
| 76 | 
            +
            - Selecting target face now automatically switches swapping mode to selected
         | 
| 77 | 
            +
            - GPU providers are correctly set using the GUI (needs restart currently)
         | 
| 78 | 
            +
            - Local output folder can be opened from page
         | 
| 79 | 
            +
            - Unfinished extras functions disabled for now
         | 
| 80 | 
            +
            - Installer checks out a specific commit, allowing you to go back to the first install
         | 
| 81 | 
            +
            - Updated readme for new gradio version
         | 
| 82 | 
            +
            - Updated Colab
         | 
| 83 | 
            +
             | 
| 84 | 
            +
             | 
| 85 | 
            +
            # Acknowledgements
         | 
| 86 | 
            +
             | 
| 87 | 
            +
            Lots of ideas, code or pre-trained models used from the following projects:
         | 
| 88 | 
            +
             | 
| 89 | 
            +
            https://github.com/deepinsight/insightface
         | 
| 90 | 
            +
            https://github.com/s0md3v/roop
         | 
| 91 | 
            +
            https://github.com/AUTOMATIC1111/stable-diffusion-webui
         | 
| 92 | 
            +
            https://github.com/Hillobar/Rope
         | 
| 93 | 
            +
            https://github.com/janvarev/chain-img-processor
         | 
| 94 | 
            +
            https://github.com/TencentARC/GFPGAN   
         | 
| 95 | 
            +
            https://github.com/kadirnar/codeformer-pip
         | 
| 96 | 
            +
            https://github.com/csxmli2016/DMDNet
         | 
| 97 | 
            +
             | 
| 98 | 
            +
             | 
| 99 | 
            +
            Thanks to all developers!
         | 
| 100 |  | 
|  | 
    	
        __pycache__/settings.cpython-310.pyc
    ADDED
    
    | Binary file (2.16 kB). View file | 
|  | 
    	
        clip/__init__.py
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            from .clip import *
         | 
    	
        clip/bpe_simple_vocab_16e6.txt.gz
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:924691ac288e54409236115652ad4aa250f48203de50a9e4722a6ecd48d6804a
         | 
| 3 | 
            +
            size 1356917
         | 
    	
        clip/clip.py
    ADDED
    
    | @@ -0,0 +1,245 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import hashlib
         | 
| 2 | 
            +
            import os
         | 
| 3 | 
            +
            import urllib
         | 
| 4 | 
            +
            import warnings
         | 
| 5 | 
            +
            from typing import Any, Union, List
         | 
| 6 | 
            +
            from pkg_resources import packaging
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            import torch
         | 
| 9 | 
            +
            from PIL import Image
         | 
| 10 | 
            +
            from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
         | 
| 11 | 
            +
            from tqdm import tqdm
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            from .model import build_model
         | 
| 14 | 
            +
            from .simple_tokenizer import SimpleTokenizer as _Tokenizer
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            try:
         | 
| 17 | 
            +
                from torchvision.transforms import InterpolationMode
         | 
| 18 | 
            +
                BICUBIC = InterpolationMode.BICUBIC
         | 
| 19 | 
            +
            except ImportError:
         | 
| 20 | 
            +
                BICUBIC = Image.BICUBIC
         | 
| 21 | 
            +
             | 
| 22 | 
            +
             | 
| 23 | 
            +
            if packaging.version.parse(torch.__version__) < packaging.version.parse("1.7.1"):
         | 
| 24 | 
            +
                warnings.warn("PyTorch version 1.7.1 or higher is recommended")
         | 
| 25 | 
            +
             | 
| 26 | 
            +
             | 
| 27 | 
            +
            __all__ = ["available_models", "load", "tokenize"]
         | 
| 28 | 
            +
            _tokenizer = _Tokenizer()
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            _MODELS = {
         | 
| 31 | 
            +
                "RN50": "https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt",
         | 
| 32 | 
            +
                "RN101": "https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt",
         | 
| 33 | 
            +
                "RN50x4": "https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt",
         | 
| 34 | 
            +
                "RN50x16": "https://openaipublic.azureedge.net/clip/models/52378b407f34354e150460fe41077663dd5b39c54cd0bfd2b27167a4a06ec9aa/RN50x16.pt",
         | 
| 35 | 
            +
                "RN50x64": "https://openaipublic.azureedge.net/clip/models/be1cfb55d75a9666199fb2206c106743da0f6468c9d327f3e0d0a543a9919d9c/RN50x64.pt",
         | 
| 36 | 
            +
                "ViT-B/32": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt",
         | 
| 37 | 
            +
                "ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt",
         | 
| 38 | 
            +
                "ViT-L/14": "https://openaipublic.azureedge.net/clip/models/b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836/ViT-L-14.pt",
         | 
| 39 | 
            +
                "ViT-L/14@336px": "https://openaipublic.azureedge.net/clip/models/3035c92b350959924f9f00213499208652fc7ea050643e8b385c2dac08641f02/ViT-L-14-336px.pt",
         | 
| 40 | 
            +
            }
         | 
| 41 | 
            +
             | 
| 42 | 
            +
             | 
| 43 | 
            +
            def _download(url: str, root: str):
         | 
| 44 | 
            +
                os.makedirs(root, exist_ok=True)
         | 
| 45 | 
            +
                filename = os.path.basename(url)
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                expected_sha256 = url.split("/")[-2]
         | 
| 48 | 
            +
                download_target = os.path.join(root, filename)
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                if os.path.exists(download_target) and not os.path.isfile(download_target):
         | 
| 51 | 
            +
                    raise RuntimeError(f"{download_target} exists and is not a regular file")
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                if os.path.isfile(download_target):
         | 
| 54 | 
            +
                    if hashlib.sha256(open(download_target, "rb").read()).hexdigest() == expected_sha256:
         | 
| 55 | 
            +
                        return download_target
         | 
| 56 | 
            +
                    else:
         | 
| 57 | 
            +
                        warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file")
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                with urllib.request.urlopen(url) as source, open(download_target, "wb") as output:
         | 
| 60 | 
            +
                    with tqdm(total=int(source.info().get("Content-Length")), ncols=80, unit='iB', unit_scale=True, unit_divisor=1024) as loop:
         | 
| 61 | 
            +
                        while True:
         | 
| 62 | 
            +
                            buffer = source.read(8192)
         | 
| 63 | 
            +
                            if not buffer:
         | 
| 64 | 
            +
                                break
         | 
| 65 | 
            +
             | 
| 66 | 
            +
                            output.write(buffer)
         | 
| 67 | 
            +
                            loop.update(len(buffer))
         | 
| 68 | 
            +
             | 
| 69 | 
            +
                if hashlib.sha256(open(download_target, "rb").read()).hexdigest() != expected_sha256:
         | 
| 70 | 
            +
                    raise RuntimeError("Model has been downloaded but the SHA256 checksum does not not match")
         | 
| 71 | 
            +
             | 
| 72 | 
            +
                return download_target
         | 
| 73 | 
            +
             | 
| 74 | 
            +
             | 
| 75 | 
            +
            def _convert_image_to_rgb(image):
         | 
| 76 | 
            +
                return image.convert("RGB")
         | 
| 77 | 
            +
             | 
| 78 | 
            +
             | 
| 79 | 
            +
            def _transform(n_px):
                """Build the image preprocessing pipeline for a target size of ``n_px``.

                The composed steps are, in order: bicubic resize to ``n_px``, center crop
                to ``n_px``, conversion to RGB, conversion to a tensor, and per-channel
                normalization with the fixed mean / std constants below.
                """
                channel_mean = (0.48145466, 0.4578275, 0.40821073)
                channel_std = (0.26862954, 0.26130258, 0.27577711)
                steps = [
                    Resize(n_px, interpolation=BICUBIC),
                    CenterCrop(n_px),
                    _convert_image_to_rgb,
                    ToTensor(),
                    Normalize(channel_mean, channel_std),
                ]
                return Compose(steps)
         | 
| 87 | 
            +
             | 
| 88 | 
            +
             | 
| 89 | 
            +
            def available_models() -> List[str]:
                """Return the model names registered in ``_MODELS`` as a new list."""
                model_names = list(_MODELS)
                return model_names
         | 
| 92 | 
            +
             | 
| 93 | 
            +
             | 
| 94 | 
            +
            def load(name: str, device: Union[str, torch.device] = "cuda" if torch.cuda.is_available() else "cpu", jit: bool = False, download_root: str = None):
                """Load a CLIP model

                Parameters
                ----------
                name : str
                    A model name listed by `clip.available_models()`, or the path to a model checkpoint containing the state_dict

                device : Union[str, torch.device]
                    The device to put the loaded model

                jit : bool
                    Whether to load the optimized JIT model or more hackable non-JIT model (default).

                download_root: str
                    path to download the model files; by default, it uses "~/.cache/clip"

                Returns
                -------
                model : torch.nn.Module
                    The CLIP model

                preprocess : Callable[[PIL.Image], torch.Tensor]
                    A torchvision transform that converts a PIL image into a tensor that the returned model can take as its input

                Raises
                ------
                RuntimeError
                    If `name` is neither a known model name nor an existing file.
                """
                if name in _MODELS:
                    model_path = _download(_MODELS[name], download_root or os.path.expanduser("~/.cache/clip"))
                elif os.path.isfile(name):
                    model_path = name
                else:
                    raise RuntimeError(f"Model {name} not found; available models = {available_models()}")

                with open(model_path, 'rb') as opened_file:
                    try:
                        # loading JIT archive
                        model = torch.jit.load(opened_file, map_location=device if jit else "cpu").eval()
                        state_dict = None
                    except RuntimeError:
                        # loading saved state dict
                        if jit:
                            warnings.warn(f"File {model_path} is not a JIT archive. Loading as a state dict instead")
                            jit = False
                        # torch.jit.load() reads the whole stream before it fails, so the
                        # file position is at EOF here; rewind so torch.load() sees the
                        # checkpoint from its first byte.
                        opened_file.seek(0)
                        state_dict = torch.load(opened_file, map_location="cpu")

                if not jit:
                    # Rebuild a regular (non-scripted) module from the weights.
                    model = build_model(state_dict or model.state_dict()).to(device)
                    if str(device) == "cpu":
                        model.float()
                    return model, _transform(model.visual.input_resolution)

                # patch the device names
                # Trace a tiny function to obtain a graph constant that carries the target device.
                device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device(device)), example_inputs=[])
                device_node = [n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n)][-1]

                def _node_get(node: torch._C.Node, key: str):
                    """Gets attributes of a node which is polymorphic over return type.

                    From https://github.com/pytorch/pytorch/pull/82628
                    """
                    sel = node.kindOf(key)
                    return getattr(node, sel)(key)

                def patch_device(module):
                    """Overwrite every "cuda..." constant in the module's graphs with the target device."""
                    try:
                        graphs = [module.graph] if hasattr(module, "graph") else []
                    except RuntimeError:
                        graphs = []

                    if hasattr(module, "forward1"):
                        graphs.append(module.forward1.graph)

                    for graph in graphs:
                        for node in graph.findAllNodes("prim::Constant"):
                            if "value" in node.attributeNames() and str(_node_get(node, "value")).startswith("cuda"):
                                node.copyAttributes(device_node)

                model.apply(patch_device)
                patch_device(model.encode_image)
                patch_device(model.encode_text)

                # patch dtype to float32 on CPU
                if str(device) == "cpu":
                    float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[])
                    float_input = list(float_holder.graph.findNode("aten::to").inputs())[1]
                    float_node = float_input.node()

                    def patch_float(module):
                        """Replace dtype constant 5 on aten::to calls with the traced float dtype constant."""
                        try:
                            graphs = [module.graph] if hasattr(module, "graph") else []
                        except RuntimeError:
                            graphs = []

                        if hasattr(module, "forward1"):
                            graphs.append(module.forward1.graph)

                        for graph in graphs:
                            for node in graph.findAllNodes("aten::to"):
                                inputs = list(node.inputs())
                                for i in [1, 2]:  # dtype can be the second or third argument to aten::to()
                                    if _node_get(inputs[i].node(), "value") == 5:
                                        inputs[i].node().copyAttributes(float_node)

                    model.apply(patch_float)
                    patch_float(model.encode_image)
                    patch_float(model.encode_text)

                    model.float()

                return model, _transform(model.input_resolution.item())
         | 
| 203 | 
            +
             | 
| 204 | 
            +
             | 
| 205 | 
            +
            def tokenize(texts: Union[str, List[str]], context_length: int = 77, truncate: bool = False) -> Union[torch.IntTensor, torch.LongTensor]:
                """
                Tokenize one string or a list of strings into a fixed-width integer tensor.

                Parameters
                ----------
                texts : Union[str, List[str]]
                    A single input string or a list of input strings

                context_length : int
                    Number of columns in the result (77 by default)

                truncate: bool
                    If True, over-long encodings are cut to `context_length` and the last
                    kept token is replaced by the end-of-text token; if False they raise

                Returns
                -------
                A zero-padded two-dimensional tensor of shape [number of input strings, context_length].
                The dtype is long when the torch version is <1.8.0 and int otherwise.

                Raises
                ------
                RuntimeError
                    If an encoding exceeds `context_length` and `truncate` is False.
                """
                batch = [texts] if isinstance(texts, str) else texts

                start_id = _tokenizer.encoder["<|startoftext|>"]
                end_id = _tokenizer.encoder["<|endoftext|>"]
                encoded = [[start_id] + _tokenizer.encode(entry) + [end_id] for entry in batch]

                # Older torch releases need long indices; newer ones get the smaller int dtype.
                needs_long = packaging.version.parse(torch.__version__) < packaging.version.parse("1.8.0")
                token_dtype = torch.long if needs_long else torch.int
                result = torch.zeros(len(encoded), context_length, dtype=token_dtype)

                for row, ids in enumerate(encoded):
                    if len(ids) > context_length:
                        if not truncate:
                            raise RuntimeError(f"Input {batch[row]} is too long for context length {context_length}")
                        ids = ids[:context_length]
                        ids[-1] = end_id
                    result[row, :len(ids)] = torch.tensor(ids)

                return result
         | 
    	
        clip/clipseg.py
    ADDED
    
    | @@ -0,0 +1,538 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import math
         | 
| 2 | 
            +
            from os.path import basename, dirname, join, isfile
         | 
| 3 | 
            +
            import torch
         | 
| 4 | 
            +
            from torch import nn
         | 
| 5 | 
            +
            from torch.nn import functional as nnf
         | 
| 6 | 
            +
            from torch.nn.modules.activation import ReLU
         | 
| 7 | 
            +
             | 
| 8 | 
            +
             | 
| 9 | 
            +
            def get_prompt_list(prompt):
                """Return the list of prompt templates for the given prompt mode.

                Supported modes are 'plain', 'fixed', 'shuffle' and 'shuffle+'. Each template
                contains a '{}' placeholder. A fresh list is returned on every call.

                Raises:
                    ValueError: if ``prompt`` is not one of the supported modes.
                """
                base_templates = ('a photo of a {}.', 'a photograph of a {}.', 'an image of a {}.', '{}.')
                extra_templates = ('a cropped photo of a {}.', 'a good photo of a {}.', 'a photo of one {}.',
                                   'a bad photo of a {}.', 'a photo of the {}.')
                choices = (
                    ('plain', ('{}',)),
                    ('fixed', base_templates[:1]),
                    ('shuffle', base_templates),
                    ('shuffle+', base_templates + extra_templates),
                )

                for mode, templates in choices:
                    if prompt == mode:
                        return list(templates)

                raise ValueError('Invalid value for prompt')
         | 
| 22 | 
            +
             | 
| 23 | 
            +
             | 
| 24 | 
            +
            def forward_multihead_attention(x, b, with_aff=False, attn_mask=None):
                """
                Simplified version of multihead attention (taken from torch source code but without tons of if clauses).
                The mlp and layer norm come from CLIP.

                x: input, unpacked below as (tgt_len, bsz, embed_dim).
                b: residual block providing `ln_1`, `attn` (a multihead attention module), `ln_2` and `mlp`.
                with_aff: if True, also return the post-softmax attention weights,
                    shape (bsz * num_heads, tgt_len, tgt_len).
                attn_mask: None, or a tuple (mask_type, mask). The mask has one row per sample
                    and one column per non-readout token (expected shape (bsz, tgt_len - 1)).
                    With mask_type 'cls_token' only the readout token's similarities to the
                    other tokens are scaled; with 'all' every non-readout row is scaled.
                    The scaling is applied to the logits before the softmax.

                Returns the block output, or (output, attention weights) when `with_aff` is True.
                """

                x_ = b.ln_1(x)
                q, k, v = nnf.linear(x_, b.attn.in_proj_weight, b.attn.in_proj_bias).chunk(3, dim=-1)
                tgt_len, bsz, embed_dim = q.size()

                head_dim = embed_dim // b.attn.num_heads
                scaling = float(head_dim) ** -0.5

                # Fold the heads into the batch axis: row index = sample * num_heads + head.
                q = q.contiguous().view(tgt_len, bsz * b.attn.num_heads, b.attn.head_dim).transpose(0, 1)
                k = k.contiguous().view(-1, bsz * b.attn.num_heads, b.attn.head_dim).transpose(0, 1)
                v = v.contiguous().view(-1, bsz * b.attn.num_heads, b.attn.head_dim).transpose(0, 1)

                q = q * scaling

                attn_output_weights = torch.bmm(q, k.transpose(1, 2))  # (bsz * n_heads, tokens, tokens)
                if attn_mask is not None:

                    attn_mask_type, attn_mask = attn_mask
                    n_heads = attn_output_weights.size(0) // attn_mask.size(0)
                    # The folded batch axis is sample-major (all heads of sample 0, then all
                    # heads of sample 1, ...), so each sample's mask row must be repeated
                    # consecutively. A plain repeat() would tile the rows head-major and
                    # apply masks to the wrong samples whenever bsz > 1.
                    attn_mask = attn_mask.repeat_interleave(n_heads, dim=0)

                    if attn_mask_type == 'cls_token':
                        # the mask only affects similarities compared to the readout-token.
                        attn_output_weights[:, 0, 1:] = attn_output_weights[:, 0, 1:] * attn_mask[None,...]

                    if attn_mask_type == 'all':
                        attn_output_weights[:, 1:, 1:] = attn_output_weights[:, 1:, 1:] * attn_mask[:, None]

                attn_output_weights = torch.softmax(attn_output_weights, dim=-1)

                attn_output = torch.bmm(attn_output_weights, v)
                attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
                attn_output = b.attn.out_proj(attn_output)

                # Residual connections around attention and the MLP.
                x = x + attn_output
                x = x + b.mlp(b.ln_2(x))

                if with_aff:
                    return x, attn_output_weights
                else:
                    return x
         | 
| 76 | 
            +
             | 
| 77 | 
            +
             | 
| 78 | 
            +
            class CLIPDenseBase(nn.Module):
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                def __init__(self, version, reduce_cond, reduce_dim, prompt, n_tokens):
                    """Set up the frozen CLIP backbone and the conditioning layers.

                    version: model name or checkpoint path passed to `clip.load`.
                    reduce_cond: if not None, output size of a frozen linear layer applied to
                        the 512-d conditional vector; otherwise no such layer is created.
                    reduce_dim: output size of the `film_mul`, `film_add` and `reduce` layers.
                    prompt: prompt mode passed to `get_prompt_list`.
                    n_tokens: if not None, conv weights are rescaled in `visual_forward` such
                        that n_tokens tokens per side are obtained.
                    """
                    super().__init__()

                    import clip

                    # The CLIP model is loaded on CPU as a regular (non-JIT) module.
                    self.clip_model, _ = clip.load(version, device='cpu', jit=False)
                    self.model = self.clip_model.visual

                    # if not None, scale conv weights such that we obtain n_tokens.
                    self.n_tokens = n_tokens

                    # The CLIP weights are never trained.
                    for p in self.clip_model.parameters():
                        p.requires_grad_(False)

                    # conditional
                    if reduce_cond is not None:
                        self.reduce_cond = nn.Linear(512, reduce_cond)
                        for p in self.reduce_cond.parameters():
                            p.requires_grad_(False)
                    else:
                        self.reduce_cond = None

                    cond_dim = 512 if reduce_cond is None else reduce_cond
                    self.film_mul = nn.Linear(cond_dim, reduce_dim)
                    self.film_add = nn.Linear(cond_dim, reduce_dim)

                    self.reduce = nn.Linear(768, reduce_dim)

                    self.prompt_list = get_prompt_list(prompt)

                    # precomputed prompts
                    import pickle
                    if isfile('precomputed_prompt_vectors.pickle'):
                        # NOTE(review): this unpickles a file from the current working
                        # directory; pickle can execute arbitrary code, so the file must
                        # come from a trusted source.
                        with open('precomputed_prompt_vectors.pickle', 'rb') as prompt_file:
                            precomp = pickle.load(prompt_file)
                        self.precomputed_prompts = {k: torch.from_numpy(v) for k, v in precomp.items()}
                    else:
                        self.precomputed_prompts = dict()
         | 
| 117 | 
            +
                
         | 
| 118 | 
            +
                def rescaled_pos_emb(self, new_size):
                    """Return the positional embedding resampled to a `new_size` token grid.

                    new_size: pair (rows, cols) of the target token grid.

                    The first embedding row (the one that precedes the grid tokens) is kept
                    unchanged; the remaining rows are laid out on the `self.token_shape` grid,
                    bicubically interpolated to `new_size` and flattened again. The result has
                    1 + rows * cols rows. The embedding width is read from the tensor itself,
                    so this is not tied to one particular model width.
                    """
                    assert len(new_size) == 2

                    pos_emb = self.model.positional_embedding
                    width = pos_emb.shape[1]

                    # (tokens, width) -> (1, width, grid_h, grid_w) so it can be resized like an image.
                    a = pos_emb[1:].T.view(1, width, *self.token_shape)
                    b = nnf.interpolate(a, new_size, mode='bicubic', align_corners=False).squeeze(0).view(width, new_size[0]*new_size[1]).T
                    return torch.cat([pos_emb[:1], b])
         | 
| 124 | 
            +
             | 
| 125 | 
            +
                def visual_forward(self, x_inp, extract_layers=(), skip=False, mask=None):
                    """Run the CLIP visual transformer and collect intermediate results.

                    x_inp: input batch; dimension 2 is used to derive the conv stride when
                        `self.n_tokens` is set.
                    extract_layers: indices of transformer blocks whose output and attention
                        weights are collected.
                    skip: if True, stop after the highest index in `extract_layers`.
                    mask: None, or a triple (mask_layer, mask_type, mask_tensor). The mask is
                        resized to the token grid and passed to the attention of block
                        `mask_layer` (or of every block when `mask_layer` is 'all').

                    Returns (x, activations, affinities): the projected readout-token feature,
                    the collected block outputs and the collected attention weights.
                    Everything runs under `torch.no_grad()`.
                    """
                    with torch.no_grad():
                        visual = self.model

                        if self.n_tokens is None:
                            x = visual.conv1(x_inp)  # shape = [*, width, grid, grid]
                        else:
                            # Resize the patch kernel so the stride yields n_tokens per side.
                            stride2 = x_inp.shape[2] // self.n_tokens
                            conv_weight2 = nnf.interpolate(visual.conv1.weight, (stride2, stride2), mode='bilinear', align_corners=True)
                            x = nnf.conv2d(x_inp, conv_weight2, bias=visual.conv1.bias, stride=stride2, dilation=visual.conv1.dilation)

                        # [*, width, grid, grid] -> [*, grid ** 2, width]
                        x = x.reshape(x.shape[0], x.shape[1], -1).permute(0, 2, 1)

                        # Prepend the class embedding to every sample.
                        cls_tokens = visual.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device)
                        x = torch.cat([cls_tokens, x], dim=1)  # shape = [*, grid ** 2 + 1, width]

                        standard_n_tokens = 50 if visual.conv1.kernel_size[0] == 32 else 197

                        if x.shape[1] == standard_n_tokens:
                            x = x + visual.positional_embedding.to(x.dtype)
                        else:
                            # Non-standard token count: resample the positional embedding.
                            new_shape = int(math.sqrt(x.shape[1]-1))
                            x = x + self.rescaled_pos_emb((new_shape, new_shape)).to(x.dtype)[None,:,:]

                        x = visual.ln_pre(x)
                        x = x.permute(1, 0, 2)  # NLD -> LND

                        activations, affinities = [], []
                        for i, res_block in enumerate(visual.transformer.resblocks):

                            attn_mask = None
                            if mask is not None:
                                mask_layer, mask_type, mask_tensor = mask
                                if mask_layer == i or mask_layer == 'all':
                                    size = int(math.sqrt(x.shape[0] - 1))
                                    resized = nnf.interpolate(mask_tensor.unsqueeze(1).float(), (size, size))
                                    attn_mask = (mask_type, resized.view(mask_tensor.shape[0], size * size))

                            x, aff_per_head = forward_multihead_attention(x, res_block, with_aff=True, attn_mask=attn_mask)

                            if i in extract_layers:
                                affinities.append(aff_per_head)
                                activations.append(x)

                            if len(extract_layers) > 0 and i == max(extract_layers) and skip:
                                print('early skip')
                                break

                        x = x.permute(1, 0, 2)  # LND -> NLD
                        x = visual.ln_post(x[:, 0, :])

                        if visual.proj is not None:
                            x = x @ visual.proj

                        return x, activations, affinities
         | 
| 193 | 
            +
             | 
| 194 | 
            +
                def sample_prompts(self, words, prompt_list=None):
                    """Insert each word into a randomly chosen prompt template.

                    Args:
                        words: sequence of strings; one prompt is produced per entry.
                        prompt_list: optional sequence of ``str.format`` templates.
                            Falls back to ``self.prompt_list`` when ``None``.

                    Returns:
                        A list with ``len(words)`` strings, each being
                        ``template.format(word)`` for an independently drawn template.
                    """
                    templates = self.prompt_list if prompt_list is None else prompt_list

                    # Uniform weights: every template is equally likely, and the same
                    # template may be drawn for several words (replacement=True).
                    uniform_weights = torch.ones(len(templates))
                    drawn = torch.multinomial(uniform_weights, len(words), replacement=True)

                    filled = []
                    for template_idx, word in zip(drawn, words):
                        filled.append(templates[template_idx].format(word))
                    return filled
         | 
| 201 | 
            +
             | 
| 202 | 
            +
                def get_cond_vec(self, conditional, batch_size):
                    """Turn ``conditional`` into a conditioning tensor.

                    Supported inputs:
                      * a single ``str``: encoded with ``self.compute_conditional`` and
                        repeated ``batch_size`` times along dim 0;
                      * a non-empty list/tuple whose first element is a ``str``: must
                        contain ``batch_size`` entries, encoded with
                        ``self.compute_conditional``;
                      * a 2-D tensor: returned unchanged;
                      * any other tensor: passed through ``self.visual_forward`` under
                        ``torch.no_grad()`` and its first return value is used.

                    Type checks use ``isinstance`` so subclasses (for example
                    ``nn.Parameter``) are accepted as well.

                    Raises:
                        ValueError: if ``conditional`` is ``None``, an empty list/tuple,
                            or any other unsupported value.
                    """
                    # compute conditional from a single string
                    if isinstance(conditional, str):
                        cond = self.compute_conditional(conditional)
                        return cond.repeat(batch_size, 1)

                    # compute conditional from string list/tuple; the length guard keeps
                    # an empty sequence from failing on conditional[0] with IndexError
                    if (isinstance(conditional, (list, tuple)) and len(conditional) > 0
                            and isinstance(conditional[0], str)):
                        assert len(conditional) == batch_size
                        return self.compute_conditional(conditional)

                    if isinstance(conditional, torch.Tensor):
                        # use conditional directly
                        if conditional.ndim == 2:
                            return conditional

                        # compute conditional from image
                        with torch.no_grad():
                            cond, _, _ = self.visual_forward(conditional)
                        return cond

                    raise ValueError('invalid conditional')
         | 
| 224 | 
            +
             | 
| 225 | 
            +
                def compute_conditional(self, conditional):
                    """Encode text into a conditioning vector with the CLIP text encoder.

                    Args:
                        conditional: either a list/tuple of strings (encoded as a batch)
                            or a single prompt. A single prompt found in
                            ``self.precomputed_prompts`` is looked up there instead of
                            being encoded.

                    Returns:
                        The encoded tensor on the device of this module's first
                        parameter, with ``self.shift_vector`` added when it is set.
                    """
                    import clip

                    device = next(self.parameters()).device

                    if type(conditional) in {list, tuple}:
                        # batch of prompts -> one row per prompt
                        tokens = clip.tokenize(conditional).to(device)
                        embedding = self.clip_model.encode_text(tokens)
                    elif conditional in self.precomputed_prompts:
                        # cached embedding: cast to float and move to the model device
                        embedding = self.precomputed_prompts[conditional].float().to(device)
                    else:
                        # single prompt: tokenize as a batch of one and take row 0
                        tokens = clip.tokenize([conditional]).to(device)
                        embedding = self.clip_model.encode_text(tokens)[0]

                    if self.shift_vector is None:
                        return embedding
                    return embedding + self.shift_vector
         | 
| 244 | 
            +
             | 
| 245 | 
            +
             | 
| 246 | 
            +
            def clip_load_untrained(version):
                """Build a CLIP model whose hyperparameters mirror the ViT-B/16 checkpoint.

                The checkpoint is downloaded and loaded only to read tensor shapes and
                key names from its state dict; the returned ``CLIP`` instance is freshly
                constructed and the checkpoint weights are not loaded into it.

                Args:
                    version: must be ``'ViT-B/16'`` (enforced with an assert).

                Returns:
                    A newly constructed ``clip.model.CLIP``.
                """
                assert version == 'ViT-B/16'
                from clip.model import CLIP
                from clip.clip import _MODELS, _download

                jit_model = torch.jit.load(_download(_MODELS['ViT-B/16'])).eval()
                weights = jit_model.state_dict()

                # --- vision tower ---
                conv1_shape = weights["visual.conv1.weight"].shape
                vision_width = conv1_shape[0]
                vision_patch_size = conv1_shape[-1]
                vision_layers = sum(
                    1 for key in weights
                    if key.startswith("visual.") and key.endswith(".attn.in_proj_weight")
                )
                # positional embedding has one extra row besides the square patch grid
                grid_size = round((weights["visual.positional_embedding"].shape[0] - 1) ** 0.5)
                image_resolution = vision_patch_size * grid_size

                # --- text tower ---
                embed_dim = weights["text_projection"].shape[1]
                context_length = weights["positional_embedding"].shape[0]
                vocab_size = weights["token_embedding.weight"].shape[0]
                transformer_width = weights["ln_final.weight"].shape[0]
                transformer_heads = transformer_width // 64
                # distinct block indices, i.e. the third dotted component of each key
                block_ids = {
                    key.split(".")[2] for key in weights
                    if key.startswith("transformer.resblocks")
                }
                transformer_layers = len(block_ids)

                return CLIP(
                    embed_dim, image_resolution, vision_layers, vision_width, vision_patch_size,
                    context_length, vocab_size, transformer_width, transformer_heads, transformer_layers,
                )
         | 
| 267 | 
            +
             | 
| 268 | 
            +
             | 
| 269 | 
            +
            class CLIPDensePredT(CLIPDenseBase):
                """Dense prediction head operating on activations of the CLIP visual transformer.

                Activations taken at ``extract_layers`` are projected to ``reduce_dim``,
                modulated by a conditioning vector at ``cond_layer``, passed through
                transformer encoder blocks and turned into a single-channel map by a
                transposed convolution.
                """

                def __init__(
                    self,
                    version='ViT-B/32',
                    extract_layers=(3, 6, 9),
                    cond_layer=0,
                    reduce_dim=128,
                    n_heads=4,
                    prompt='fixed',
                    extra_blocks=0,
                    reduce_cond=None,
                    fix_shift=False,
                    learn_trans_conv_only=False,
                    limit_to_clip_only=False,
                    upsample=False,
                    add_calibration=False,
                    rev_activations=False,
                    trans_conv=None,
                    n_tokens=None,
                    complex_trans_conv=False,
                ):
                    """Create the decoder modules.

                    Args:
                        version: CLIP variant, ``'ViT-B/32'`` or ``'ViT-B/16'`` (any
                            other value raises ``KeyError`` on the lookup tables below).
                        extract_layers: indices of visual transformer blocks whose
                            activations feed the decoder.
                        cond_layer: decoder step at which the conditional is applied.
                        reduce_dim: width of the decoder.
                        n_heads: attention heads of each decoder block.
                        prompt: forwarded to the base class and ``get_prompt_list``.
                        extra_blocks: number of additional residual decoder blocks.
                        reduce_cond: forwarded to the base class.
                        fix_shift: load a fixed shift vector from ``shift_text_to_vis.pth``.
                        learn_trans_conv_only: freeze everything except ``trans_conv``.
                        limit_to_clip_only: stored as an attribute only.
                        upsample: create the 1x1 ``upsample_proj`` convolution.
                        add_calibration: when true, sets ``calibration_conds`` to 1.
                        rev_activations: feed activations in extraction order instead of
                            reversed.
                        trans_conv: explicit transposed-conv kernel size (square).
                        n_tokens: forwarded to the base class.
                        complex_trans_conv: use a conv + two-stage transposed-conv stack
                            instead of a single transposed convolution.
                    """
                    super().__init__(version, reduce_cond, reduce_dim, prompt, n_tokens)

                    self.extract_layers = extract_layers
                    self.cond_layer = cond_layer
                    self.limit_to_clip_only = limit_to_clip_only
                    self.process_cond = None
                    self.rev_activations = rev_activations

                    depth = len(extract_layers)

                    # NOTE: the attribute only exists when add_calibration is true.
                    if add_calibration:
                        self.calibration_conds = 1

                    if upsample:
                        self.upsample_proj = nn.Conv2d(reduce_dim, 1, kernel_size=1)
                    else:
                        self.upsample_proj = None

                    self.add_activation1 = True
                    self.version = version
                    self.token_shape = {'ViT-B/32': (7, 7), 'ViT-B/16': (14, 14)}[version]

                    if not fix_shift:
                        self.shift_vector = None
                    else:
                        # NOTE(review): if join/dirname/basename are the os.path functions,
                        # dirname(basename(__file__)) is always '' and this path is resolved
                        # against the current working directory - confirm that is intended.
                        shift_path = join(dirname(basename(__file__)), 'shift_text_to_vis.pth')
                        self.shift_vector = nn.Parameter(torch.load(shift_path), requires_grad=False)

                    if trans_conv is not None:
                        # explicitly define transposed conv kernel size
                        kernel = (trans_conv, trans_conv)
                    else:
                        kernel = {'ViT-B/32': (32, 32), 'ViT-B/16': (16, 16)}[version]

                    if complex_trans_conv:
                        assert kernel[0] == kernel[1]

                        # two transposed convs, each covering a quarter of the kernel size
                        tp_kernels = (kernel[0] // 4, kernel[0] // 4)

                        self.trans_conv = nn.Sequential(
                            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1),
                            nn.ReLU(),
                            nn.ConvTranspose2d(reduce_dim, reduce_dim // 2, kernel_size=tp_kernels[0], stride=tp_kernels[0]),
                            nn.ReLU(),
                            nn.ConvTranspose2d(reduce_dim // 2, 1, kernel_size=tp_kernels[1], stride=tp_kernels[1]),
                        )
                    else:
                        self.trans_conv = nn.ConvTranspose2d(reduce_dim, 1, kernel, stride=kernel)

                    assert len(self.extract_layers) == depth

                    # Creation order of the following modules is kept as-is: it fixes both
                    # parameter registration order and random initialisation order.
                    self.reduces = nn.ModuleList([nn.Linear(768, reduce_dim) for _ in range(depth)])
                    self.blocks = nn.ModuleList(
                        [nn.TransformerEncoderLayer(d_model=reduce_dim, nhead=n_heads) for _ in range(depth)]
                    )
                    self.extra_blocks = nn.ModuleList(
                        [nn.TransformerEncoderLayer(d_model=reduce_dim, nhead=n_heads) for _ in range(extra_blocks)]
                    )

                    # refinement and trans conv
                    if learn_trans_conv_only:
                        # freeze everything registered so far, then re-enable trans_conv
                        for param in self.parameters():
                            param.requires_grad_(False)

                        for param in self.trans_conv.parameters():
                            param.requires_grad_(True)

                    self.prompt_list = get_prompt_list(prompt)

                def forward(self, inp_image, conditional=None, return_features=False, mask=None):
                    """Predict a dense map for ``inp_image`` given ``conditional``.

                    Args:
                        inp_image: image batch; moved to the device of the visual
                            positional embedding. # assumes (batch, channels, H, W) - TODO confirm
                        conditional: anything accepted by ``get_cond_vec``.
                        return_features: must be a ``bool``; when true, intermediate
                            results are returned as well.
                        mask: unsupported; any non-``None`` value raises.

                    Returns:
                        ``(a,)`` or, with ``return_features``,
                        ``(a, visual_q, cond, activations)`` where ``activations`` starts
                        with the layer-0 activation.

                    Raises:
                        ValueError: if ``mask`` is not ``None``.
                    """
                    assert isinstance(return_features, bool)

                    inp_image = inp_image.to(self.model.positional_embedding.device)

                    if mask is not None:
                        raise ValueError('mask not supported')

                    x_inp = inp_image
                    bs = inp_image.shape[0]

                    cond = self.get_cond_vec(conditional, bs)

                    # layer 0 is always extracted in addition to the configured layers
                    wanted_layers = [0] + list(self.extract_layers)
                    visual_q, activations, _ = self.visual_forward(x_inp, extract_layers=wanted_layers)

                    activation1 = activations[0]
                    activations = activations[1:]

                    # by default the last extracted layer is consumed first
                    if self.rev_activations:
                        ordered = activations
                    else:
                        ordered = activations[::-1]

                    a = None
                    for step, (activation, block, reduce) in enumerate(zip(ordered, self.blocks, self.reduces)):
                        reduced = reduce(activation)
                        a = reduced if a is None else reduced + a

                        if step == self.cond_layer:
                            if self.reduce_cond is not None:
                                cond = self.reduce_cond(cond)

                            a = self.film_mul(cond) * a + self.film_add(cond)

                        a = block(a)

                    for extra in self.extra_blocks:
                        a = a + extra(a)

                    a = a[1:].permute(1, 2, 0)  # rm cls token and -> BS, Feats, Tokens

                    # tokens are laid out on a square grid
                    size = int(math.sqrt(a.shape[2]))
                    a = a.view(bs, a.shape[1], size, size)

                    a = self.trans_conv(a)

                    if self.n_tokens is not None:
                        a = nnf.interpolate(a, x_inp.shape[2:], mode='bilinear', align_corners=True)

                    if self.upsample_proj is not None:
                        # NOTE(review): trans_conv ends in a 1-channel layer while
                        # upsample_proj expects reduce_dim input channels - looks like this
                        # path fails unless reduce_dim == 1; confirm.
                        a = self.upsample_proj(a)
                        a = nnf.interpolate(a, x_inp.shape[2:], mode='bilinear')

                    if not return_features:
                        return a,
                    return a, visual_q, cond, [activation1] + activations
         | 
| 407 | 
            +
             | 
| 408 | 
            +
             | 
| 409 | 
            +
             | 
| 410 | 
            +
            class CLIPDensePredTMasked(CLIPDensePredT):
                """``CLIPDensePredT`` variant that can derive its conditional from a masked support image."""

                def __init__(
                    self,
                    version='ViT-B/32',
                    extract_layers=(3, 6, 9),
                    cond_layer=0,
                    reduce_dim=128,
                    n_heads=4,
                    prompt='fixed',
                    extra_blocks=0,
                    reduce_cond=None,
                    fix_shift=False,
                    learn_trans_conv_only=False,
                    refine=None,
                    limit_to_clip_only=False,
                    upsample=False,
                    add_calibration=False,
                    n_tokens=None,
                ):
                    """Forward all arguments to ``CLIPDensePredT``.

                    ``refine`` is accepted but not used by this constructor.
                    """
                    super().__init__(
                        version=version,
                        extract_layers=extract_layers,
                        cond_layer=cond_layer,
                        reduce_dim=reduce_dim,
                        n_heads=n_heads,
                        prompt=prompt,
                        extra_blocks=extra_blocks,
                        reduce_cond=reduce_cond,
                        fix_shift=fix_shift,
                        learn_trans_conv_only=learn_trans_conv_only,
                        limit_to_clip_only=limit_to_clip_only,
                        upsample=upsample,
                        add_calibration=add_calibration,
                        n_tokens=n_tokens,
                    )

                def visual_forward_masked(self, img_s, seg_s):
                    """Run the visual forward pass on ``img_s`` with ``seg_s`` as a mask.

                    The mask tuple is ``(mask_layer, mask_type, mask_tensor)``: it is
                    applied at ``'all'`` layers with mask type ``'cls_token'``.
                    """
                    masking = ('all', 'cls_token', seg_s)
                    return super().visual_forward(img_s, mask=masking)

                def forward(self, img_q, cond_or_img_s, seg_s=None, return_features=False):
                    """Predict for ``img_q``.

                    Args:
                        img_q: query image batch.
                        cond_or_img_s: the conditional itself when ``seg_s`` is ``None``,
                            otherwise the support image from which it is computed.
                        seg_s: optional mask for the support image.
                        return_features: forwarded to ``CLIPDensePredT.forward``.

                    Returns:
                        Whatever ``CLIPDensePredT.forward`` returns.
                    """
                    if seg_s is not None:
                        # conditional comes from the masked support image; no gradients
                        with torch.no_grad():
                            cond, _, _ = self.visual_forward_masked(cond_or_img_s, seg_s)
                    else:
                        cond = cond_or_img_s

                    return super().forward(img_q, cond, return_features=return_features)
         | 
| 436 | 
            +
             | 
| 437 | 
            +
             | 
| 438 | 
            +
             | 
| 439 | 
            +
            class CLIPDenseBaseline(CLIPDenseBase):
                """Baseline head: one extracted activation, conditional modulation, an MLP and a transposed conv."""

                def __init__(
                    self,
                    version='ViT-B/32',
                    cond_layer=0,
                    extract_layer=9,
                    reduce_dim=128,
                    reduce2_dim=None,
                    prompt='fixed',
                    reduce_cond=None,
                    limit_to_clip_only=False,
                    n_tokens=None,
                ):
                    """Create the baseline modules.

                    Args:
                        version: ``'ViT-B/32'`` or ``'ViT-B/16'`` (other values raise
                            ``KeyError`` on the lookup tables).
                        cond_layer: accepted but not stored.
                        extract_layer: index of the visual block whose activation is used.
                        reduce_dim: feature width after reduction.
                        reduce2_dim: hidden width of the ``reduce2`` MLP; must not be
                            ``None`` (asserted).
                        prompt, reduce_cond, n_tokens: forwarded to the base class.
                        limit_to_clip_only: stored as an attribute only.
                    """
                    super().__init__(version, reduce_cond, reduce_dim, prompt, n_tokens)

                    self.extract_layer = extract_layer
                    self.limit_to_clip_only = limit_to_clip_only
                    self.shift_vector = None

                    self.token_shape = {'ViT-B/32': (7, 7), 'ViT-B/16': (14, 14)}[version]

                    assert reduce2_dim is not None

                    # bottleneck MLP: reduce_dim -> reduce2_dim -> reduce_dim
                    self.reduce2 = nn.Sequential(
                        nn.Linear(reduce_dim, reduce2_dim),
                        nn.ReLU(),
                        nn.Linear(reduce2_dim, reduce_dim),
                    )

                    # kernel size equals stride, chosen per CLIP variant
                    kernel = {'ViT-B/32': (32, 32), 'ViT-B/16': (16, 16)}[version]
                    self.trans_conv = nn.ConvTranspose2d(reduce_dim, 1, kernel, stride=kernel)

                def forward(self, inp_image, conditional=None, return_features=False):
                    """Predict a dense map for ``inp_image`` given ``conditional``.

                    Args:
                        inp_image: image batch; moved to the device of the visual
                            positional embedding.
                        conditional: anything accepted by ``get_cond_vec``.
                        return_features: when truthy, also return intermediate results.

                    Returns:
                        ``(a,)`` or ``(a, visual_q, cond, activations)``.
                    """
                    inp_image = inp_image.to(self.model.positional_embedding.device)

                    x_inp = inp_image
                    bs = inp_image.shape[0]

                    cond = self.get_cond_vec(conditional, bs)

                    visual_q, activations, _ = self.visual_forward(x_inp, extract_layers=[self.extract_layer])

                    a = self.reduce(activations[0])
                    a = self.film_mul(cond) * a + self.film_add(cond)

                    if self.reduce2 is not None:
                        a = self.reduce2(a)

                    # the original model would execute a transformer block here

                    a = a[1:].permute(1, 2, 0)  # rm cls token and -> BS, Feats, Tokens

                    # tokens are laid out on a square grid
                    size = int(math.sqrt(a.shape[2]))
                    a = self.trans_conv(a.view(bs, a.shape[1], size, size))

                    if not return_features:
                        return a,
                    return a, visual_q, cond, activations
         | 
| 500 | 
            +
             | 
| 501 | 
            +
             | 
| 502 | 
            +
            class CLIPSegMultiLabel(nn.Module):
                """Wrap a loaded segmentation model to score each class name of a fixed class list."""

                def __init__(self, model) -> None:
                    """Load the wrapped model and put it into eval mode.

                    Args:
                        model: identifier passed to ``general_utils.load_model`` (called
                            with ``strict=False``).
                    """
                    super().__init__()

                    from third_party.JoEm.data_loader import get_seen_idx, get_unseen_idx, VOC

                    # class names are queried one by one in forward()
                    self.pascal_classes = VOC

                    from clip.clipseg import CLIPDensePredT
                    from general_utils import load_model

                    loaded = load_model(model, strict=False)
                    self.clipseg = loaded

                    self.clipseg.eval()

                def forward(self, x):
                    """Score every class for the batch ``x``.

                    A ``21 x batch x 352 x 352`` tensor is initialised to -10. For each
                    class the sigmoid of channel 0 of the wrapped model's first output is
                    added, weighted by 3 for ``'background'`` and by 1 otherwise.

                    Returns:
                        The scores permuted to ``batch x 21 x 352 x 352``.
                    """
                    batch = x.shape[0]
                    scores = torch.ones(21, batch, 352, 352).to(x.device) * -10

                    for class_id, class_name in enumerate(self.pascal_classes):
                        weight = 1 if class_name != 'background' else 3

                        with torch.no_grad():
                            logits = self.clipseg(x, class_name)[0][:, 0]
                            pred = torch.sigmoid(logits) * weight

                        scores[class_id] += pred

                    return scores.permute(1, 0, 2, 3)
         | 
| 536 | 
            +
             | 
| 537 | 
            +
                    # construct output tensor
         | 
| 538 | 
            +
                                
         | 
    	
        clip/model.py
    ADDED
    
    | @@ -0,0 +1,436 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from collections import OrderedDict
         | 
| 2 | 
            +
            from typing import Tuple, Union
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            import numpy as np
         | 
| 5 | 
            +
            import torch
         | 
| 6 | 
            +
            import torch.nn.functional as F
         | 
| 7 | 
            +
            from torch import nn
         | 
| 8 | 
            +
             | 
| 9 | 
            +
             | 
| 10 | 
            +
            class Bottleneck(nn.Module):
                """Residual block made of a 1x1, a 3x3 and a 1x1 convolution.

                All convolutions run at stride 1. When ``stride > 1`` the spatial
                reduction is done by an average pool placed after the second
                convolution, and by a matching average pool at the head of the
                shortcut branch. The block outputs ``planes * expansion`` channels.
                """

                expansion = 4

                def __init__(self, inplanes, planes, stride=1):
                    """Build the main path and, when required, the projection shortcut.

                    Args:
                        inplanes: number of input channels.
                        planes: channel width of the two inner convolutions.
                        stride: pooling factor applied inside the block (1 = no pooling).
                    """
                    super().__init__()
                    expanded = planes * self.expansion

                    # Submodules are registered in a fixed order so parameter names and
                    # seeded initialisation stay the same.
                    self.conv1 = nn.Conv2d(inplanes, planes, 1, bias=False)
                    self.bn1 = nn.BatchNorm2d(planes)
                    self.relu1 = nn.ReLU(inplace=True)

                    self.conv2 = nn.Conv2d(planes, planes, 3, padding=1, bias=False)
                    self.bn2 = nn.BatchNorm2d(planes)
                    self.relu2 = nn.ReLU(inplace=True)

                    # The pool replaces a strided convolution; identity when stride is 1.
                    if stride > 1:
                        self.avgpool = nn.AvgPool2d(stride)
                    else:
                        self.avgpool = nn.Identity()

                    self.conv3 = nn.Conv2d(planes, expanded, 1, bias=False)
                    self.bn3 = nn.BatchNorm2d(expanded)
                    self.relu3 = nn.ReLU(inplace=True)

                    self.downsample = None
                    self.stride = stride

                    needs_projection = stride > 1 or inplanes != planes * Bottleneck.expansion
                    if needs_projection:
                        # Shortcut: average pool first, then a stride-1 1x1 convolution.
                        shortcut_layers = OrderedDict()
                        shortcut_layers["-1"] = nn.AvgPool2d(stride)
                        shortcut_layers["0"] = nn.Conv2d(inplanes, expanded, 1, stride=1, bias=False)
                        shortcut_layers["1"] = nn.BatchNorm2d(expanded)
                        self.downsample = nn.Sequential(shortcut_layers)

                def forward(self, x: torch.Tensor):
                    """Run the block on ``x`` and return the ReLU of main path plus shortcut."""
                    main = self.conv1(x)
                    main = self.relu1(self.bn1(main))
                    main = self.conv2(main)
                    main = self.relu2(self.bn2(main))
                    main = self.avgpool(main)
                    main = self.bn3(self.conv3(main))

                    shortcut = x if self.downsample is None else self.downsample(x)

                    main += shortcut
                    return self.relu3(main)
         | 
| 56 | 
            +
             | 
| 57 | 
            +
             | 
| 58 | 
            +
            class AttentionPool2d(nn.Module):
                """Pool a 2D feature map with one multi-head attention query.

                The query is the mean over all spatial positions; keys and values are
                that mean token followed by every spatial position.
                """

                def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None):
                    """Create the positional embedding and the q/k/v/output projections.

                    Args:
                        spacial_dim: side length of the (square) input feature map.
                        embed_dim: channel count of the input feature map.
                        num_heads: number of attention heads.
                        output_dim: width of the output projection; ``embed_dim`` when falsy.
                    """
                    super().__init__()
                    num_tokens = spacial_dim ** 2 + 1  # one slot per position plus the mean token
                    self.positional_embedding = nn.Parameter(
                        torch.randn(num_tokens, embed_dim) / embed_dim ** 0.5
                    )
                    self.k_proj = nn.Linear(embed_dim, embed_dim)
                    self.q_proj = nn.Linear(embed_dim, embed_dim)
                    self.v_proj = nn.Linear(embed_dim, embed_dim)
                    self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim)
                    self.num_heads = num_heads

                def forward(self, x):
                    """Return one pooled vector per batch element."""
                    tokens = x.flatten(start_dim=2).permute(2, 0, 1)  # NCHW -> (HW)NC
                    mean_token = tokens.mean(dim=0, keepdim=True)
                    tokens = torch.cat([mean_token, tokens], dim=0)  # (HW+1)NC
                    tokens = tokens + self.positional_embedding[:, None, :].to(tokens.dtype)  # (HW+1)NC

                    # The three input biases are passed as one concatenated vector.
                    stacked_bias = torch.cat([self.q_proj.bias, self.k_proj.bias, self.v_proj.bias])

                    pooled, _ = F.multi_head_attention_forward(
                        query=tokens[:1],
                        key=tokens,
                        value=tokens,
                        embed_dim_to_check=tokens.shape[-1],
                        num_heads=self.num_heads,
                        q_proj_weight=self.q_proj.weight,
                        k_proj_weight=self.k_proj.weight,
                        v_proj_weight=self.v_proj.weight,
                        in_proj_weight=None,
                        in_proj_bias=stacked_bias,
                        bias_k=None,
                        bias_v=None,
                        add_zero_attn=False,
                        dropout_p=0,
                        out_proj_weight=self.c_proj.weight,
                        out_proj_bias=self.c_proj.bias,
                        use_separate_proj_weight=True,
                        training=self.training,
                        need_weights=False,
                    )
                    # Only the mean token was used as query, so drop that length-1 axis.
                    return pooled.squeeze(0)
         | 
| 92 | 
            +
             | 
| 93 | 
            +
             | 
| 94 | 
            +
            class ModifiedResNet(nn.Module):
                """
                A ResNet variant that differs from torchvision's in these ways:
                - The stem has 3 convolutions instead of 1, followed by an average pool rather than a max pool.
                - Strided convolutions are anti-aliased: an avgpool is prepended to convolutions with stride > 1.
                - The final pooling layer is a QKV attention instead of an average pool.
                """

                def __init__(self, layers, output_dim, heads, input_resolution=224, width=64):
                    """Assemble the stem, the four residual stages and the attention pool.

                    Args:
                        layers: sequence of four block counts, one per residual stage.
                        output_dim: width of the pooled output.
                        heads: number of heads for the attention pool.
                        input_resolution: input image side length; divided by 32 to size the pool.
                        width: base channel width of the network.
                    """
                    super().__init__()
                    self.output_dim = output_dim
                    self.input_resolution = input_resolution

                    half_width = width // 2

                    # the 3-layer stem
                    self.conv1 = nn.Conv2d(3, half_width, kernel_size=3, stride=2, padding=1, bias=False)
                    self.bn1 = nn.BatchNorm2d(half_width)
                    self.relu1 = nn.ReLU(inplace=True)
                    self.conv2 = nn.Conv2d(half_width, half_width, kernel_size=3, padding=1, bias=False)
                    self.bn2 = nn.BatchNorm2d(half_width)
                    self.relu2 = nn.ReLU(inplace=True)
                    self.conv3 = nn.Conv2d(half_width, width, kernel_size=3, padding=1, bias=False)
                    self.bn3 = nn.BatchNorm2d(width)
                    self.relu3 = nn.ReLU(inplace=True)
                    self.avgpool = nn.AvgPool2d(2)

                    # residual stages; _inplanes is updated by every _make_layer call
                    self._inplanes = width
                    self.layer1 = self._make_layer(width, layers[0])
                    self.layer2 = self._make_layer(width * 2, layers[1], stride=2)
                    self.layer3 = self._make_layer(width * 4, layers[2], stride=2)
                    self.layer4 = self._make_layer(width * 8, layers[3], stride=2)

                    feature_dim = width * 32  # the ResNet feature dimension
                    self.attnpool = AttentionPool2d(input_resolution // 32, feature_dim, heads, output_dim)

                def _make_layer(self, planes, blocks, stride=1):
                    """Build one stage of ``blocks`` bottlenecks; only the first one uses ``stride``."""
                    stage = [Bottleneck(self._inplanes, planes, stride)]

                    # Every later block consumes the expanded output of the block before it.
                    self._inplanes = planes * Bottleneck.expansion
                    stage.extend(Bottleneck(self._inplanes, planes) for _ in range(1, blocks))

                    return nn.Sequential(*stage)

                def forward(self, x):
                    """Run stem, residual stages and attention pool on ``x``."""
                    # Cast the input to the dtype of the first convolution's weights.
                    x = x.type(self.conv1.weight.dtype)

                    stem = (
                        (self.conv1, self.bn1, self.relu1),
                        (self.conv2, self.bn2, self.relu2),
                        (self.conv3, self.bn3, self.relu3),
                    )
                    for conv, norm, act in stem:
                        x = act(norm(conv(x)))
                    x = self.avgpool(x)

                    for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
                        x = stage(x)

                    return self.attnpool(x)
         | 
| 155 | 
            +
             | 
| 156 | 
            +
             | 
| 157 | 
            +
            class LayerNorm(nn.LayerNorm):
                """LayerNorm that computes in float32 and returns the caller's dtype (fp16-safe)."""

                def forward(self, x: torch.Tensor):
                    """Normalise ``x`` in float32, then cast the result back to ``x``'s dtype."""
                    input_dtype = x.dtype
                    normalised = super().forward(x.to(dtype=torch.float32))
                    return normalised.to(dtype=input_dtype)
         | 
| 164 | 
            +
             | 
| 165 | 
            +
             | 
| 166 | 
            +
            class QuickGELU(nn.Module):
                """Sigmoid-based activation: ``x * sigmoid(1.702 * x)``."""

                def forward(self, x: torch.Tensor):
                    """Return ``x`` gated element-wise by ``sigmoid(1.702 * x)``."""
                    gate = torch.sigmoid(1.702 * x)
                    return x * gate
         | 
| 169 | 
            +
             | 
| 170 | 
            +
             | 
| 171 | 
            +
            class ResidualAttentionBlock(nn.Module):
                """Pre-norm transformer block: self-attention then an MLP, each with a residual add."""

                def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None):
                    """Create the attention layer, both layer norms and the 4x-wide MLP.

                    Args:
                        d_model: embedding width of the block.
                        n_head: number of attention heads.
                        attn_mask: optional mask handed to the attention layer on every call.
                    """
                    super().__init__()

                    self.attn = nn.MultiheadAttention(d_model, n_head)
                    self.ln_1 = LayerNorm(d_model)

                    hidden = d_model * 4
                    mlp_layers = OrderedDict()
                    mlp_layers["c_fc"] = nn.Linear(d_model, hidden)
                    mlp_layers["gelu"] = QuickGELU()
                    mlp_layers["c_proj"] = nn.Linear(hidden, d_model)
                    self.mlp = nn.Sequential(mlp_layers)

                    self.ln_2 = LayerNorm(d_model)
                    self.attn_mask = attn_mask

                def attention(self, x: torch.Tensor):
                    """Self-attend over ``x``; the stored mask is first moved to ``x``'s dtype and device."""
                    mask = self.attn_mask
                    if mask is not None:
                        mask = mask.to(dtype=x.dtype, device=x.device)
                    # The converted mask is written back to the attribute, as before.
                    self.attn_mask = mask

                    attended, _ = self.attn(x, x, x, need_weights=False, attn_mask=mask)
                    return attended

                def forward(self, x: torch.Tensor):
                    """Apply the attention and MLP sub-layers, each added back onto its input."""
                    x = x + self.attention(self.ln_1(x))
                    return x + self.mlp(self.ln_2(x))
         | 
| 193 | 
            +
             | 
| 194 | 
            +
             | 
| 195 | 
            +
            class Transformer(nn.Module):
                """A sequential stack of ``layers`` residual attention blocks of equal width."""

                def __init__(self, width: int, layers: int, heads: int, attn_mask: torch.Tensor = None):
                    """Build the stack; every block is given the same ``attn_mask`` object.

                    Args:
                        width: embedding width of every block.
                        layers: number of blocks.
                        heads: attention heads per block.
                        attn_mask: optional attention mask forwarded to each block.
                    """
                    super().__init__()
                    self.width = width
                    self.layers = layers

                    blocks = []
                    for _ in range(layers):
                        blocks.append(ResidualAttentionBlock(width, heads, attn_mask))
                    self.resblocks = nn.Sequential(*blocks)

                def forward(self, x: torch.Tensor):
                    """Pass ``x`` through every block in order."""
                    return self.resblocks(x)
         | 
| 204 | 
            +
             | 
| 205 | 
            +
             | 
| 206 | 
            +
            class VisionTransformer(nn.Module):
                """Patch-based image encoder: patch convolution, class token, transformer, projection."""

                def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int):
                    """Create the patch embedding, learned embeddings, transformer and output projection.

                    Args:
                        input_resolution: side length of the input images.
                        patch_size: kernel size and stride of the patch convolution.
                        width: transformer embedding width.
                        layers: number of transformer blocks.
                        heads: attention heads per block.
                        output_dim: width of the projected output.
                    """
                    super().__init__()
                    self.input_resolution = input_resolution
                    self.output_dim = output_dim
                    self.conv1 = nn.Conv2d(
                        in_channels=3,
                        out_channels=width,
                        kernel_size=patch_size,
                        stride=patch_size,
                        bias=False,
                    )

                    scale = width ** -0.5
                    num_tokens = (input_resolution // patch_size) ** 2 + 1  # patches plus the class token
                    self.class_embedding = nn.Parameter(scale * torch.randn(width))
                    self.positional_embedding = nn.Parameter(scale * torch.randn(num_tokens, width))
                    self.ln_pre = LayerNorm(width)

                    self.transformer = Transformer(width, layers, heads)

                    self.ln_post = LayerNorm(width)
                    self.proj = nn.Parameter(scale * torch.randn(width, output_dim))

                def forward(self, x: torch.Tensor):
                    """Encode ``x`` and return the (optionally projected) class-token features."""
                    patches = self.conv1(x)  # shape = [*, width, grid, grid]
                    batch, channels = patches.shape[0], patches.shape[1]
                    patches = patches.reshape(batch, channels, -1)  # shape = [*, width, grid ** 2]
                    patches = patches.permute(0, 2, 1)  # shape = [*, grid ** 2, width]

                    # Adding the class embedding to zeros broadcasts it to one token per batch element.
                    cls_tokens = self.class_embedding.to(patches.dtype) + torch.zeros(
                        batch, 1, patches.shape[-1], dtype=patches.dtype, device=patches.device
                    )
                    tokens = torch.cat([cls_tokens, patches], dim=1)  # shape = [*, grid ** 2 + 1, width]
                    tokens = tokens + self.positional_embedding.to(tokens.dtype)
                    tokens = self.ln_pre(tokens)

                    tokens = tokens.permute(1, 0, 2)  # NLD -> LND
                    tokens = self.transformer(tokens)
                    tokens = tokens.permute(1, 0, 2)  # LND -> NLD

                    # Only the first token (the class token) is kept.
                    features = self.ln_post(tokens[:, 0, :])

                    if self.proj is None:
                        return features
                    return features @ self.proj
         | 
| 241 | 
            +
             | 
| 242 | 
            +
             | 
| 243 | 
            +
            class CLIP(nn.Module):
                """Joint image/text model returning scaled cosine-similarity logits for image-text pairs."""

                def __init__(self,
                             embed_dim: int,
                             # vision
                             image_resolution: int,
                             vision_layers: Union[Tuple[int, int, int, int], int],
                             vision_width: int,
                             vision_patch_size: int,
                             # text
                             context_length: int,
                             vocab_size: int,
                             transformer_width: int,
                             transformer_heads: int,
                             transformer_layers: int
                             ):
                    """Build the visual tower, the text transformer and the shared-space parameters.

                    A tuple or list for ``vision_layers`` selects ``ModifiedResNet``;
                    anything else selects ``VisionTransformer``.
                    """
                    super().__init__()

                    # Needed by build_attention_mask() below, so it is set first.
                    self.context_length = context_length

                    uses_resnet = isinstance(vision_layers, (tuple, list))
                    if uses_resnet:
                        self.visual = ModifiedResNet(
                            layers=vision_layers,
                            output_dim=embed_dim,
                            heads=vision_width * 32 // 64,
                            input_resolution=image_resolution,
                            width=vision_width
                        )
                    else:
                        self.visual = VisionTransformer(
                            input_resolution=image_resolution,
                            patch_size=vision_patch_size,
                            width=vision_width,
                            layers=vision_layers,
                            heads=vision_width // 64,
                            output_dim=embed_dim
                        )

                    text_mask = self.build_attention_mask()
                    self.transformer = Transformer(
                        width=transformer_width,
                        layers=transformer_layers,
                        heads=transformer_heads,
                        attn_mask=text_mask
                    )

                    self.vocab_size = vocab_size
                    self.token_embedding = nn.Embedding(vocab_size, transformer_width)
                    self.positional_embedding = nn.Parameter(torch.empty(self.context_length, transformer_width))
                    self.ln_final = LayerNorm(transformer_width)

                    self.text_projection = nn.Parameter(torch.empty(transformer_width, embed_dim))
                    self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07))

                    self.initialize_parameters()

                def initialize_parameters(self):
                    """Initialise embeddings, the ResNet-specific weights and the text transformer."""
                    nn.init.normal_(self.token_embedding.weight, std=0.02)
                    nn.init.normal_(self.positional_embedding, std=0.01)

                    if isinstance(self.visual, ModifiedResNet):
                        pool = self.visual.attnpool
                        if pool is not None:
                            pool_std = pool.c_proj.in_features ** -0.5
                            for projection in (pool.q_proj, pool.k_proj, pool.v_proj, pool.c_proj):
                                nn.init.normal_(projection.weight, std=pool_std)

                        # Zero the scale of the last batch norm in every bottleneck.
                        stages = (self.visual.layer1, self.visual.layer2, self.visual.layer3, self.visual.layer4)
                        for stage in stages:
                            for param_name, param in stage.named_parameters():
                                if param_name.endswith("bn3.weight"):
                                    nn.init.zeros_(param)

                    text_width = self.transformer.width
                    attn_std = text_width ** -0.5
                    proj_std = (text_width ** -0.5) * ((2 * self.transformer.layers) ** -0.5)
                    fc_std = (2 * text_width) ** -0.5
                    for block in self.transformer.resblocks:
                        nn.init.normal_(block.attn.in_proj_weight, std=attn_std)
                        nn.init.normal_(block.attn.out_proj.weight, std=proj_std)
                        nn.init.normal_(block.mlp.c_fc.weight, std=fc_std)
                        nn.init.normal_(block.mlp.c_proj.weight, std=proj_std)

                    if self.text_projection is not None:
                        nn.init.normal_(self.text_projection, std=self.transformer.width ** -0.5)

                def build_attention_mask(self):
                    """Return a ``context_length`` x ``context_length`` additive causal mask.

                    Entries strictly above the diagonal are ``-inf``; all others are zero.
                    """
                    size = self.context_length
                    # pytorch uses an additive attention mask: start from -inf everywhere,
                    # then keep it only strictly above the diagonal.
                    mask = torch.full((size, size), float("-inf"))
                    mask.triu_(1)
                    return mask

                @property
                def dtype(self):
                    """Dtype of the visual tower's first convolution weights."""
                    return self.visual.conv1.weight.dtype

                def encode_image(self, image):
                    """Cast ``image`` to the model dtype and run the visual tower on it."""
                    return self.visual(image.type(self.dtype))

                def encode_text(self, text):
                    """Embed token ids ``text`` and return one projected feature vector per sequence."""
                    model_dtype = self.dtype
                    tokens = self.token_embedding(text).type(model_dtype)  # [batch_size, n_ctx, d_model]

                    tokens = tokens + self.positional_embedding.type(model_dtype)
                    tokens = tokens.permute(1, 0, 2)  # NLD -> LND
                    tokens = self.transformer(tokens)
                    tokens = tokens.permute(1, 0, 2)  # LND -> NLD
                    tokens = self.ln_final(tokens).type(model_dtype)

                    # tokens.shape = [batch_size, n_ctx, transformer.width]
                    # take features from the eot embedding (eot_token is the highest number in each sequence)
                    batch_index = torch.arange(tokens.shape[0])
                    eot_index = text.argmax(dim=-1)
                    return tokens[batch_index, eot_index] @ self.text_projection

                def forward(self, image, text):
                    """Return ``(logits_per_image, logits_per_text)`` for the given images and token ids."""
                    image_features = self.encode_image(image)
                    text_features = self.encode_text(text)

                    # normalized features
                    image_features = image_features / image_features.norm(dim=1, keepdim=True)
                    text_features = text_features / text_features.norm(dim=1, keepdim=True)

                    # cosine similarity as logits; the scale multiplies the image features
                    # before the matrix product
                    scaled_image_features = self.logit_scale.exp() * image_features
                    logits_per_image = scaled_image_features @ text_features.t()
                    logits_per_text = logits_per_image.t()

                    # shape = [global_batch_size, global_batch_size]
                    return logits_per_image, logits_per_text
         | 
| 373 | 
            +
             | 
| 374 | 
            +
             | 
| 375 | 
            +
            def convert_weights(model: nn.Module):
                """Cast the convertible parameters of ``model`` to fp16, in place.

                Affected tensors:
                  * ``weight`` / ``bias`` of ``nn.Conv1d``, ``nn.Conv2d`` and ``nn.Linear``;
                  * the projection weights and biases of ``nn.MultiheadAttention``;
                  * any ``text_projection`` or ``proj`` attribute found on a module.

                The conversion is applied to every sub-module via ``model.apply``.
                Nothing is returned.
                """
                mha_tensor_names = (
                    "in_proj_weight", "q_proj_weight", "k_proj_weight", "v_proj_weight",
                    "in_proj_bias", "bias_k", "bias_v",
                )
                projection_names = ("text_projection", "proj")

                def _halve(module):
                    if isinstance(module, (nn.Conv1d, nn.Conv2d, nn.Linear)):
                        module.weight.data = module.weight.data.half()
                        if module.bias is not None:
                            module.bias.data = module.bias.data.half()

                    if isinstance(module, nn.MultiheadAttention):
                        for tensor_name in mha_tensor_names:
                            param = getattr(module, tensor_name)
                            # Some of these slots are None depending on the attention config.
                            if param is None:
                                continue
                            param.data = param.data.half()

                    for proj_name in projection_names:
                        projection = getattr(module, proj_name, None)
                        if projection is not None:
                            projection.data = projection.data.half()

                model.apply(_halve)
         | 
| 397 | 
            +
             | 
| 398 | 
            +
             | 
| 399 | 
            +
            def build_model(state_dict: dict):
                """Instantiate a ``CLIP`` model whose hyper-parameters are read off ``state_dict``.

                The presence of the ``"visual.proj"`` key selects the ViT code path;
                otherwise the ``visual.layer{1..4}`` / ``visual.attnpool`` keys are used.
                All sizes are derived from tensor shapes and key names in the dict.

                NOTE: the entries ``"input_resolution"``, ``"context_length"`` and
                ``"vocab_size"`` are removed from the *passed-in* dict before loading,
                so the caller's ``state_dict`` is modified.

                Returns:
                    The model after ``convert_weights`` and ``load_state_dict``, in eval mode.
                """
                is_vit = "visual.proj" in state_dict

                if is_vit:
                    vision_width = state_dict["visual.conv1.weight"].shape[0]
                    vision_layers = sum(
                        1 for k in state_dict.keys()
                        if k.startswith("visual.") and k.endswith(".attn.in_proj_weight")
                    )
                    vision_patch_size = state_dict["visual.conv1.weight"].shape[-1]
                    # The positional embedding has one extra row besides the square patch grid.
                    grid_size = round((state_dict["visual.positional_embedding"].shape[0] - 1) ** 0.5)
                    image_resolution = vision_patch_size * grid_size
                else:
                    # Number of distinct block indices found under each of the four stages.
                    vision_layers = tuple(
                        len({k.split(".")[2] for k in state_dict if k.startswith(f"visual.layer{stage}")})
                        for stage in (1, 2, 3, 4)
                    )
                    vision_width = state_dict["visual.layer1.0.conv1.weight"].shape[0]
                    output_width = round((state_dict["visual.attnpool.positional_embedding"].shape[0] - 1) ** 0.5)
                    vision_patch_size = None
                    assert output_width ** 2 + 1 == state_dict["visual.attnpool.positional_embedding"].shape[0]
                    image_resolution = output_width * 32

                embed_dim = state_dict["text_projection"].shape[1]
                context_length = state_dict["positional_embedding"].shape[0]
                vocab_size = state_dict["token_embedding.weight"].shape[0]
                transformer_width = state_dict["ln_final.weight"].shape[0]
                transformer_heads = transformer_width // 64
                transformer_layers = len(
                    {k.split(".")[2] for k in state_dict if k.startswith("transformer.resblocks")}
                )

                model = CLIP(
                    embed_dim,
                    image_resolution, vision_layers, vision_width, vision_patch_size,
                    context_length, vocab_size, transformer_width, transformer_heads, transformer_layers
                )

                # These entries are dropped before load_state_dict is called.
                for meta_key in ("input_resolution", "context_length", "vocab_size"):
                    state_dict.pop(meta_key, None)

                convert_weights(model)
                model.load_state_dict(state_dict)
                return model.eval()
         | 
    	
        clip/simple_tokenizer.py
    ADDED
    
    | @@ -0,0 +1,132 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import gzip
         | 
| 2 | 
            +
            import html
         | 
| 3 | 
            +
            import os
         | 
| 4 | 
            +
            from functools import lru_cache
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            import ftfy
         | 
| 7 | 
            +
            import regex as re
         | 
| 8 | 
            +
             | 
| 9 | 
            +
             | 
| 10 | 
            +
            @lru_cache()
            def default_bpe():
                """Return the absolute path of ``bpe_simple_vocab_16e6.txt.gz`` next to this module.

                The result is memoised by ``lru_cache``.
                """
                module_dir = os.path.dirname(os.path.abspath(__file__))
                return os.path.join(module_dir, "bpe_simple_vocab_16e6.txt.gz")
         | 
| 13 | 
            +
             | 
| 14 | 
            +
             | 
| 15 | 
            +
            @lru_cache()
            def bytes_to_unicode():
                """
                Build a mapping from every byte value (0-255) to a single unicode character.

                Bytes in the ranges "!".."~", "¡".."¬" and "®".."ÿ" map to the character
                with the same code point. Every remaining byte is assigned the next unused
                code point starting at 256, so no byte maps to a whitespace or control
                character and the mapping stays reversible.

                The returned dict is memoised by ``lru_cache``; callers share one object.
                """
                self_mapped = [
                    *range(ord("!"), ord("~") + 1),
                    *range(ord("¡"), ord("¬") + 1),
                    *range(ord("®"), ord("ÿ") + 1),
                ]
                byte_values = list(self_mapped)
                code_points = list(self_mapped)

                shifted = 0
                for byte in range(2**8):
                    if byte in self_mapped:
                        continue
                    # Not directly representable: shift it above the single-byte range.
                    byte_values.append(byte)
                    code_points.append(2**8 + shifted)
                    shifted += 1

                return {byte: chr(point) for byte, point in zip(byte_values, code_points)}
         | 
| 36 | 
            +
             | 
| 37 | 
            +
             | 
| 38 | 
            +
            def get_pairs(word):
                """Return set of adjacent symbol pairs in a word.

                Word is represented as tuple of symbols (symbols being variable-length strings).
                A word with fewer than two symbols (including an empty one) yields an
                empty set instead of raising ``IndexError``.
                """
                # zip stops at the shorter sequence, so empty and single-symbol
                # words naturally produce no pairs.
                return set(zip(word, word[1:]))
         | 
| 48 | 
            +
             | 
| 49 | 
            +
             | 
| 50 | 
            +
            def basic_clean(text):
                """Run ``ftfy.fix_text`` on ``text``, HTML-unescape it twice, and strip it."""
                cleaned = ftfy.fix_text(text)
                # Two passes, so doubly-escaped entities are unescaped as well.
                for _ in range(2):
                    cleaned = html.unescape(cleaned)
                return cleaned.strip()
         | 
| 54 | 
            +
             | 
| 55 | 
            +
             | 
| 56 | 
            +
            def whitespace_clean(text):
                """Collapse each run of whitespace into one space and strip both ends."""
                return re.sub(r'\s+', ' ', text).strip()
         | 
| 60 | 
            +
             | 
| 61 | 
            +
             | 
| 62 | 
            +
            class SimpleTokenizer(object):
                """Byte-level BPE tokenizer driven by a gzip-compressed merges file.

                Attributes set by ``__init__``:
                    byte_encoder / byte_decoder: byte <-> unicode-character tables.
                    encoder / decoder: token string <-> integer id tables.
                    bpe_ranks: merge pair -> priority (lower merges first).
                    cache: memo of already-segmented tokens.
                    pat: compiled pattern used to split text into pre-tokens.
                """

                def __init__(self, bpe_path: str = default_bpe()):
                    """Load the merges from ``bpe_path`` and build the vocabulary tables.

                    Args:
                        bpe_path: path to the gzip-compressed, UTF-8 encoded merges file.
                    """
                    self.byte_encoder = bytes_to_unicode()
                    self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
                    # Context manager so the gzip handle is closed even if decoding fails.
                    with gzip.open(bpe_path) as bpe_file:
                        merges = bpe_file.read().decode("utf-8").split('\n')
                    # Skip the first line and keep at most 49152-256-2 merge rules.
                    merges = merges[1:49152-256-2+1]
                    merges = [tuple(merge.split()) for merge in merges]
                    # Vocabulary order: byte symbols, byte symbols with the end-of-word
                    # marker, one entry per merge, then the two special tokens.
                    vocab = list(bytes_to_unicode().values())
                    vocab = vocab + [v+'</w>' for v in vocab]
                    for merge in merges:
                        vocab.append(''.join(merge))
                    vocab.extend(['<|startoftext|>', '<|endoftext|>'])
                    self.encoder = dict(zip(vocab, range(len(vocab))))
                    self.decoder = {v: k for k, v in self.encoder.items()}
                    self.bpe_ranks = dict(zip(merges, range(len(merges))))
                    # Special tokens are pre-seeded so bpe() returns them unsplit.
                    self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'}
                    self.pat = re.compile(r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", re.IGNORECASE)

                def bpe(self, token):
                    """Segment ``token`` into BPE symbols and return them joined by spaces.

                    The last character gets the ``</w>`` end-of-word marker before
                    merging. Results are memoised in ``self.cache``, except for
                    single-character tokens, which return early.
                    """
                    if token in self.cache:
                        return self.cache[token]
                    word = tuple(token[:-1]) + (token[-1] + '</w>',)
                    pairs = get_pairs(word)

                    if not pairs:
                        return token+'</w>'

                    while True:
                        # Pick the adjacent pair with the best (lowest) merge rank.
                        bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf')))
                        if bigram not in self.bpe_ranks:
                            break
                        first, second = bigram
                        new_word = []
                        i = 0
                        while i < len(word):
                            try:
                                j = word.index(first, i)
                            except ValueError:
                                # `first` does not occur again: copy the tail and stop.
                                new_word.extend(word[i:])
                                break
                            new_word.extend(word[i:j])
                            i = j

                            if word[i] == first and i < len(word)-1 and word[i+1] == second:
                                new_word.append(first+second)
                                i += 2
                            else:
                                new_word.append(word[i])
                                i += 1
                        word = tuple(new_word)
                        if len(word) == 1:
                            break
                        pairs = get_pairs(word)
                    word = ' '.join(word)
                    self.cache[token] = word
                    return word

                def encode(self, text):
                    """Clean and lowercase ``text``, then return its list of BPE token ids."""
                    bpe_tokens = []
                    text = whitespace_clean(basic_clean(text)).lower()
                    for token in re.findall(self.pat, text):
                        # Re-express the UTF-8 bytes of the pre-token with the byte table.
                        token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8'))
                        bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' '))
                    return bpe_tokens

                def decode(self, tokens):
                    """Turn token ids back into text; ``</w>`` markers become spaces.

                    Byte sequences that are not valid UTF-8 are decoded with
                    ``errors="replace"``.
                    """
                    text = ''.join([self.decoder[token] for token in tokens])
                    text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('</w>', ' ')
                    return text
         | 
    	
        clip/vitseg.py
    ADDED
    
    | @@ -0,0 +1,286 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import math
         | 
| 2 | 
            +
            from posixpath import basename, dirname, join
         | 
| 3 | 
            +
            # import clip
         | 
| 4 | 
            +
            from clip.model import convert_weights
         | 
| 5 | 
            +
            import torch
         | 
| 6 | 
            +
            import json
         | 
| 7 | 
            +
            from torch import nn
         | 
| 8 | 
            +
            from torch.nn import functional as nnf
         | 
| 9 | 
            +
            from torch.nn.modules import activation
         | 
| 10 | 
            +
            from torch.nn.modules.activation import ReLU
         | 
| 11 | 
            +
            from torchvision import transforms
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            normalize = transforms.Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            from torchvision.models import ResNet
         | 
| 16 | 
            +
             | 
| 17 | 
            +
             | 
| 18 | 
            +
            def process_prompts(conditional, prompt_list, conditional_map):
                """DEPRECATED. Build one text prompt per entry of ``conditional``.

                For each entry, ``conditional_map[int(entry)]`` gives a list of
                synonyms; one is drawn uniformly at random and its underscores are
                replaced by spaces. Each word is then inserted into a template drawn
                at random from ``prompt_list``, or into ``'a photo of {}'`` when
                ``prompt_list`` is None.
                """
                synonym_sets = [conditional_map[int(entry)] for entry in conditional]

                # randomly sample a synonym
                words = []
                for syns in synonym_sets:
                    pick = torch.multinomial(torch.ones(len(syns)), 1, replacement=True).item()
                    words.append(syns[pick])
                words = [word.replace('_', ' ') for word in words]

                if prompt_list is None:
                    templates = ['a photo of {}'] * (len(words))
                else:
                    template_ids = torch.multinomial(torch.ones(len(prompt_list)), len(words), replacement=True)
                    templates = [prompt_list[idx] for idx in template_ids]

                return [template.format(word) for template, word in zip(templates, words)]
         | 
| 33 | 
            +
             | 
| 34 | 
            +
             | 
| 35 | 
            +
            class VITDenseBase(nn.Module):
                """Shared helper methods for ViT-based dense prediction models.

                The methods read ``self.model``, ``self.token_shape``,
                ``self.clip_model``, ``self.precomputed_prompts`` and
                ``self.prompt_list``; this class defines none of them, so they must
                be provided by the subclass.
                """

                def rescaled_pos_emb(self, new_size):
                    """Return the positional embedding resampled to a ``new_size`` token grid.

                    The first row of ``self.model.positional_embedding`` is kept as
                    is; the remaining rows are laid out on the ``self.token_shape``
                    grid, resized with bicubic interpolation, and flattened back.

                    Args:
                        new_size: pair ``(height, width)`` of the target token grid.
                    """
                    assert len(new_size) == 2

                    pos_emb = self.model.positional_embedding
                    # Read the embedding width from the tensor instead of assuming 768.
                    width = pos_emb.shape[1]

                    a = pos_emb[1:].T.view(1, width, *self.token_shape)
                    b = nnf.interpolate(a, new_size, mode='bicubic', align_corners=False).squeeze(0).view(width, new_size[0]*new_size[1]).T
                    return torch.cat([pos_emb[:1], b])

                def visual_forward(self, x_inp, extract_layers=(), skip=False, mask=None):
                    """Run ``self.model`` on ``x_inp`` and collect intermediate activations.

                    The input is resized to 384x384 and the whole pass runs under
                    ``torch.no_grad()``. ``skip`` and ``mask`` are accepted but not
                    used by this implementation.

                    Returns:
                        ``(x, activations, None)``: ``x`` is the head output computed
                        from the first token, and ``activations`` holds the output of
                        each block whose index is in ``extract_layers``, with the
                        first two axes swapped.
                    """
                    with torch.no_grad():

                        x_inp = nnf.interpolate(x_inp, (384, 384))

                        x = self.model.patch_embed(x_inp)
                        cls_token = self.model.cls_token.expand(x.shape[0], -1, -1)  # stole cls_tokens impl from Phil Wang, thanks
                        if self.model.dist_token is None:
                            x = torch.cat((cls_token, x), dim=1)
                        else:
                            x = torch.cat((cls_token, self.model.dist_token.expand(x.shape[0], -1, -1), x), dim=1)
                        x = self.model.pos_drop(x + self.model.pos_embed)

                        activations = []
                        for i, block in enumerate(self.model.blocks):
                            x = block(x)

                            if i in extract_layers:
                                # permute to be compatible with CLIP
                                activations.append(x.permute(1, 0, 2))

                        x = self.model.norm(x)
                        x = self.model.head(self.model.pre_logits(x[:, 0]))

                    return x, activations, None

                def sample_prompts(self, words, prompt_list=None):
                    """Format each word with a template drawn at random (with replacement).

                    Templates come from ``prompt_list`` or, when it is None, from
                    ``self.prompt_list``.
                    """
                    prompt_list = prompt_list if prompt_list is not None else self.prompt_list

                    prompt_indices = torch.multinomial(torch.ones(len(prompt_list)), len(words), replacement=True)
                    prompts = [prompt_list[i] for i in prompt_indices]
                    return [promt.format(w) for promt, w in zip(prompts, words)]

                def get_cond_vec(self, conditional, batch_size):
                    """Turn ``conditional`` into a conditioning tensor.

                    Accepted inputs:
                      * a single string: encoded once and repeated ``batch_size`` times;
                      * a non-empty list/tuple of strings, one per batch element;
                      * a 2-D tensor: returned unchanged;
                      * any other tensor: passed through ``visual_forward`` as an image.

                    Subclasses of these types (e.g. ``nn.Parameter``) are accepted too.

                    Raises:
                        ValueError: for any other input, including None and an empty
                            list/tuple.
                    """
                    # compute conditional from a single string
                    if isinstance(conditional, str):
                        cond = self.compute_conditional(conditional)
                        cond = cond.repeat(batch_size, 1)

                    # compute conditional from string list/tuple
                    elif isinstance(conditional, (list, tuple)) and len(conditional) > 0 and isinstance(conditional[0], str):
                        assert len(conditional) == batch_size
                        cond = self.compute_conditional(conditional)

                    # use conditional directly
                    elif isinstance(conditional, torch.Tensor) and conditional.ndim == 2:
                        cond = conditional

                    # compute conditional from image
                    elif isinstance(conditional, torch.Tensor):
                        with torch.no_grad():
                            cond, _, _ = self.visual_forward(conditional)
                    else:
                        raise ValueError('invalid conditional')
                    return cond

                def compute_conditional(self, conditional):
                    """Encode text with ``self.clip_model`` on this module's device.

                    A list/tuple is tokenized and encoded as a batch. A single value
                    is looked up in ``self.precomputed_prompts`` first and is only
                    tokenized and encoded when it is not found there.
                    """
                    import clip

                    dev = next(self.parameters()).device

                    if isinstance(conditional, (list, tuple)):
                        text_tokens = clip.tokenize(conditional).to(dev)
                        cond = self.clip_model.encode_text(text_tokens)
                    else:
                        if conditional in self.precomputed_prompts:
                            cond = self.precomputed_prompts[conditional].float().to(dev)
                        else:
                            text_tokens = clip.tokenize([conditional]).to(dev)
                            cond = self.clip_model.encode_text(text_tokens)[0]

                    return cond
         | 
| 121 | 
            +
             | 
| 122 | 
            +
             | 
| 123 | 
            +
            class VITDensePredT(VITDenseBase):
         | 
| 124 | 
            +
             | 
| 125 | 
            +
                def __init__(self, extract_layers=(3, 6, 9), cond_layer=0, reduce_dim=128, n_heads=4, prompt='fixed',
                             depth=3, extra_blocks=0, reduce_cond=None, fix_shift=False,
                             learn_trans_conv_only=False, refine=None, limit_to_clip_only=False, upsample=False,
                             add_calibration=False, process_cond=None, not_pretrained=False):
                    """Build the ViT backbone, the CLIP model and the conditional decoder.

                    Side effects: instantiates the timm model 'vit_base_patch16_384' with
                    pretrained weights, loads CLIP 'ViT-B/16' on the CPU, and reads
                    'precomputed_prompt_vectors.pickle' from the current working directory.

                    Args:
                        extract_layers: backbone layer indices whose activations feed the
                            decoder; its length must equal ``depth``.
                        cond_layer: decoder step (0-based) at which ``forward`` applies the
                            FiLM conditioning.
                        reduce_dim: feature width of the decoder.
                        n_heads: attention heads of every decoder transformer layer.
                        prompt: selects ``self.prompt_list``; one of 'fixed', 'shuffle',
                            'shuffle+' or 'shuffle_clip'. Any other value leaves
                            ``self.prompt_list`` unset.
                        depth: number of reduction layers; must equal ``len(extract_layers)``.
                        extra_blocks: number of additional transformer layers.
                        reduce_cond: if not None, size the conditional vector is projected to
                            by a frozen linear layer (also used as the backbone head size).
                        fix_shift: accepted but not used by this constructor.
                        learn_trans_conv_only: freeze every parameter registered so far
                            except those of ``self.trans_conv``.
                        refine: accepted but not used by this constructor.
                        limit_to_clip_only: stored on the instance.
                        upsample: if true, create the 1x1 ``upsample_proj`` convolution.
                        add_calibration: if true, set ``self.calibration_conds = 1``.
                        process_cond: None, 'clamp', a ('clamp', value) pair, or a path
                            ending in '.pth' holding a shift tensor.
                        not_pretrained: accepted but not used by this constructor.
                            NOTE(review): the backbone is always created with
                            ``pretrained=True`` -- confirm whether this flag should apply.

                    Raises:
                        AssertionError: if ``len(extract_layers) != depth``.
                    """
                    super().__init__()

                    self.extract_layers = extract_layers
                    self.cond_layer = cond_layer
                    self.limit_to_clip_only = limit_to_clip_only
                    self.process_cond = None

                    if add_calibration:
                        self.calibration_conds = 1

                    self.upsample_proj = nn.Conv2d(reduce_dim, 1, kernel_size=1) if upsample else None

                    self.add_activation1 = True

                    # Size of the conditional vector after the optional reduction.
                    cond_dim = 512 if reduce_cond is None else reduce_cond

                    import timm
                    self.model = timm.create_model('vit_base_patch16_384', pretrained=True)
                    self.model.head = nn.Linear(768, cond_dim)

                    # The head is replaced before this loop, so it is frozen as well.
                    for p in self.model.parameters():
                        p.requires_grad_(False)

                    import clip
                    self.clip_model, _ = clip.load('ViT-B/16', device='cpu', jit=False)

                    self.token_shape = (14, 14)

                    # conditional
                    if reduce_cond is not None:
                        self.reduce_cond = nn.Linear(512, reduce_cond)
                        for p in self.reduce_cond.parameters():
                            p.requires_grad_(False)
                    else:
                        self.reduce_cond = None

                    # FiLM: per-feature scale and offset computed from the conditional.
                    self.film_mul = nn.Linear(cond_dim, reduce_dim)
                    self.film_add = nn.Linear(cond_dim, reduce_dim)

                    assert len(self.extract_layers) == depth

                    self.reduces = nn.ModuleList([nn.Linear(768, reduce_dim) for _ in range(depth)])
                    self.blocks = nn.ModuleList([
                        nn.TransformerEncoderLayer(d_model=reduce_dim, nhead=n_heads)
                        for _ in range(len(self.extract_layers))
                    ])
                    self.extra_blocks = nn.ModuleList([
                        nn.TransformerEncoderLayer(d_model=reduce_dim, nhead=n_heads)
                        for _ in range(extra_blocks)
                    ])

                    # Kernel size equals stride, so every token becomes one 16x16 output patch.
                    trans_conv_ks = (16, 16)
                    self.trans_conv = nn.ConvTranspose2d(reduce_dim, 1, trans_conv_ks, stride=trans_conv_ks)

                    if learn_trans_conv_only:
                        for p in self.parameters():
                            p.requires_grad_(False)

                        for p in self.trans_conv.parameters():
                            p.requires_grad_(True)

                    if prompt == 'fixed':
                        self.prompt_list = ['a photo of a {}.']
                    elif prompt == 'shuffle':
                        self.prompt_list = ['a photo of a {}.', 'a photograph of a {}.', 'an image of a {}.', '{}.']
                    elif prompt == 'shuffle+':
                        self.prompt_list = ['a photo of a {}.', 'a photograph of a {}.', 'an image of a {}.', '{}.',
                                            'a cropped photo of a {}.', 'a good photo of a {}.', 'a photo of one {}.',
                                            'a bad photo of a {}.', 'a photo of the {}.']
                    elif prompt == 'shuffle_clip':
                        from models.clip_prompts import imagenet_templates
                        self.prompt_list = imagenet_templates

                    if process_cond is not None:
                        if process_cond == 'clamp' or process_cond[0] == 'clamp':

                            # A bare 'clamp' string uses the default bound of 0.2.
                            val = process_cond[1] if isinstance(process_cond, (list, tuple)) else 0.2

                            def clamp_vec(x):
                                return torch.clamp(x, -val, val)

                            self.process_cond = clamp_vec

                        elif process_cond.endswith('.pth'):

                            shift = torch.load(process_cond)

                            def add_shift(x):
                                return x + shift.to(x.device)

                            self.process_cond = add_shift

                    import pickle
                    # NOTE: pickle.load executes arbitrary code from the file; it must be trusted.
                    # The context manager closes the handle, which the bare open() call leaked.
                    with open('precomputed_prompt_vectors.pickle', 'rb') as prompt_file:
                        precomp = pickle.load(prompt_file)
                    self.precomputed_prompts = {k: torch.from_numpy(v) for k, v in precomp.items()}
         | 
| 224 | 
            +
             | 
| 225 | 
            +
             | 
| 226 | 
            +
                def forward(self, inp_image, conditional=None, return_features=False, mask=None):
                    """Predict a dense map for ``inp_image`` conditioned on ``conditional``.

                    Args:
                        inp_image: input batch; ``shape[0]`` is used as the batch size and
                            ``shape[2:]`` as the output size (presumably B x C x H x W --
                            confirm against callers).
                        conditional: passed unchanged to ``self.get_cond_vec``.
                        return_features: must be a bool; selects the return value.
                        mask: must be None.

                    Returns:
                        ``(prediction,)`` when ``return_features`` is false, otherwise
                        ``(prediction, visual_q, cond, activations)`` where ``activations``
                        starts with the layer-0 activation.

                    Raises:
                        AssertionError: if ``return_features`` is not a bool.
                        ValueError: if ``mask`` is not None.
                    """
                    assert isinstance(return_features, bool)

                    if mask is not None:
                        raise ValueError('mask not supported')

                    batch_size = inp_image.shape[0]
                    target_size = inp_image.shape[2:]

                    cond = self.get_cond_vec(conditional, batch_size)

                    # Layer 0 is requested in addition to the configured layers; it is
                    # only handed back through the return_features output.
                    wanted_layers = [0] + list(self.extract_layers)
                    visual_q, all_activations, _ = self.visual_forward(inp_image, extract_layers=wanted_layers)

                    activation1 = all_activations[0]
                    activations = all_activations[1:]

                    # Walk the extracted activations from the deepest layer to the shallowest,
                    # accumulating the reduced features step by step.
                    a = None
                    for step, (activation, block, reduce) in enumerate(
                            zip(reversed(activations), self.blocks, self.reduces)):
                        reduced = reduce(activation)
                        a = reduced if a is None else reduced + a

                        if step == self.cond_layer:
                            if self.reduce_cond is not None:
                                cond = self.reduce_cond(cond)

                            # FiLM conditioning: feature-wise scale and shift.
                            a = self.film_mul(cond) * a + self.film_add(cond)

                        a = block(a)

                    for extra in self.extra_blocks:
                        a = a + extra(a)

                    # Drop the first (class) token, then reorder to batch, features, tokens.
                    a = a[1:].permute(1, 2, 0)

                    # The remaining tokens are laid out as a square grid.
                    side = int(math.sqrt(a.shape[2]))
                    a = a.view(batch_size, a.shape[1], side, side)

                    if self.trans_conv is not None:
                        a = self.trans_conv(a)

                    # NOTE(review): trans_conv already outputs one channel while upsample_proj
                    # expects reduce_dim channels -- confirm this branch is ever used.
                    if self.upsample_proj is not None:
                        a = self.upsample_proj(a)
                        a = nnf.interpolate(a, inp_image.shape[2:], mode='bilinear')

                    a = nnf.interpolate(a, target_size)

                    if not return_features:
                        return (a,)
                    return a, visual_q, cond, [activation1] + activations
         | 
    	
        config.yaml
    ADDED
    
    | @@ -0,0 +1,15 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            clear_output: true
         | 
| 2 | 
            +
            force_cpu: false
         | 
| 3 | 
            +
            live_cam_start_active: false
         | 
| 4 | 
            +
            max_threads: 3
         | 
| 5 | 
            +
            memory_limit: 0
         | 
| 6 | 
            +
            output_image_format: png
         | 
| 7 | 
            +
            output_template: '{file}_{time}'
         | 
| 8 | 
            +
            output_video_codec: libx264
         | 
| 9 | 
            +
            output_video_format: mp4
         | 
| 10 | 
            +
            provider: cuda
         | 
| 11 | 
            +
            selected_theme: Default
         | 
| 12 | 
            +
            server_name: ''
         | 
| 13 | 
            +
            server_port: 0
         | 
| 14 | 
            +
            server_share: true
         | 
| 15 | 
            +
            video_quality: 14
         | 
    	
        docs/screenshot.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        installer/installer.py
    ADDED
    
    | @@ -0,0 +1,83 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import argparse
         | 
| 2 | 
            +
            import glob
         | 
| 3 | 
            +
            import os
         | 
| 4 | 
            +
            import shutil
         | 
| 5 | 
            +
            import site
         | 
| 6 | 
            +
            import subprocess
         | 
| 7 | 
            +
            import sys
         | 
| 8 | 
            +
             | 
| 9 | 
            +
             | 
| 10 | 
            +
            # Working directory at start-up (not necessarily the directory containing this
            # script); the __main__ block changes back to it before launching the app.
            script_dir = os.getcwd()
         | 
| 11 | 
            +
             | 
| 12 | 
            +
             | 
| 13 | 
            +
            def run_cmd(cmd, capture_output=False, env=None):
                """Run ``cmd`` through the system shell and return the completed process.

                Args:
                    cmd: command line handed to the shell as-is.
                    capture_output: forwarded to ``subprocess.run``; when true, stdout and
                        stderr are collected on the returned object.
                    env: optional environment mapping forwarded to ``subprocess.run``.

                Returns:
                    The ``subprocess.CompletedProcess`` of the finished command. A non-zero
                    exit status does not raise; callers inspect ``returncode`` themselves.
                """
                completed = subprocess.run(
                    cmd,
                    shell=True,
                    capture_output=capture_output,
                    env=env,
                )
                return completed
         | 
| 16 | 
            +
             | 
| 17 | 
            +
             | 
| 18 | 
            +
            def check_env():
                """Exit unless the installer runs inside a non-base conda environment.

                Runs ``conda`` through ``run_cmd`` and treats a non-zero exit status as
                "conda is not available". It then requires ``CONDA_DEFAULT_ENV`` to name an
                environment other than ``base``.

                Raises:
                    SystemExit: with status 1 when conda cannot be run, or when no dedicated
                        environment is active.
                """
                # If we have access to conda, we are probably in an environment
                conda_not_exist = run_cmd("conda", capture_output=True).returncode
                if conda_not_exist:
                    print("Conda is not installed. Exiting...")
                    sys.exit(1)

                # Ensure this is a new environment and not the base environment.
                # The variable is read with .get(): when it is missing, indexing
                # os.environ would abort with a bare KeyError instead of the hint below.
                if os.environ.get("CONDA_DEFAULT_ENV", "base") == "base":
                    print("Create an environment for this project and activate it. Exiting...")
                    sys.exit(1)
         | 
| 29 | 
            +
             | 
| 30 | 
            +
             | 
| 31 | 
            +
            def install_dependencies():
                """Perform the first-time setup: install git, clone the app, install packages.

                Each command is run through ``run_cmd`` in order; exit statuses are not
                checked. ``update_dependencies`` is called last to install the requirements.
                """
                # NOTE(review): the checkout runs in the current working directory, not inside
                # the cloned roop-unleashed directory, and update_dependencies() resets the
                # clone to origin/main afterwards -- confirm whether the commit pin is intended.
                setup_commands = (
                    "conda install -y -k git",
                    "git clone https://github.com/C0untFloyd/roop-unleashed.git",
                    "git checkout 8ee085322158c4eeb0cd0126a49949f1acf0f7df",
                )
                for command in setup_commands:
                    run_cmd(command)

                # Install the webui dependencies
                update_dependencies()
         | 
| 38 | 
            +
             | 
| 39 | 
            +
             | 
| 40 | 
            +
            def update_dependencies():
                """Bring the checkout in ``MY_PATH`` up to date and reinstall its requirements.

                Changes the working directory to ``MY_PATH`` and leaves it there. Local
                modifications are discarded by the hard reset so the update always applies.
                Exit statuses of the individual commands are not checked.
                """
                os.chdir(MY_PATH)

                update_commands = (
                    # hard reset so the update works even if there are local changes
                    "git fetch --all",
                    "git reset --hard origin/main",
                    "git pull",
                    # installs/updates the dependencies listed in requirements.txt
                    "python -m pip install -r requirements.txt",
                )
                for command in update_commands:
                    run_cmd(command)
         | 
| 50 | 
            +
             | 
| 51 | 
            +
             | 
| 52 | 
            +
            def start_app():
                """Launch ``run.py`` from the checkout, forwarding the installer's arguments.

                Changes the working directory to ``MY_PATH`` and runs ``python run.py`` with
                every command-line argument after the script name. The arguments are passed
                as a list without a shell, so values containing spaces or shell
                metacharacters reach ``run.py`` unchanged. ``sys.argv`` is not modified.
                """
                os.chdir(MY_PATH)

                # forward commandline arguments (everything after the script name)
                forwarded_args = sys.argv[1:]

                print("Launching App")
                subprocess.run(["python", "run.py", *forwarded_args])
         | 
| 61 | 
            +
             | 
| 62 | 
            +
             | 
| 63 | 
            +
            if __name__ == "__main__":
                # Directory of the application checkout, relative to the start-up directory.
                # It is a module-level name read by update_dependencies() and start_app().
                MY_PATH = "roop-unleashed"

                # Verifies we are in a conda environment
                check_env()

                if os.path.exists(MY_PATH):
                    # Already installed: updating is optional.
                    # (moved here from the batch file because of batch limitations)
                    updatechoice = input("Check for Updates? [y/n]").lower()
                    if updatechoice == "y":
                        update_dependencies()
                else:
                    # First run: clone the repository and install everything.
                    install_dependencies()

                # The steps above may have changed into the checkout; return to the
                # start-up directory so start_app() can resolve MY_PATH again.
                os.chdir(script_dir)
                start_app()
         | 
    	
        installer/windows_run.bat
    ADDED
    
    | @@ -0,0 +1,80 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            @echo off
            @rem One-click launcher: installs a private Miniconda, creates a Python 3.10
            @rem environment, downloads ffmpeg, then hands over to installer.py.
            REM No CLI arguments supported anymore
            set COMMANDLINE_ARGS=

            @rem work from the directory that contains this script
            cd /D "%~dp0"

            echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniconda which can not be silently installed under a path with spaces. && goto end

            set PATH=%PATH%;%SystemRoot%\system32

            @rem config
            set INSTALL_DIR=%cd%\installer_files
            set CONDA_ROOT_PREFIX=%cd%\installer_files\conda
            set INSTALL_ENV_DIR=%cd%\installer_files\env
            set MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe
            set FFMPEG_DOWNLOAD_URL=https://github.com/GyanD/codexffmpeg/releases/download/2023-06-21-git-1bcb8a7338/ffmpeg-2023-06-21-git-1bcb8a7338-essentials_build.zip
            set INSTALL_FFMPEG_DIR=%cd%\installer_files\ffmpeg
            set conda_exists=F

            @rem figure out whether git and conda needs to be installed
            call "%CONDA_ROOT_PREFIX%\_conda.exe" --version >nul 2>&1
            if "%ERRORLEVEL%" EQU "0" set conda_exists=T

            @rem (if necessary) install git and conda into a contained environment
            @rem download conda
            if "%conda_exists%" == "F" (
                echo Downloading Miniconda from %MINICONDA_DOWNLOAD_URL% to %INSTALL_DIR%\miniconda_installer.exe

                @rem the directory may be left over from an earlier, interrupted run
                if not exist "%INSTALL_DIR%" mkdir "%INSTALL_DIR%"
                call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. && goto end )

                echo Installing Miniconda to %CONDA_ROOT_PREFIX%
                start /wait "" "%INSTALL_DIR%\miniconda_installer.exe" /InstallationType=JustMe /NoShortcuts=1 /AddToPath=0 /RegisterPython=0 /NoRegistry=1 /S /D=%CONDA_ROOT_PREFIX%

                @rem test the conda binary
                echo Miniconda version:
                call "%CONDA_ROOT_PREFIX%\_conda.exe" --version || ( echo. && echo Miniconda not found. && goto end )
            )

            @rem create the installer env
            if not exist "%INSTALL_ENV_DIR%" (
                @rem the previous message echoed a variable that this script never sets
                echo Creating conda environment in %INSTALL_ENV_DIR%
                call "%CONDA_ROOT_PREFIX%\_conda.exe" create --no-shortcuts -y -k --prefix "%INSTALL_ENV_DIR%" python=3.10 || ( echo. && echo Conda environment creation failed. && goto end )
            )

            if not exist "%INSTALL_FFMPEG_DIR%" (
                echo Downloading ffmpeg from %FFMPEG_DOWNLOAD_URL% to %INSTALL_DIR%
                call curl -Lk "%FFMPEG_DOWNLOAD_URL%" > "%INSTALL_DIR%\ffmpeg.zip" || ( echo. && echo ffmpeg failed to download. && goto end )
                call powershell -command "Expand-Archive -Force '%INSTALL_DIR%\ffmpeg.zip' '%INSTALL_DIR%\'"

                cd "installer_files"
                setlocal EnableExtensions EnableDelayedExpansion

                @rem the archive unpacks into a versioned folder, rename it to plain ffmpeg
                for /f "tokens=*" %%f in ('dir /s /b /ad "ffmpeg*"') do (
                    ren "%%f" "ffmpeg"
                )
                endlocal
                @rem NOTE review: setx stores the merged process PATH in the user PATH and
                @rem truncates values longer than 1024 characters, confirm this is acceptable
                setx PATH "%INSTALL_FFMPEG_DIR%\bin\;%PATH%"
                echo To use videos, you need to restart roop after this installation.
                cd ..
            )

            @rem check if conda environment was actually created
            if not exist "%INSTALL_ENV_DIR%\python.exe" ( echo. && echo ERROR: Conda environment is empty. && goto end )

            @rem activate installer env
            call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniconda hook not found. && goto end )

            @rem setup installer env
            echo Launching roop unleashed
            call python installer.py %COMMANDLINE_ARGS%

            echo.
            echo Done!

            :end
            pause
         | 
| 78 | 
            +
             | 
| 79 | 
            +
             | 
| 80 | 
            +
             | 
    	
        models/CLIP/rd64-uni-refined.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:a4956f9a7978a75630b08c9d6ec075b7c51cf43b4751b686e3a011d4012ddc9d
         | 
| 3 | 
            +
            size 4720707
         | 
    	
        models/CodeFormer/CodeFormerv0.1.onnx
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:9aa48fc4b21224d85784c9a58885201284ec8e590b988126db2c07495b421d36
         | 
| 3 | 
            +
            size 376821951
         | 
    	
        models/DMDNet.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:70daeb4b1fd10f241043b587d892a941f2651d7322db02f06ff64b166537f65c
         | 
| 3 | 
            +
            size 603684323
         | 
    	
        models/GFPGANv1.4.onnx
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:5060d6c8d84851bbb8da630bea59b56414b49923a2b9304fb08f72d4c98f0aeb
         | 
| 3 | 
            +
            size 340256688
         | 
    	
        models/GPEN-BFR-512.onnx
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:0960f836488735444d508b588e44fb5dfd19c68fde9163ad7878aa24d1d5115e
         | 
| 3 | 
            +
            size 284250449
         | 
    	
        models/inswapper_128.onnx
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:e4a3f08c753cb72d04e10aa0f7dbe3deebbf39567d4ead6dce08e98aa49e16af
         | 
| 3 | 
            +
            size 554253681
         | 
    	
        mypy.ini
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            [mypy]
         | 
| 2 | 
            +
            check_untyped_defs = True
         | 
| 3 | 
            +
            disallow_any_generics = True
         | 
| 4 | 
            +
            disallow_untyped_calls = True
         | 
| 5 | 
            +
            disallow_untyped_defs = True
         | 
| 6 | 
            +
            ignore_missing_imports = True
         | 
| 7 | 
            +
            strict_optional = False
         | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,21 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            --extra-index-url https://download.pytorch.org/whl/cu118
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            numpy==1.24.2
         | 
| 4 | 
            +
            gradio==3.44.2
         | 
| 5 | 
            +
            opencv-python==4.8.0.76
         | 
| 6 | 
            +
            onnx==1.14.1
         | 
| 7 | 
            +
            insightface==0.7.3
         | 
| 8 | 
            +
            psutil==5.9.5
         | 
| 9 | 
            +
            pillow==10.0.1
         | 
| 10 | 
            +
            torch==2.0.1+cu118; sys_platform != 'darwin'
         | 
| 11 | 
            +
            torch==2.0.1; sys_platform == 'darwin'
         | 
| 12 | 
            +
            torchvision==0.15.2+cu118; sys_platform != 'darwin'
         | 
| 13 | 
            +
            torchvision==0.15.2; sys_platform == 'darwin'
         | 
| 14 | 
            +
            onnxruntime==1.16.0; sys_platform == 'darwin' and platform_machine != 'arm64'
         | 
| 15 | 
            +
            onnxruntime-silicon==1.13.1; sys_platform == 'darwin' and platform_machine == 'arm64'
         | 
| 16 | 
            +
            onnxruntime-gpu==1.16.1; sys_platform != 'darwin'
         | 
| 17 | 
            +
            protobuf==4.23.2
         | 
| 18 | 
            +
            tqdm==4.66.1
         | 
| 19 | 
            +
            ftfy
         | 
| 20 | 
            +
            regex
         | 
| 21 | 
            +
            pyvirtualcam
         | 
    	
        roop-unleashed.ipynb
    ADDED
    
    | @@ -0,0 +1,184 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "nbformat": 4,
         | 
| 3 | 
            +
              "nbformat_minor": 0,
         | 
| 4 | 
            +
              "metadata": {
         | 
| 5 | 
            +
                "colab": {
         | 
| 6 | 
            +
                  "provenance": [],
         | 
| 7 | 
            +
                  "gpuType": "T4",
         | 
| 8 | 
            +
                  "collapsed_sections": [
         | 
| 9 | 
            +
                    "UdQ1VHdI8lCf"
         | 
| 10 | 
            +
                  ]
         | 
| 11 | 
            +
                },
         | 
| 12 | 
            +
                "kernelspec": {
         | 
| 13 | 
            +
                  "name": "python3",
         | 
| 14 | 
            +
                  "display_name": "Python 3"
         | 
| 15 | 
            +
                },
         | 
| 16 | 
            +
                "language_info": {
         | 
| 17 | 
            +
                  "name": "python"
         | 
| 18 | 
            +
                },
         | 
| 19 | 
            +
                "accelerator": "GPU"
         | 
| 20 | 
            +
              },
         | 
| 21 | 
            +
              "cells": [
         | 
| 22 | 
            +
                {
         | 
| 23 | 
            +
                  "cell_type": "markdown",
         | 
| 24 | 
            +
                  "source": [
         | 
| 25 | 
            +
                    "# Colab for roop-unleashed - Gradio version\n",
         | 
| 26 | 
            +
                    "https://github.com/C0untFloyd/roop-unleashed\n"
         | 
| 27 | 
            +
                  ],
         | 
| 28 | 
            +
                  "metadata": {
         | 
| 29 | 
            +
                    "id": "G9BdiCppV6AS"
         | 
| 30 | 
            +
                  }
         | 
| 31 | 
            +
                },
         | 
| 32 | 
            +
                {
         | 
| 33 | 
            +
                  "cell_type": "markdown",
         | 
| 34 | 
            +
                  "source": [
         | 
| 35 | 
            +
                    "Installing & preparing requirements"
         | 
| 36 | 
            +
                  ],
         | 
| 37 | 
            +
                  "metadata": {
         | 
| 38 | 
            +
                    "id": "0ZYRNb0AWLLW"
         | 
| 39 | 
            +
                  }
         | 
| 40 | 
            +
                },
         | 
| 41 | 
            +
                {
         | 
| 42 | 
            +
                  "cell_type": "code",
         | 
| 43 | 
            +
                  "execution_count": null,
         | 
| 44 | 
            +
                  "metadata": {
         | 
| 45 | 
            +
                    "id": "t1yPuhdySqCq"
         | 
| 46 | 
            +
                  },
         | 
| 47 | 
            +
                  "outputs": [],
         | 
| 48 | 
            +
                  "source": [
         | 
| 49 | 
            +
                    "!git clone https://github.com/C0untFloyd/roop-unleashed.git\n",
         | 
| 50 | 
            +
                    "%cd roop-unleashed\n",
         | 
| 51 | 
            +
                    "!mv config_colab.yaml config.yaml\n",
         | 
| 52 | 
            +
                    "!pip install -r requirements.txt"
         | 
| 53 | 
            +
                  ]
         | 
| 54 | 
            +
                },
         | 
| 55 | 
            +
                {
         | 
| 56 | 
            +
                  "cell_type": "markdown",
         | 
| 57 | 
            +
                  "source": [
         | 
| 58 | 
            +
                    "Running roop-unleashed with default config"
         | 
| 59 | 
            +
                  ],
         | 
| 60 | 
            +
                  "metadata": {
         | 
| 61 | 
            +
                    "id": "u_4JQiSlV9Fi"
         | 
| 62 | 
            +
                  }
         | 
| 63 | 
            +
                },
         | 
| 64 | 
            +
                {
         | 
| 65 | 
            +
                  "cell_type": "code",
         | 
| 66 | 
            +
                  "source": [
         | 
| 67 | 
            +
                    "!python run.py"
         | 
| 68 | 
            +
                  ],
         | 
| 69 | 
            +
                  "metadata": {
         | 
| 70 | 
            +
                    "id": "Is6U2huqSzLE"
         | 
| 71 | 
            +
                  },
         | 
| 72 | 
            +
                  "execution_count": null,
         | 
| 73 | 
            +
                  "outputs": []
         | 
| 74 | 
            +
                },
         | 
| 75 | 
            +
                {
         | 
| 76 | 
            +
                  "cell_type": "markdown",
         | 
| 77 | 
            +
                  "source": [
         | 
| 78 | 
            +
                    "### Download generated images folder\n",
         | 
| 79 | 
            +
                    "(only needed if you want to zip the generated output)"
         | 
| 80 | 
            +
                  ],
         | 
| 81 | 
            +
                  "metadata": {
         | 
| 82 | 
            +
                    "id": "UdQ1VHdI8lCf"
         | 
| 83 | 
            +
                  }
         | 
| 84 | 
            +
                },
         | 
| 85 | 
            +
                {
         | 
| 86 | 
            +
                  "cell_type": "code",
         | 
| 87 | 
            +
                  "source": [
         | 
| 88 | 
            +
                    "import shutil\n",
         | 
| 89 | 
            +
                    "import os\n",
         | 
| 90 | 
            +
                    "from google.colab import files\n",
         | 
| 91 | 
            +
                    "\n",
         | 
| 92 | 
            +
                    "def zip_directory(directory_path, zip_path):\n",
         | 
| 93 | 
            +
                    "    shutil.make_archive(zip_path, 'zip', directory_path)\n",
         | 
| 94 | 
            +
                    "\n",
         | 
| 95 | 
            +
                    "# Set the directory path you want to download\n",
         | 
| 96 | 
            +
                    "directory_path = '/content/roop-unleashed/output'\n",
         | 
| 97 | 
            +
                    "\n",
         | 
| 98 | 
            +
                    "# Set the zip file name\n",
         | 
| 99 | 
            +
                    "zip_filename = 'fake_output'\n",
         | 
| 100 | 
            +
                    "\n",
         | 
| 101 | 
            +
                    "# Zip the directory\n",
         | 
| 102 | 
            +
                    "zip_directory(directory_path, zip_filename)\n",
         | 
| 103 | 
            +
                    "\n",
         | 
| 104 | 
            +
                    "# Download the zip file\n",
         | 
| 105 | 
            +
                    "files.download(zip_filename+'.zip')\n"
         | 
| 106 | 
            +
                  ],
         | 
| 107 | 
            +
                  "metadata": {
         | 
| 108 | 
            +
                    "colab": {
         | 
| 109 | 
            +
                      "base_uri": "https://localhost:8080/",
         | 
| 110 | 
            +
                      "height": 17
         | 
| 111 | 
            +
                    },
         | 
| 112 | 
            +
                    "id": "oYjWveAmw10X",
         | 
| 113 | 
            +
                    "outputId": "5b4c3650-f951-434a-c650-5525a8a70c1e"
         | 
| 114 | 
            +
                  },
         | 
| 115 | 
            +
                  "execution_count": null,
         | 
| 116 | 
            +
                  "outputs": [
         | 
| 117 | 
            +
                    {
         | 
| 118 | 
            +
                      "output_type": "display_data",
         | 
| 119 | 
            +
                      "data": {
         | 
| 120 | 
            +
                        "text/plain": [
         | 
| 121 | 
            +
                          "<IPython.core.display.Javascript object>"
         | 
| 122 | 
            +
                        ],
         | 
| 123 | 
            +
                        "application/javascript": [
         | 
| 124 | 
            +
                          "\n",
         | 
| 125 | 
            +
                          "    async function download(id, filename, size) {\n",
         | 
| 126 | 
            +
                          "      if (!google.colab.kernel.accessAllowed) {\n",
         | 
| 127 | 
            +
                          "        return;\n",
         | 
| 128 | 
            +
                          "      }\n",
         | 
| 129 | 
            +
                          "      const div = document.createElement('div');\n",
         | 
| 130 | 
            +
                          "      const label = document.createElement('label');\n",
         | 
| 131 | 
            +
                          "      label.textContent = `Downloading \"${filename}\": `;\n",
         | 
| 132 | 
            +
                          "      div.appendChild(label);\n",
         | 
| 133 | 
            +
                          "      const progress = document.createElement('progress');\n",
         | 
| 134 | 
            +
                          "      progress.max = size;\n",
         | 
| 135 | 
            +
                          "      div.appendChild(progress);\n",
         | 
| 136 | 
            +
                          "      document.body.appendChild(div);\n",
         | 
| 137 | 
            +
                          "\n",
         | 
| 138 | 
            +
                          "      const buffers = [];\n",
         | 
| 139 | 
            +
                          "      let downloaded = 0;\n",
         | 
| 140 | 
            +
                          "\n",
         | 
| 141 | 
            +
                          "      const channel = await google.colab.kernel.comms.open(id);\n",
         | 
| 142 | 
            +
                          "      // Send a message to notify the kernel that we're ready.\n",
         | 
| 143 | 
            +
                          "      channel.send({})\n",
         | 
| 144 | 
            +
                          "\n",
         | 
| 145 | 
            +
                          "      for await (const message of channel.messages) {\n",
         | 
| 146 | 
            +
                          "        // Send a message to notify the kernel that we're ready.\n",
         | 
| 147 | 
            +
                          "        channel.send({})\n",
         | 
| 148 | 
            +
                          "        if (message.buffers) {\n",
         | 
| 149 | 
            +
                          "          for (const buffer of message.buffers) {\n",
         | 
| 150 | 
            +
                          "            buffers.push(buffer);\n",
         | 
| 151 | 
            +
                          "            downloaded += buffer.byteLength;\n",
         | 
| 152 | 
            +
                          "            progress.value = downloaded;\n",
         | 
| 153 | 
            +
                          "          }\n",
         | 
| 154 | 
            +
                          "        }\n",
         | 
| 155 | 
            +
                          "      }\n",
         | 
| 156 | 
            +
                          "      const blob = new Blob(buffers, {type: 'application/binary'});\n",
         | 
| 157 | 
            +
                          "      const a = document.createElement('a');\n",
         | 
| 158 | 
            +
                          "      a.href = window.URL.createObjectURL(blob);\n",
         | 
| 159 | 
            +
                          "      a.download = filename;\n",
         | 
| 160 | 
            +
                          "      div.appendChild(a);\n",
         | 
| 161 | 
            +
                          "      a.click();\n",
         | 
| 162 | 
            +
                          "      div.remove();\n",
         | 
| 163 | 
            +
                          "    }\n",
         | 
| 164 | 
            +
                          "  "
         | 
| 165 | 
            +
                        ]
         | 
| 166 | 
            +
                      },
         | 
| 167 | 
            +
                      "metadata": {}
         | 
| 168 | 
            +
                    },
         | 
| 169 | 
            +
                    {
         | 
| 170 | 
            +
                      "output_type": "display_data",
         | 
| 171 | 
            +
                      "data": {
         | 
| 172 | 
            +
                        "text/plain": [
         | 
| 173 | 
            +
                          "<IPython.core.display.Javascript object>"
         | 
| 174 | 
            +
                        ],
         | 
| 175 | 
            +
                        "application/javascript": [
         | 
| 176 | 
            +
                          "download(\"download_789eab11-93d2-4880-adf3-6aceee0cc5f9\", \"fake_output.zip.zip\", 80125)"
         | 
| 177 | 
            +
                        ]
         | 
| 178 | 
            +
                      },
         | 
| 179 | 
            +
                      "metadata": {}
         | 
| 180 | 
            +
                    }
         | 
| 181 | 
            +
                  ]
         | 
| 182 | 
            +
                }
         | 
| 183 | 
            +
              ]
         | 
| 184 | 
            +
            }
         | 
    	
        roop/FaceSet.py
    ADDED
    
    | @@ -0,0 +1,20 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import numpy as np
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            class FaceSet:
                """Collection of face entries plus the reference images they came from.

                Face entries are read and written through the ``'embedding'`` key, so any
                mapping-like object exposing that key can be stored in ``faces``.
                """

                # Class-level defaults. ``__init__`` rebinds the mutable ones so that
                # instances never share the same list object.
                faces = []
                ref_images = []
                # NOTE(review): this default is the *string* 'None', not the None
                # singleton; nothing in this class reads it - confirm against callers.
                embedding_average = 'None'
                embeddings_backup = None

                def __init__(self):
                    self.faces = []
                    self.ref_images = []
                    self.embeddings_backup = None

                def AverageEmbeddings(self):
                    """Replace the first face's embedding with the mean over all faces.

                    Does nothing unless the set holds more than one face and no backup
                    has been taken yet. The first face's previous embedding is kept in
                    ``embeddings_backup``, which also makes repeated calls no-ops.
                    """
                    if len(self.faces) > 1 and self.embeddings_backup is None:
                        self.embeddings_backup = self.faces[0]['embedding']
                        # Use key access for every face, matching how the first face
                        # is read and written here.
                        embeddings = [face['embedding'] for face in self.faces]

                        self.faces[0]['embedding'] = np.mean(embeddings, axis=0)
                        # TODO: try median too?
         | 
    	
        roop/ProcessEntry.py
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            class ProcessEntry:
                """Plain record describing one file to process and its frame range."""

                def __init__(self, filename: str, start: int, end: int, fps: float):
                    """Store the given values; ``finalname`` starts out as None."""
                    self.filename, self.finalname = filename, None
                    self.startframe, self.endframe = start, end
                    self.fps = fps
         | 
    	
        roop/ProcessMgr.py
    ADDED
    
    | @@ -0,0 +1,457 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import os
         | 
| 2 | 
            +
            import cv2 
         | 
| 3 | 
            +
            import numpy as np
         | 
| 4 | 
            +
            import psutil
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            from roop.ProcessOptions import ProcessOptions
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            from roop.face_util import get_first_face, get_all_faces, rotate_image_180
         | 
| 9 | 
            +
            from roop.utilities import compute_cosine_distance, get_device, str_to_class
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            from typing import Any, List, Callable
         | 
| 12 | 
            +
            from roop.typing import Frame
         | 
| 13 | 
            +
            from concurrent.futures import ThreadPoolExecutor, as_completed
         | 
| 14 | 
            +
            from threading import Thread, Lock
         | 
| 15 | 
            +
            from queue import Queue
         | 
| 16 | 
            +
            from tqdm import tqdm
         | 
| 17 | 
            +
            from roop.ffmpeg_writer import FFMPEG_VideoWriter
         | 
| 18 | 
            +
            import roop.globals
         | 
| 19 | 
            +
             | 
| 20 | 
            +
             | 
| 21 | 
            +
            def create_queue(temp_frame_paths: List[str]) -> Queue[str]:
                """Return a new FIFO queue holding the given paths in their original order."""
                pending: Queue[str] = Queue()
                enqueue = pending.put
                for path in temp_frame_paths:
                    enqueue(path)
                return pending
         | 
| 26 | 
            +
             | 
| 27 | 
            +
             | 
| 28 | 
            +
            def pick_queue(queue: Queue[str], queue_per_future: int) -> List[str]:
                """Remove and return up to ``queue_per_future`` items from ``queue``.

                Items come back in FIFO order. Fewer items (possibly none) are returned
                when the queue runs dry first. The call never blocks.
                """
                # Imported locally because the module only imports ``Queue`` itself.
                from queue import Empty

                picked = []
                for _ in range(queue_per_future):
                    try:
                        # get_nowait avoids the empty()/get() check-then-act pair, which
                        # could block forever if another consumer drained the queue
                        # between the two calls.
                        picked.append(queue.get_nowait())
                    except Empty:
                        break
                return picked
         | 
| 34 | 
            +
             | 
| 35 | 
            +
             | 
| 36 | 
            +
            class ProcessMgr():
         | 
| 37 | 
            +
                input_face_datas = []
         | 
| 38 | 
            +
                target_face_datas = []
         | 
| 39 | 
            +
             | 
| 40 | 
            +
                processors = []
         | 
| 41 | 
            +
                options : ProcessOptions = None
         | 
| 42 | 
            +
                
         | 
| 43 | 
            +
                num_threads = 1
         | 
| 44 | 
            +
                current_index = 0
         | 
| 45 | 
            +
                processing_threads = 1
         | 
| 46 | 
            +
                buffer_wait_time = 0.1
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                lock = Lock()
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                frames_queue = None
         | 
| 51 | 
            +
                processed_queue = None
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                videowriter= None
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                progress_gradio = None
         | 
| 56 | 
            +
                total_frames = 0
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                
         | 
| 59 | 
            +
             | 
| 60 | 
            +
             | 
| 61 | 
            +
                plugins =  { 
         | 
| 62 | 
            +
                'faceswap'      : 'FaceSwapInsightFace',
         | 
| 63 | 
            +
                'mask_clip2seg' : 'Mask_Clip2Seg',
         | 
| 64 | 
            +
                'codeformer'    : 'Enhance_CodeFormer',
         | 
| 65 | 
            +
                'gfpgan'        : 'Enhance_GFPGAN',
         | 
| 66 | 
            +
                'dmdnet'        : 'Enhance_DMDNet',
         | 
| 67 | 
            +
                'gpen'          : 'Enhance_GPEN',
         | 
| 68 | 
            +
                }
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                def __init__(self, progress):
                    """Keep ``progress`` on the instance when one is supplied.

                    When ``progress`` is None nothing is assigned, so the class-level
                    ``progress_gradio`` default stays in effect.
                    """
                    if progress is None:
                        return
                    self.progress_gradio = progress
         | 
| 73 | 
            +
             | 
| 74 | 
            +
             | 
| 75 | 
            +
                def initialize(self, input_faces, target_faces, options):
                    """Store the job inputs and sync ``self.processors`` with ``options.processors``.

                    ``options.processors`` is a comma-separated list of keys of
                    ``self.plugins``. After the call ``self.processors`` holds exactly one
                    processor per requested key, in the requested order. Already loaded
                    processors are reused when their ``processorname`` matches a requested
                    key; every processor that is not reused has ``Release()`` called on it.

                    Raises:
                        KeyError: if a requested name is not a key of ``self.plugins``.
                    """
                    self.input_face_datas = input_faces
                    self.target_face_datas = target_faces
                    self.options = options

                    processornames = options.processors.split(",")
                    devicename = get_device()

                    # Pair each requested name with an already loaded processor, if any.
                    # Each loaded instance is handed out at most once, so a name that is
                    # requested twice still ends up with two separate processors.
                    spare = list(self.processors)
                    lineup = []
                    for pn in processornames:
                        reused = None
                        for idx, candidate in enumerate(spare):
                            if candidate.processorname == pn:
                                reused = spare.pop(idx)
                                break
                        lineup.append(reused)

                    # Whatever is left was not requested (or is surplus). Release it before
                    # creating new processors, and drop it from the list so that a released
                    # instance can never be run again.
                    for stale in spare:
                        stale.Release()
                    # Slice assignment keeps mutating the same list object that the
                    # previous append/insert/del based code mutated.
                    self.processors[:] = [p for p in lineup if p is not None]

                    # Create the processors that could not be reused.
                    for i, pn in enumerate(processornames):
                        if lineup[i] is None:
                            classname = self.plugins[pn]
                            module = 'roop.processors.' + classname
                            p = str_to_class(module, classname)
                            p.Initialize(devicename)
                            lineup[i] = p

                    self.processors[:] = lineup
         | 
| 104 | 
            +
             | 
| 105 | 
            +
             | 
| 106 | 
            +
             | 
| 107 | 
            +
                def run_batch(self, source_files, target_files, threads:int = 1):
                    """Process ``source_files`` in chunks on a thread pool.

                    The paths are split into chunks of ``max(len(source_files) // threads, 1)``
                    entries. Each chunk is submitted to ``process_frames`` together with a
                    callback that forwards to ``self.update_progress``. The method returns
                    once every future has been collected; an exception raised inside a
                    worker is re-raised here through ``future.result()``.
                    """
                    bar_layout = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
                    self.total_frames = len(source_files)
                    self.num_threads = threads
                    with tqdm(total=self.total_frames, desc='Processing', unit='frame', dynamic_ncols=True, bar_format=bar_layout) as progress, \
                            ThreadPoolExecutor(max_workers=threads) as executor:

                        def tick():
                            self.update_progress(progress)

                        pending = create_queue(source_files)
                        chunk_size = max(len(source_files) // threads, 1)
                        submitted = []
                        while not pending.empty():
                            chunk = pick_queue(pending, chunk_size)
                            submitted.append(executor.submit(self.process_frames, source_files, target_files, chunk, tick))
                        for finished in as_completed(submitted):
                            finished.result()
         | 
| 121 | 
            +
             | 
| 122 | 
            +
             | 
| 123 | 
            +
                def process_frames(self, source_files: List[str], target_files: List[str], current_files, update: Callable[[], None]) -> None:
                    """Process the image files in ``current_files`` and write the results.

                    Each path is read with ``cv2.imread`` and passed to
                    ``self.process_frame``. A non-None result is written to the entry of
                    ``target_files`` at the position the path has in ``source_files``.
                    ``update`` (when truthy) is called once per file, including files that
                    could not be read. The loop returns early as soon as
                    ``roop.globals.processing`` is falsy.

                    Raises:
                        ValueError: if a processed path is not contained in ``source_files``.
                    """
                    # Build the path -> position lookup once instead of scanning
                    # ``source_files`` with list.index for every single frame.
                    # setdefault keeps the first occurrence, like list.index does.
                    first_index = {}
                    for position, path in enumerate(source_files):
                        first_index.setdefault(path, position)

                    for f in current_files:
                        if not roop.globals.processing:
                            return

                        temp_frame = cv2.imread(f)
                        if temp_frame is not None:
                            resimg = self.process_frame(temp_frame)
                            if resimg is not None:
                                # Fall back to list.index for unknown paths so the
                                # exception type (ValueError) stays what it was.
                                i = first_index[f] if f in first_index else source_files.index(f)
                                cv2.imwrite(target_files[i], resimg)
                        if update:
                            update()
         | 
| 136 | 
            +
             | 
| 137 | 
            +
             | 
| 138 | 
            +
             | 
| 139 | 
            +
                def read_frames_thread(self, cap, frame_start, frame_end, num_threads):
                    """Read frames from ``cap`` and deal them round-robin into ``self.frames_queue``.

                    Seeks to ``frame_start`` when it is greater than zero. Reading stops when
                    ``cap.read()`` reports failure, when ``frame_end - frame_start`` frames
                    have been queued, or when ``roop.globals.processing`` turns falsy.
                    Afterwards a ``None`` marker is put into each of the first
                    ``num_threads`` queues.
                    """
                    total_num = frame_end - frame_start
                    if frame_start > 0:
                        cap.set(cv2.CAP_PROP_POS_FRAMES,frame_start)

                    num_frame = 0
                    while roop.globals.processing:
                        ret, frame = cap.read()
                        if not ret:
                            break

                        slot = num_frame % num_threads
                        self.frames_queue[slot].put(frame, block=True)
                        num_frame += 1
                        if num_frame == total_num:
                            break

                    # One end-of-stream marker per queue.
                    for slot in range(num_threads):
                        self.frames_queue[slot].put(None)
         | 
| 157 | 
            +
             | 
| 158 | 
            +
             | 
| 159 | 
            +
             | 
| 160 | 
            +
                def process_videoframes(self, threadindex, progress) -> None:
                    """Worker loop for one processing thread.

                    Takes frames from ``self.frames_queue[threadindex]``, runs
                    ``self.process_frame`` on each and puts ``(True, result)`` onto
                    ``self.processed_queue[threadindex]``, calling ``progress()`` after
                    every frame. A ``None`` input ends the loop: the worker decrements
                    ``self.processing_threads``, emits ``(False, None)`` and returns.
                    """
                    inbox = self.frames_queue[threadindex]
                    outbox = self.processed_queue[threadindex]

                    while True:
                        frame = inbox.get()
                        if frame is None:
                            # End-of-stream marker: report completion downstream.
                            self.processing_threads -= 1
                            outbox.put((False, None))
                            return

                        outbox.put((True, self.process_frame(frame)))
                        # Drop our reference before blocking on the next get().
                        del frame
                        progress()
         | 
| 172 | 
            +
             | 
| 173 | 
            +
             | 
| 174 | 
            +
                def write_frames_thread(self):
                    """Drain the worker output queues in round-robin order and write frames.

                    Each queue item is a ``(process, frame)`` pair. A non-``None`` frame is
                    passed to ``self.videowriter.write_frame``. A pair with a ``None`` frame
                    and a false flag marks one finished worker; the method returns once
                    ``self.num_threads`` such markers have been seen. A ``None`` frame with a
                    true flag is skipped.
                    """
                    remaining_workers = self.num_threads
                    turn = 0

                    while True:
                        still_running, image = self.processed_queue[turn % self.num_threads].get()
                        turn += 1

                        if image is not None:
                            self.videowriter.write_frame(image)
                            del image
                            continue

                        if not still_running:
                            remaining_workers -= 1
                            if remaining_workers < 1:
                                return
         | 
| 188 | 
            +
                        
         | 
| 189 | 
            +
             | 
| 190 | 
            +
             | 
| 191 | 
            +
                def run_batch_inmem(self, source_video, target_video, frame_start, frame_end, fps, threads:int = 1, skip_audio=False):
                    """Process a frame range of ``source_video`` in memory and encode it to ``target_video``.

                    Pipeline: one reader thread (``read_frames_thread``) feeds ``threads``
                    workers (``process_videoframes``) through single-slot queues, and one
                    writer thread (``write_frames_thread``) hands results to an
                    ``FFMPEG_VideoWriter``. The writer is created with ``audiofile=None``.

                    NOTE(review): ``skip_audio`` is accepted but not used in this method.
                    NOTE(review): the progress total is ``frame_end - frame_start + 1`` while
                    the reader stops after ``frame_end - frame_start`` frames - confirm which
                    count is intended.
                    """
                    cap = cv2.VideoCapture(source_video)
                    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

                    self.total_frames = (frame_end - frame_start) + 1
                    self.num_threads = threads
                    self.processing_threads = self.num_threads

                    # One single-slot input queue and one single-slot output queue per worker.
                    self.frames_queue = [Queue(1) for _ in range(threads)]
                    self.processed_queue = [Queue(1) for _ in range(threads)]

                    self.videowriter = FFMPEG_VideoWriter(target_video, (width, height), fps, codec=roop.globals.video_encoder, crf=roop.globals.video_quality, audiofile=None)

                    readthread = Thread(target=self.read_frames_thread, args=(cap, frame_start, frame_end, threads))
                    readthread.start()

                    writethread = Thread(target=self.write_frames_thread)
                    writethread.start()

                    bar_layout = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
                    with tqdm(total=self.total_frames, desc='Processing', unit='frames', dynamic_ncols=True, bar_format=bar_layout) as progress:
                        with ThreadPoolExecutor(thread_name_prefix='swap_proc', max_workers=self.num_threads) as executor:

                            def tick():
                                self.update_progress(progress)

                            futures = [
                                executor.submit(self.process_videoframes, worker_index, tick)
                                for worker_index in range(threads)
                            ]

                            # result() re-raises any exception raised inside a worker.
                            for finished in as_completed(futures):
                                finished.result()

                    # Wait for reader and writer, then release everything.
                    readthread.join()
                    writethread.join()
                    cap.release()
                    self.videowriter.close()
                    self.frames_queue.clear()
                    self.processed_queue.clear()
         | 
| 234 | 
            +
             | 
| 235 | 
            +
             | 
| 236 | 
            +
             | 
| 237 | 
            +
             | 
| 238 | 
            +
                def update_progress(self, progress: Any = None) -> None:
                    """Advance the progress bar by one frame and forward the state.

                    ``progress`` must provide ``set_postfix``, ``update`` and ``n`` (the
                    caller in this file passes a tqdm bar). The postfix shows the resident
                    memory of this process in GB and ``self.num_threads``. The new position
                    is then forwarded to ``self.progress_gradio`` (presumably the Gradio
                    progress callback - confirm against where it is assigned).
                    """
                    # Resident set size of the current process, bytes -> GB.
                    rss_gb = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024
                    progress.set_postfix({
                        'memory_usage': '{:.2f}'.format(rss_gb).zfill(5) + 'GB',
                        'execution_threads': self.num_threads
                    })
                    progress.update(1)
                    self.progress_gradio((progress.n, self.total_frames), desc='Processing', total=self.total_frames, unit='frames')
         | 
| 248 | 
            +
             | 
| 249 | 
            +
             | 
| 250 | 
            +
                def on_no_face_action(self, frame:Frame):
                    """Return a ``(faces, frame)`` pair according to ``roop.globals.no_face_action``.

                    * ``0`` -> ``(None, frame)``
                    * ``2`` -> ``(None, None)``
                    * any other value -> ``(get_all_faces(frame), frame)``; the first element
                      is ``None`` when ``get_all_faces`` returns ``None``.
                    """
                    action = roop.globals.no_face_action
                    if action == 0:
                        return None, frame
                    if action == 2:
                        return None, None

                    return get_all_faces(frame), frame
         | 
| 261 | 
            +
                  
         | 
| 262 | 
            +
             | 
| 263 | 
            +
             | 
| 264 | 
            +
             | 
| 265 | 
            +
                def process_frame(self, frame:Frame):
                    """Swap faces in one frame and return the resulting frame.

                    Returns ``frame`` untouched when there are no input faces. Otherwise
                    ``swap_faces`` runs on a copy; if it swapped at least one face that
                    result is returned. When nothing was swapped,
                    ``roop.globals.no_face_action`` decides:

                    * ``0`` -> return the original frame
                    * ``2`` -> return ``None``
                    * anything else -> retry on a copy rotated by 180 degrees; return the
                      result rotated back, or the original frame if that also swaps nothing.
                    """
                    if len(self.input_face_datas) < 1:
                        return frame

                    swapped, candidate = self.swap_faces(frame, frame.copy())
                    if swapped > 0:
                        return candidate

                    fallback = roop.globals.no_face_action
                    if fallback == 0:
                        return frame
                    if fallback == 2:
                        return None

                    # Second attempt on an upside-down copy of the frame.
                    rotated = rotate_image_180(frame.copy())
                    swapped, candidate = self.swap_faces(rotated, rotated.copy())
                    if swapped == 0:
                        return frame
                    return rotate_image_180(candidate)
         | 
| 286 | 
            +
             | 
| 287 | 
            +
             | 
| 288 | 
            +
             | 
| 289 | 
            +
                def swap_faces(self, frame, temp_frame):
                    """Swap faces found in ``frame`` onto ``temp_frame``.

                    ``self.options.swap_mode`` selects the targets:

                    * ``"first"`` - only the face returned by ``get_first_face``
                    * ``"all"`` - every face returned by ``get_all_faces``
                    * ``"selected"`` - for each entry of ``self.target_face_datas``, the first
                      detected face whose cosine distance to it is within
                      ``self.options.face_distance_threshold``; it is swapped using the input
                      face with the same index, if one exists
                    * ``"all_female"`` / ``"all_male"`` - faces whose ``sex`` is ``'F'`` / ``'M'``

                    Returns ``(count, image)``. When nothing was swapped the image is the
                    original ``frame``; otherwise it is the swapped frame, passed through the
                    ``'clip2seg'`` mask processor when one is present in ``self.processors``.
                    """
                    mode = self.options.swap_mode
                    swapped = 0

                    if mode == "first":
                        first = get_first_face(frame)
                        if first is None:
                            return swapped, frame
                        swapped += 1
                        temp_frame = self.process_face(self.options.selected_index, first, temp_frame)
                    else:
                        detected = get_all_faces(frame)
                        if detected is None:
                            return swapped, frame

                        if mode == "all":
                            for candidate in detected:
                                swapped += 1
                                temp_frame = self.process_face(self.options.selected_index, candidate, temp_frame)

                        elif mode == "selected":
                            for slot, wanted in enumerate(self.target_face_datas):
                                for candidate in detected:
                                    if compute_cosine_distance(wanted.embedding, candidate.embedding) <= self.options.face_distance_threshold:
                                        # Only swap when an input face exists for this slot;
                                        # either way stop at the first match for this target.
                                        if slot < len(self.input_face_datas):
                                            temp_frame = self.process_face(slot, candidate, temp_frame)
                                            swapped += 1
                                        break

                        elif mode in ("all_female", "all_male"):
                            gender = 'F' if mode == "all_female" else 'M'
                            for candidate in detected:
                                if candidate.sex == gender:
                                    swapped += 1
                                    temp_frame = self.process_face(self.options.selected_index, candidate, temp_frame)

                    if swapped == 0:
                        return swapped, frame

                    # Optional text-driven masking pass over the swapped result.
                    maskprocessor = next((x for x in self.processors if x.processorname == 'clip2seg'), None)
                    if maskprocessor is not None:
                        temp_frame = self.process_mask(maskprocessor, frame, temp_frame)
                    return swapped, temp_frame
         | 
| 333 | 
            +
             | 
| 334 | 
            +
             | 
| 335 | 
            +
                def process_face(self,face_index, target_face, frame:Frame):
                    """Run the processor chain for one target face and paste the result into ``frame``.

                    ``face_index`` selects the entry of ``self.input_face_datas`` whose first
                    face is used as the source. Processors of type ``'swap'`` produce the
                    swapped face image, ``'mask'`` processors are skipped here, and any other
                    processor is run on the swapped face and returns
                    ``(enhanced_frame, scale_factor)``.

                    The swapped face is resized to 512x512 with cubic interpolation. If no
                    enhancer ran it is pasted back on its own with a scale factor of
                    ``int(512 / original_width)``; otherwise the enhancer output is pasted
                    using the enhancer's scale factor.

                    Note: ``fake_frame`` is only bound once a ``'swap'`` processor has run, so
                    ``self.processors`` must contain one before any enhancer.
                    """
                    enhanced_frame = None
                    inputface = self.input_face_datas[face_index].faces[0]

                    for p in self.processors:
                        if p.type == 'swap':
                            fake_frame = p.Run(inputface, target_face, frame)
                            scale_factor = 0.0
                        elif p.type == 'mask':
                            continue
                        else:
                            enhanced_frame, scale_factor = p.Run(self.input_face_datas[face_index], target_face, fake_frame)

                    upscale = 512
                    orig_width = fake_frame.shape[1]
                    # The third positional parameter of cv2.resize is ``dst``, so the
                    # interpolation flag has to be passed by keyword to take effect.
                    fake_frame = cv2.resize(fake_frame, (upscale, upscale), interpolation=cv2.INTER_CUBIC)
                    mask_offsets = inputface.mask_offsets

                    if enhanced_frame is None:
                        # No enhancer: paste the resized swap result itself. Passing the
                        # same object twice lets paste_upscale skip its blending step.
                        scale_factor = int(upscale / orig_width)
                        upsk_face = fake_frame
                    else:
                        upsk_face = enhanced_frame
                    return self.paste_upscale(fake_frame, upsk_face, target_face.matrix, frame, scale_factor, mask_offsets)
         | 
| 359 | 
            +
             | 
| 360 | 
            +
                    
         | 
| 361 | 
            +
             | 
| 362 | 
            +
             | 
| 363 | 
            +
                def cutout(self, frame:Frame, start_x, start_y, end_x, end_y):
                    """Crop ``frame`` to a rectangle clamped to the frame bounds.

                    Negative start coordinates are raised to 0 and end coordinates are
                    limited to the frame width (``shape[1]``) and height (``shape[0]``).

                    Returns ``(crop, start_x, start_y, end_x, end_y)`` with the clamped
                    coordinates; ``crop`` is the slice ``frame[start_y:end_y, start_x:end_x]``.
                    """
                    frame_h, frame_w = frame.shape[0], frame.shape[1]
                    start_x = max(start_x, 0)
                    start_y = max(start_y, 0)
                    end_x = min(end_x, frame_w)
                    end_y = min(end_y, frame_h)
                    crop = frame[start_y:end_y, start_x:end_x]
                    return crop, start_x, start_y, end_x, end_y
         | 
| 373 | 
            +
             | 
| 374 | 
            +
                    
         | 
| 375 | 
            +
                
         | 
| 376 | 
            +
                # Paste back adapted from here
         | 
| 377 | 
            +
                # https://github.com/fAIseh00d/refacer/blob/main/refacer.py
         | 
| 378 | 
            +
                # which is revised insightface paste back code
         | 
| 379 | 
            +
             | 
| 380 | 
            +
                def paste_upscale(self, fake_face, upsk_face, M, target_img, scale_factor, mask_offsets):
                    """Blend a processed face crop back into ``target_img`` and return a uint8 image.

                    Parameters:
                        fake_face: swapped face crop.
                        upsk_face: face crop to paste; when it is a different object than
                            ``fake_face`` the two are blended using ``self.options.blend_ratio``.
                        M: affine matrix of the target face; it is multiplied by
                            ``scale_factor`` and inverted to map the crop into the target
                            image (presumably a 2x3 alignment matrix - confirm with caller).
                        target_img: image to paste into.
                        scale_factor: factor applied to ``M``.
                        mask_offsets: ``mask_offsets[0]`` rows at the top and
                            ``mask_offsets[1]`` rows at the bottom of the crop are excluded.

                    If the warped mask contains no fully opaque pixel there is nothing to
                    paste and an unmodified uint8 copy of ``target_img`` is returned.
                    """
                    M_scale = M * scale_factor
                    IM = cv2.invertAffineTransform(M_scale)
                    target_size = (target_img.shape[1], target_img.shape[0])

                    face_matte = np.full((target_img.shape[0],target_img.shape[1]), 255, dtype=np.uint8)
                    # White rectangle sized like upsk_face, with the offset rows blacked out.
                    img_matte = np.full((upsk_face.shape[0],upsk_face.shape[1]), 255, dtype=np.uint8)
                    if mask_offsets[0] > 0:
                        img_matte[:mask_offsets[0],:] = 0
                    if mask_offsets[1] > 0:
                        img_matte[-mask_offsets[1]:,:] = 0

                    # Warp the white rectangle into target image coordinates.
                    img_matte = cv2.warpAffine(img_matte, IM, target_size, flags=cv2.INTER_NEAREST, borderValue=0.0)
                    # Blacken a 1 pixel border so the mask never touches the image edges.
                    img_matte[:1,:] = img_matte[-1:,:] = img_matte[:,:1] = img_matte[:,-1:] = 0

                    # Locate the warped white area.
                    mask_h_inds, mask_w_inds = np.where(img_matte==255)
                    if mask_h_inds.size == 0:
                        # Empty mask: np.max/np.min below would raise on a zero-size array.
                        return target_img.astype(np.uint8)

                    # Size of the white area, taken from its bounding box.
                    mask_h = np.max(mask_h_inds) - np.min(mask_h_inds)
                    mask_w = np.max(mask_w_inds) - np.min(mask_w_inds)
                    mask_size = int(np.sqrt(mask_h*mask_w))

                    # Erode the matte; kernel size follows the insightface empirical guess.
                    k = max(mask_size//10, 10)
                    kernel = np.ones((k,k),np.uint8)
                    img_matte = cv2.erode(img_matte,kernel,iterations = 1)

                    # Feather the matte; blur size follows the insightface empirical guess.
                    k = max(mask_size//20, 5)
                    blur_size = (2*k+1, 2*k+1)
                    img_matte = cv2.GaussianBlur(img_matte, blur_size, 0)

                    # Normalize to float weights in [0, 1] and add a channel axis.
                    img_matte = img_matte.astype(np.float32)/255
                    face_matte = face_matte.astype(np.float32)/255
                    img_matte = np.minimum(face_matte, img_matte)
                    img_matte = np.reshape(img_matte, [img_matte.shape[0],img_matte.shape[1],1])

                    # Warp the face to paste into target image coordinates.
                    paste_face = cv2.warpAffine(upsk_face, IM, target_size, borderMode=cv2.BORDER_REPLICATE)
                    if upsk_face is not fake_face:
                        fake_face = cv2.warpAffine(fake_face, IM, target_size, borderMode=cv2.BORDER_REPLICATE)
                        paste_face = cv2.addWeighted(paste_face, self.options.blend_ratio, fake_face, 1.0 - self.options.blend_ratio, 0)

                    # Re-assemble: pasted face where the matte is opaque, target elsewhere.
                    paste_face = img_matte * paste_face
                    paste_face = paste_face + (1-img_matte) * target_img.astype(np.float32)
                    del img_matte
                    del face_matte
                    del upsk_face
                    del fake_face
                    return paste_face.astype(np.uint8)
         | 
| 434 | 
            +
             | 
| 435 | 
            +
             | 
| 436 | 
            +
                def process_mask(self, processor, frame:Frame, target:Frame):
                    """Blend the original ``frame`` back into ``target`` using a mask.

                    The mask comes from ``processor.Run(frame, self.options.masking_text)``
                    and is resized to the size of ``target``. The result is
                    ``(1 - mask) * target + mask * frame``, converted to uint8, so mask
                    values near 1 keep the original frame and values near 0 keep ``target``.
                    """
                    weights = processor.Run(frame, self.options.masking_text)
                    weights = cv2.resize(weights, (target.shape[1], target.shape[0]))
                    # Add a trailing channel axis so the mask broadcasts over the colors.
                    weights = np.reshape(weights, [weights.shape[0], weights.shape[1], 1])

                    blended = (1 - weights) * target.astype(np.float32)
                    blended += weights * frame.astype(np.float32)
                    return np.uint8(blended)
         | 
| 445 | 
            +
             | 
| 446 | 
            +
                        
         | 
| 447 | 
            +
             | 
| 448 | 
            +
             | 
| 449 | 
            +
                def unload_models(self=None):
                    """Placeholder hook; currently does nothing.

                    ``self`` defaults to ``None`` so the existing call style without an
                    instance keeps working, while calling it on an instance no longer
                    raises ``TypeError``.
                    """
                    pass
         | 
| 451 | 
            +
             | 
| 452 | 
            +
             | 
| 453 | 
            +
                def release_resources(self):
                    """Call ``Release()`` on every processor, then empty ``self.processors``."""
                    for processor in self.processors:
                        processor.Release()
                    # Clear in place so other references to the list see it emptied too.
                    self.processors.clear()
         | 
| 457 | 
            +
             | 
    	
        roop/ProcessOptions.py
    ADDED
    
    | @@ -0,0 +1,9 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            class ProcessOptions:
                """Plain container for the settings of one processing run."""

                def __init__(self,processors, face_distance,  blend_ratio, swap_mode, selected_index, masking_text):
                    """Store the given settings as attributes.

                    Every argument is stored under its own name, except ``face_distance``,
                    which is stored as ``face_distance_threshold``.
                    """
                    # Processor selection and masking prompt.
                    self.processors = processors
                    self.masking_text = masking_text

                    # Which faces to swap and with which input face.
                    self.swap_mode = swap_mode
                    self.selected_index = selected_index

                    # Matching threshold and blend weight.
                    self.face_distance_threshold = face_distance
                    self.blend_ratio = blend_ratio
         | 
    	
        roop/__init__.py
    ADDED
    
    | 
            File without changes
         | 
    	
        roop/__pycache__/FaceSet.cpython-310.pyc
    ADDED
    
    | Binary file (1 kB). View file | 
|  | 
    	
        roop/__pycache__/ProcessEntry.cpython-310.pyc
    ADDED
    
    | Binary file (575 Bytes). View file | 
|  | 
    	
        roop/__pycache__/ProcessMgr.cpython-310.pyc
    ADDED
    
    | Binary file (12.2 kB). View file | 
|  | 
    	
        roop/__pycache__/ProcessOptions.cpython-310.pyc
    ADDED
    
    | Binary file (595 Bytes). View file | 
|  | 
    	
        roop/__pycache__/__init__.cpython-310.pyc
    ADDED
    
    | Binary file (133 Bytes). View file | 
|  | 
    	
        roop/__pycache__/capturer.cpython-310.pyc
    ADDED
    
    | Binary file (1.08 kB). View file | 
|  | 
    	
        roop/__pycache__/core.cpython-310.pyc
    ADDED
    
    | Binary file (10.5 kB). View file | 
|  | 
    	
        roop/__pycache__/face_util.cpython-310.pyc
    ADDED
    
    | Binary file (8.1 kB). View file | 
|  | 
    	
        roop/__pycache__/ffmpeg_writer.cpython-310.pyc
    ADDED
    
    | Binary file (5.62 kB). View file | 
|  | 
    	
        roop/__pycache__/globals.cpython-310.pyc
    ADDED
    
    | Binary file (1.07 kB). View file | 
|  | 
    	
        roop/__pycache__/metadata.cpython-310.pyc
    ADDED
    
    | Binary file (178 Bytes). View file | 
|  | 
    	
        roop/__pycache__/template_parser.cpython-310.pyc
    ADDED
    
    | Binary file (1.09 kB). View file | 
|  | 
    	
        roop/__pycache__/typing.cpython-310.pyc
    ADDED
    
    | Binary file (321 Bytes). View file | 
|  | 
    	
        roop/__pycache__/util_ffmpeg.cpython-310.pyc
    ADDED
    
    | Binary file (3.84 kB). View file | 
|  | 
    	
        roop/__pycache__/utilities.cpython-310.pyc
    ADDED
    
    | Binary file (10.9 kB). View file | 
|  | 
    	
        roop/capturer.py
    ADDED
    
    | @@ -0,0 +1,30 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from typing import Optional
         | 
| 2 | 
            +
            import cv2
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            from roop.typing import Frame
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            def get_image_frame(filename: str):
                """Load the image at ``filename`` with ``cv2.imread``.

                Returns whatever ``cv2.imread`` returns, or ``None`` after printing a
                message if it raises. Only ``Exception`` subclasses are caught, so
                ``KeyboardInterrupt`` and ``SystemExit`` still propagate.
                """
                try:
                    return cv2.imread(filename)
                except Exception:
                    print(f"Exception reading {filename}")
                return None
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                
         | 
| 15 | 
            +
            def get_video_frame(video_path: str, frame_number: int = 0) -> Optional[Frame]:
                """Return one frame of the video at ``video_path``, or ``None`` if none could be read.

                The capture is positioned at ``frame_number - 1``, limited to the range
                ``0 .. frame count``, so the default ``frame_number=0`` no longer requests
                the negative position -1.
                """
                capture = cv2.VideoCapture(video_path)
                frame_total = capture.get(cv2.CAP_PROP_FRAME_COUNT)
                # Clamp below at 0: a negative frame position is not a valid index.
                position = max(0, min(frame_total, frame_number - 1))
                capture.set(cv2.CAP_PROP_POS_FRAMES, position)
                has_frame, frame = capture.read()
                capture.release()
                return frame if has_frame else None
         | 
| 24 | 
            +
             | 
| 25 | 
            +
             | 
| 26 | 
            +
            def get_video_frame_total(video_path: str) -> int:
                """Return the frame count that OpenCV reports for ``video_path``, as an int."""
                reader = cv2.VideoCapture(video_path)
                # The property is returned as a float; truncate it to an int.
                reported = reader.get(cv2.CAP_PROP_FRAME_COUNT)
                total = int(reported)
                reader.release()
                return total
         | 
    	
        roop/core.py
    ADDED
    
    | @@ -0,0 +1,360 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/usr/bin/env python3
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            import os
         | 
| 4 | 
            +
            import sys
         | 
| 5 | 
            +
            import shutil
         | 
| 6 | 
            +
            # single thread doubles cuda performance - needs to be set before torch import
         | 
| 7 | 
            +
            if any(arg.startswith('--execution-provider') for arg in sys.argv):
         | 
| 8 | 
            +
                os.environ['OMP_NUM_THREADS'] = '1'
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            import warnings
         | 
| 11 | 
            +
            from typing import List
         | 
| 12 | 
            +
            import platform
         | 
| 13 | 
            +
            import signal
         | 
| 14 | 
            +
            import torch
         | 
| 15 | 
            +
            import onnxruntime
         | 
| 16 | 
            +
            import pathlib
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            from time import time
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            import roop.globals
         | 
| 21 | 
            +
            import roop.metadata
         | 
| 22 | 
            +
            import roop.utilities as util
         | 
| 23 | 
            +
            import roop.util_ffmpeg as ffmpeg
         | 
| 24 | 
            +
            import ui.main as main
         | 
| 25 | 
            +
            from settings import Settings
         | 
| 26 | 
            +
            from roop.face_util import extract_face_images
         | 
| 27 | 
            +
            from roop.ProcessEntry import ProcessEntry
         | 
| 28 | 
            +
            from roop.ProcessMgr import ProcessMgr
         | 
| 29 | 
            +
            from roop.ProcessOptions import ProcessOptions
         | 
| 30 | 
            +
            from roop.capturer import get_video_frame_total
         | 
| 31 | 
            +
             | 
| 32 | 
            +
             | 
| 33 | 
            +
            # Module-level state. call_display_ui is installed by set_display_ui();
            # process_mgr is created lazily by the processing functions below.
            clip_text = call_display_ui = process_mgr = None

            # Unbind torch from this module when the ROCm provider is selected.
            if 'ROCMExecutionProvider' in roop.globals.execution_providers:
                del torch

            # Hide FutureWarning from insightface and UserWarning from torchvision.
            warnings.filterwarnings('ignore', category=FutureWarning, module='insightface')
            warnings.filterwarnings('ignore', category=UserWarning, module='torchvision')
         | 
| 45 | 
            +
             | 
| 46 | 
            +
             | 
| 47 | 
            +
            def parse_args() -> None:
                """Install the SIGINT handler and apply the fixed GUI defaults.

                Command-line arguments are not interpreted: when any are present a hint
                is printed and they are otherwise ignored.
                """
                def _on_sigint(signal_number, frame):
                    return destroy()

                signal.signal(signal.SIGINT, _on_sigint)
                roop.globals.headless = False
                if len(sys.argv) > 1:
                    print('No CLI args supported - use Settings Tab instead')
                # Always enable all processors when using GUI
                roop.globals.frame_processors = ['face_swapper', 'face_enhancer']
         | 
| 54 | 
            +
             | 
| 55 | 
            +
             | 
| 56 | 
            +
            def encode_execution_providers(execution_providers: List[str]) -> List[str]:
                """Return the short, lower-case form of each provider name.

                Every occurrence of ``'ExecutionProvider'`` is removed from a name before it
                is lower-cased, e.g. ``'CUDAExecutionProvider'`` becomes ``'cuda'``.
                """
                short_names = []
                for full_name in execution_providers:
                    trimmed = full_name.replace('ExecutionProvider', '')
                    short_names.append(trimmed.lower())
                return short_names
         | 
| 58 | 
            +
             | 
| 59 | 
            +
             | 
| 60 | 
            +
            def decode_execution_providers(execution_providers: List[str]) -> List[str]:
                """Map short provider names back to the full names onnxruntime reports.

                An available provider is selected when any entry of ``execution_providers``
                is a substring of its encoded (short, lower-case) name. The result keeps
                the order returned by ``onnxruntime.get_available_providers()``.
                """
                available = onnxruntime.get_available_providers()
                encoded = encode_execution_providers(onnxruntime.get_available_providers())
                selected = []
                for provider, short_name in zip(available, encoded):
                    for requested in execution_providers:
                        if requested in short_name:
                            selected.append(provider)
                            break
                return selected
         | 
| 63 | 
            +
             | 
| 64 | 
            +
             | 
| 65 | 
            +
            def suggest_max_memory() -> int:
                """Return the suggested memory limit: 4 on macOS ('darwin'), otherwise 16."""
                on_macos = platform.system().lower() == 'darwin'
                return 4 if on_macos else 16
         | 
| 69 | 
            +
             | 
| 70 | 
            +
             | 
| 71 | 
            +
            def suggest_execution_providers() -> List[str]:
                """Return the encoded (short) names of all providers onnxruntime reports as available."""
                available = onnxruntime.get_available_providers()
                return encode_execution_providers(available)
         | 
| 73 | 
            +
             | 
| 74 | 
            +
             | 
| 75 | 
            +
            def suggest_execution_threads() -> int:
                """Return 1 when the DirectML or ROCm provider is configured, otherwise 8."""
                single_thread_providers = ('DmlExecutionProvider', 'ROCMExecutionProvider')
                for provider in single_thread_providers:
                    if provider in roop.globals.execution_providers:
                        return 1
                return 8
         | 
| 81 | 
            +
             | 
| 82 | 
            +
             | 
| 83 | 
            +
            def limit_resources() -> None:
                """Apply ``roop.globals.max_memory`` as a memory limit for this process.

                Does nothing when ``max_memory`` is falsy. On Windows the value is passed
                to ``SetProcessWorkingSetSize``; on every other platform it is set as both
                the soft and the hard ``RLIMIT_DATA``.
                """
                limit = roop.globals.max_memory
                if not limit:
                    return

                system_name = platform.system().lower()
                # NOTE(review): macOS scales by 1024 ** 6 while every other platform uses
                # 1024 ** 3 - confirm that this is intentional.
                exponent = 6 if system_name == 'darwin' else 3
                memory = limit * 1024 ** exponent

                if system_name == 'windows':
                    import ctypes
                    kernel32 = ctypes.windll.kernel32  # type: ignore[attr-defined]
                    kernel32.SetProcessWorkingSetSize(-1, ctypes.c_size_t(memory), ctypes.c_size_t(memory))
                    return

                import resource
                resource.setrlimit(resource.RLIMIT_DATA, (memory, memory))
         | 
| 96 | 
            +
             | 
| 97 | 
            +
             | 
| 98 | 
            +
             | 
| 99 | 
            +
            def release_resources() -> None:
                """Release the module-level ProcessMgr, if one exists, then run the garbage collector."""
                import gc
                global process_mgr

                manager = process_mgr
                if manager is not None:
                    manager.release_resources()
                    process_mgr = None

                gc.collect()
                # NOTE: an explicit torch.cuda cache cleanup (empty_cache / ipc_collect) for
                # the CUDA provider used to follow here; it is currently disabled.
         | 
| 112 | 
            +
             | 
| 113 | 
            +
             | 
| 114 | 
            +
            def pre_check() -> bool:
                """Check the Python version and request the model files.

                Returns:
                    False when the interpreter is older than Python 3.9, otherwise True.
                    A missing ``ffmpeg`` executable is only reported through
                    update_status(); it does not change the return value.
                """
                if sys.version_info < (3, 9):
                    update_status('Python version is not supported - please upgrade to 3.9 or higher.')
                    return False

                # (path handed to util.resolve_relative_path, URLs requested for that directory)
                model_downloads = (
                    ('../models', (
                        'https://huggingface.co/countfloyd/deepfake/resolve/main/inswapper_128.onnx',
                        'https://huggingface.co/countfloyd/deepfake/resolve/main/GFPGANv1.4.onnx',
                        'https://github.com/csxmli2016/DMDNet/releases/download/v1/DMDNet.pth',
                        'https://github.com/facefusion/facefusion-assets/releases/download/models/GPEN-BFR-512.onnx',
                    )),
                    ('../models/CLIP', (
                        'https://huggingface.co/countfloyd/deepfake/resolve/main/rd64-uni-refined.pth',
                    )),
                    ('../models/CodeFormer', (
                        'https://huggingface.co/countfloyd/deepfake/resolve/main/CodeFormerv0.1.onnx',
                    )),
                )
                for relative_directory, urls in model_downloads:
                    download_directory_path = util.resolve_relative_path(relative_directory)
                    for url in urls:
                        util.conditional_download(download_directory_path, [url])

                ffmpeg_path = shutil.which('ffmpeg')
                if not ffmpeg_path:
                    update_status('ffmpeg is not installed.')
                return True
         | 
| 133 | 
            +
             | 
| 134 | 
            +
            def set_display_ui(function):
                """Store ``function`` in the module-level ``call_display_ui`` callback slot."""
                global call_display_ui
                call_display_ui = function
         | 
| 138 | 
            +
             | 
| 139 | 
            +
             | 
| 140 | 
            +
            def update_status(message: str) -> None:
                """Print ``message`` and pass it to the ``call_display_ui`` callback when one is set."""
                print(message)
                notify = call_display_ui
                if notify is None:
                    return
                notify(message)
         | 
| 146 | 
            +
             | 
| 147 | 
            +
             | 
| 148 | 
            +
             | 
| 149 | 
            +
             | 
| 150 | 
            +
            def start() -> None:
                """Run a batch with no input files.

                Headless operation is not implemented: when ``roop.globals.headless`` is
                set, a notice is printed and execution continues as usual.
                """
                if roop.globals.headless:
                    print('Headless mode currently unsupported - starting UI!')
                    # TODO: headless mode would have to fill roop.globals.INPUT_FACES and
                    # TARGET_FACES from source_path/target_path before starting the batch.

                # batch_process() requires all five positional arguments
                # (files, use_clip, new_clip_text, use_new_method, progress); the former
                # three-argument call raised TypeError. An empty list replaces None
                # because the file list is iterated.
                batch_process([], False, None, False, None)
         | 
| 161 | 
            +
             | 
| 162 | 
            +
             | 
| 163 | 
            +
            def get_processing_plugins(use_clip):
                """Build the comma-separated plugin string for the current settings.

                The string always starts with ``faceswap``; ``mask_clip2seg`` is added when
                ``use_clip`` is truthy, followed by the plugin that matches
                ``roop.globals.selected_enhancer`` (if any).
                """
                enhancer_plugins = (
                    ('GFPGAN', 'gfpgan'),
                    ('Codeformer', 'codeformer'),
                    ('DMDNet', 'dmdnet'),
                    ('GPEN', 'gpen'),
                )
                plugins = ["faceswap"]
                if use_clip:
                    plugins.append("mask_clip2seg")

                selected = roop.globals.selected_enhancer
                for enhancer_name, plugin_name in enhancer_plugins:
                    if selected == enhancer_name:
                        plugins.append(plugin_name)
                        break
                return ",".join(plugins)
         | 
| 177 | 
            +
             | 
| 178 | 
            +
             | 
| 179 | 
            +
            def live_swap(frame, swap_mode, use_clip, clip_text, selected_index = 0):
                """Process a single frame with the shared ProcessMgr.

                Returns ``frame`` unchanged when it is None or when processing yields
                None; otherwise returns the processed frame.
                """
                global process_mgr

                if frame is None:
                    return frame

                # Create the shared manager on first use.
                if process_mgr is None:
                    process_mgr = ProcessMgr(None)

                plugins = get_processing_plugins(use_clip)
                options = ProcessOptions(plugins, roop.globals.distance_threshold, roop.globals.blend_ratio, swap_mode, selected_index, clip_text)
                process_mgr.initialize(roop.globals.INPUT_FACESETS, roop.globals.TARGET_FACES, options)
                swapped = process_mgr.process_frame(frame)
                return frame if swapped is None else swapped
         | 
| 194 | 
            +
             | 
| 195 | 
            +
             | 
| 196 | 
            +
            def preview_mask(frame, clip_text):
                """Run the mask step for ``frame`` with the given ``clip_text`` and return its result.

                The processor named ``clip2seg`` is looked up on the shared ProcessMgr;
                None is passed on when no such processor is present.
                """
                import numpy as np
                global process_mgr

                # All-zero uint8 image with the same shape as the frame.
                blank_mask = np.zeros(frame.shape, np.uint8)
                if process_mgr is None:
                    process_mgr = ProcessMgr(None)

                options = ProcessOptions("mask_clip2seg", roop.globals.distance_threshold, roop.globals.blend_ratio, "None", 0, clip_text)
                process_mgr.initialize(roop.globals.INPUT_FACESETS, roop.globals.TARGET_FACES, options)

                clip_processor = None
                for candidate in process_mgr.processors:
                    if candidate.processorname == 'clip2seg':
                        clip_processor = candidate
                        break
                return process_mgr.process_mask(clip_processor, frame, blank_mask)
         | 
| 207 | 
            +
                
         | 
| 208 | 
            +
             | 
| 209 | 
            +
             | 
| 210 | 
            +
             | 
| 211 | 
            +
             | 
| 212 | 
            +
            def batch_process(files:list[ProcessEntry], use_clip, new_clip_text, use_new_method, progress) -> None:
                """Process every image and video entry in ``files``.

                Args:
                    files: entries to process; ``None`` is treated as an empty list. The
                        ``finalname`` of each image/video entry is overwritten with its
                        output path.
                    use_clip: when truthy the ``mask_clip2seg`` plugin is added.
                    new_clip_text: text handed to ProcessOptions.
                    use_new_method: when truthy (and ``roop.globals.keep_frames`` is
                        unset) videos go through ``run_batch_inmem`` instead of being
                        extracted to frames on disk.
                    progress: passed to ProcessMgr when a new manager is created.

                Processing stops early, via ``end_processing('Processing stopped!')``, as
                soon as ``roop.globals.processing`` is found to be falsy at one of the
                checkpoints.
                """
                global process_mgr

                roop.globals.processing = True
                release_resources()
                limit_resources()

                # limit threads for some providers
                if suggest_execution_threads() == 1:
                    roop.globals.execution_threads = 1

                update_status('Sorting videos/images')
                imagefiles, videofiles = _split_batch_entries([] if files is None else files)

                if process_mgr is None:
                    process_mgr = ProcessMgr(progress)

                options = ProcessOptions(get_processing_plugins(use_clip), roop.globals.distance_threshold, roop.globals.blend_ratio, roop.globals.face_swap_mode, 0, new_clip_text)
                process_mgr.initialize(roop.globals.INPUT_FACESETS, roop.globals.TARGET_FACES, options)

                if imagefiles:
                    update_status('Processing image(s)')
                    origimages = [entry.filename for entry in imagefiles]
                    fakeimages = [entry.finalname for entry in imagefiles]
                    process_mgr.run_batch(origimages, fakeimages, roop.globals.execution_threads)

                for index, v in enumerate(videofiles):
                    if not roop.globals.processing:
                        end_processing('Processing stopped!')
                        return
                    fps = v.fps if v.fps > 0 else util.detect_fps(v.filename)
                    if v.endframe == 0:
                        v.endframe = get_video_frame_total(v.filename)

                    update_status(f'Creating {os.path.basename(v.finalname)} with {fps} FPS...')
                    start_processing = time()
                    if roop.globals.keep_frames or not use_new_method:
                        if not _process_video_via_frames(v, fps):
                            end_processing('Processing stopped!')
                            return
                    else:
                        # GIF sources never get an audio track.
                        if util.has_extension(v.filename, ['gif']):
                            skip_audio = True
                        else:
                            skip_audio = roop.globals.skip_audio
                        process_mgr.run_batch_inmem(v.filename, v.finalname, v.startframe, v.endframe, fps,roop.globals.execution_threads, skip_audio)

                    if not roop.globals.processing:
                        end_processing('Processing stopped!')
                        return

                    _finalize_video(v, index, start_processing)
                end_processing('Finished')


            def _split_batch_entries(files):
                """Assign output paths and split ``files`` into ``(images, videos)``; other entries are dropped."""
                imagefiles = []
                videofiles = []
                for index, f in enumerate(files):
                    fullname = f.filename
                    if util.has_image_extension(fullname):
                        destination = util.get_destfilename_from_path(fullname, roop.globals.output_path, f'.{roop.globals.CFG.output_image_format}')
                        destination = util.replace_template(destination, index=index)
                        pathlib.Path(os.path.dirname(destination)).mkdir(parents=True, exist_ok=True)
                        f.finalname = destination
                        imagefiles.append(f)
                    elif util.is_video(fullname) or util.has_extension(fullname, ['gif']):
                        # Videos are first written to a '__temp' name; the final name is
                        # resolved in _finalize_video().
                        destination = util.get_destfilename_from_path(fullname, roop.globals.output_path, f'__temp.{roop.globals.CFG.output_video_format}')
                        f.finalname = destination
                        videofiles.append(f)
                return imagefiles, videofiles


            def _process_video_via_frames(v, fps) -> bool:
                """Extract the frames of ``v``, process them on disk and encode the result.

                Returns False when ``roop.globals.processing`` was cleared part-way
                through, True otherwise.
                """
                util.create_temp(v.filename)
                update_status('Extracting frames...')
                ffmpeg.extract_frames(v.filename,v.startframe,v.endframe, fps)
                if not roop.globals.processing:
                    return False

                temp_frame_paths = util.get_temp_frame_paths(v.filename)
                process_mgr.run_batch(temp_frame_paths, temp_frame_paths, roop.globals.execution_threads)
                if not roop.globals.processing:
                    return False

                if roop.globals.wait_after_extraction:
                    extract_path = os.path.dirname(temp_frame_paths[0])
                    util.open_folder(extract_path)
                    input("Press any key to continue...")
                    print("Resorting frames to create video")
                    util.sort_rename_frames(extract_path)

                # Encode to this video's own output path. The previous code used the
                # stale loop variable of the sorting/image loops here, so the result was
                # written to another entry's path.
                ffmpeg.create_video(v.filename, v.finalname, fps)
                if not roop.globals.keep_frames:
                    util.delete_temp_frames(temp_frame_paths[0])
                return True


            def _finalize_video(v, index, start_processing) -> None:
                """Turn the temporary output of ``v`` into its final file and report the outcome."""
                video_file_name = v.finalname
                if not os.path.isfile(video_file_name):
                    update_status(f'Failed processing {os.path.basename(v.finalname)}!')
                    return

                if util.has_extension(v.filename, ['gif']):
                    gifname = util.get_destfilename_from_path(v.filename, roop.globals.output_path, '.gif')
                    destination = util.replace_template(gifname, index=index)
                    pathlib.Path(os.path.dirname(destination)).mkdir(parents=True, exist_ok=True)

                    update_status('Creating final GIF')
                    ffmpeg.create_gif_from_video(video_file_name, destination)
                    # Only drop the temporary video once the GIF really exists.
                    if os.path.isfile(destination):
                        os.remove(video_file_name)
                else:
                    destination = util.replace_template(video_file_name, index=index)
                    pathlib.Path(os.path.dirname(destination)).mkdir(parents=True, exist_ok=True)

                    if not roop.globals.skip_audio:
                        ffmpeg.restore_audio(video_file_name, v.filename, v.startframe, v.endframe, destination)
                        if os.path.isfile(destination):
                            os.remove(video_file_name)
                    else:
                        shutil.move(video_file_name, destination)
                update_status(f'\nProcessing {os.path.basename(destination)} took {time() - start_processing} secs')
         | 
| 336 | 
            +
             | 
| 337 | 
            +
             | 
| 338 | 
            +
            def end_processing(msg:str):
                """Report ``msg`` via update_status(), clear ``roop.globals.target_folder_path`` and release resources."""
                update_status(msg)
                roop.globals.target_folder_path = None
                release_resources()
         | 
| 342 | 
            +
             | 
| 343 | 
            +
             | 
| 344 | 
            +
            def destroy() -> None:
                """Clean the temp data of the current target (if any), release resources and exit."""
                target = roop.globals.target_path
                if target:
                    util.clean_temp(target)
                release_resources()
                sys.exit()
         | 
| 349 | 
            +
             | 
| 350 | 
            +
             | 
| 351 | 
            +
            def run() -> None:
                """Entry point: run the pre-checks, load ``config.yaml`` into the globals and start the UI."""
                parse_args()
                if not pre_check():
                    return

                config = Settings('config.yaml')
                roop.globals.CFG = config
                roop.globals.execution_threads = config.max_threads
                roop.globals.video_encoder = config.output_video_codec
                roop.globals.video_quality = config.video_quality
                # A non-positive memory limit means "no limit".
                memory_limit = config.memory_limit
                roop.globals.max_memory = memory_limit if memory_limit > 0 else None
                main.run()
         |