Spaces:

leonelhs
/

faceparser

Runtime error

App Files Files Community

leonelhs commited on 19 days ago

Commit

75b33dc

1 Parent(s): f2d7a5e

source bisnet is added internally

Browse files

Files changed (7) hide show

.gitignore +3 -0
README.md +3 -4
app.py +54 -29
bisnet/__init__.py +39 -0
bisnet/model.py +321 -0
bisnet/resnet.py +145 -0
requirements.txt +7 -5

.gitignore CHANGED Viewed

@@ -1,2 +1,5 @@
 .idea/
 __pycache__/

 .idea/
 __pycache__/
+.gradio
+playground.py
+resnet18-5c106cde.pth

README.md CHANGED Viewed

@@ -1,13 +1,12 @@
 ---
 title: Face Parser
 emoji: 👁
-colorFrom: blue
 colorTo: green
 sdk: gradio
-sdk_version: 3.34.0
 app_file: app.py
 pinned: false
 license: mit
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Face Parser
 emoji: 👁
+colorFrom: green
 colorTo: green
 sdk: gradio
+sdk_version: 5.46.1
 app_file: app.py
 pinned: false
 license: mit
+short_description: Extracts facial features (hair, nose, eyes, etc.)
 ---

app.py CHANGED Viewed

@@ -1,23 +1,42 @@
-import os
 import gradio as gr
 import numpy as np
 import torch
 from PIL import Image
-from bisnet import BiSeNet
-from huggingface_hub import snapshot_download
 from utils import vis_parsing_maps, decode_segmentation_masks, image_to_tensor
-os.system("pip freeze")
 REPO_ID = "leonelhs/faceparser"
 MODEL_NAME = "79999_iter.pth"
 model = BiSeNet(n_classes=19)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-snapshot_folder = snapshot_download(repo_id=REPO_ID)
-model_path = os.path.join(snapshot_folder, MODEL_NAME)
 model.load_state_dict(torch.load(model_path, map_location=device))
 model.eval()
@@ -47,33 +66,39 @@ def predict(image):
     return overlay
-title = "Face Parser"
-description = r"""
-## Image face parser for research
-This is an implementation of <a href='https://github.com/zllrunning/face-parsing.PyTorch' target='_blank'>face-parsing.PyTorch</a>.
-It has no any particular purpose than start research on AI models.
 """
-article = r"""
-Questions, doubts, comments, please email 📧 `[email protected]`
-This demo is running on a CPU, if you like this project please make us a donation to run on a GPU or just give us a <a href='https://github.com/leonelhs/zeroscratches/' target='_blank'>Github ⭐</a>
-<a href="https://www.buymeacoffee.com/leonelhs"><img src="https://img.buymeacoffee.com/button-api/?text=Buy me a coffee&emoji=&slug=leonelhs&button_colour=FFDD00&font_colour=000000&font_family=Cookie&outline_colour=000000&coffee_colour=ffffff" /></a>
-<center><img src='https://visitor-badge.glitch.me/badge?page_id=zeroscratches.visitor-badge' alt='visitor badge'></center>
-"""
-demo = gr.Interface(
-    predict, [
-        gr.Image(type="pil", label="Input"),
-    ], [
-        gr.Image(type="numpy", label="Image face parsed")
-    ],
-    title=title,
-    description=description,
-    article=article)
-demo.queue().launch()

+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+#######################################################################################
+#
+# This project is one of several repositories exploring image segmentation techniques.
+# All related projects and interactive demos can be found at:
+# https://huggingface.co/spaces/leonelhs/removators
+# Self app: https://huggingface.co/spaces/leonelhs/rembg
+#
+# Source code is based on or inspired by several projects.
+# For more details and proper attribution, please refer to the following resources:
+#
+# - [face-makeup.PyTorch] - [https://github.com/zllrunning/face-makeup.PyTorch]
+# - [BiSeNet] [https://github.com/CoinCheung/BiSeNet]
 import gradio as gr
 import numpy as np
 import torch
 from PIL import Image
+from huggingface_hub import hf_hub_download
+from bisnet import BiSeNet
 from utils import vis_parsing_maps, decode_segmentation_masks, image_to_tensor
 REPO_ID = "leonelhs/faceparser"
 MODEL_NAME = "79999_iter.pth"
 model = BiSeNet(n_classes=19)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_NAME)
 model.load_state_dict(torch.load(model_path, map_location=device))
 model.eval()
     return overlay
+aboutme = r"""
+# PyTorch Image Face Parser
+Extracts facial features (hair, nose, eyes, etc.) from images using image segmentation.
+This project is part of a larger collection of repositories exploring image segmentation techniques.
+Related projects and interactive demos are available at: [Removators](https://huggingface.co/spaces/leonelhs/removators)
+## Acknowledgments
+The source code is based on or inspired by the following projects:
+- [face-makeup.PyTorch](https://github.com/zllrunning/face-makeup.PyTorch)
+- [BiSeNet](https://github.com/CoinCheung/BiSeNet)
+## Contact
+For questions, comments, or feedback, please contact:
+📧 [email protected]
 """
+with gr.Blocks(title="Face Parser") as app:
+    navbar = gr.Navbar(visible=True, main_page_name="Workspace")
+    gr.Markdown("## Face Parser Tool")
+    with gr.Row():
+        with gr.Column(scale=1):
+            inp = gr.Image(type="pil", label="Upload Image")
+            btn_predict = gr.Button("Parse")
+        with gr.Column(scale=2):
+            out = gr.Image(type="pil", label="Output image")
+    btn_predict.click(predict, inputs=[inp], outputs=[out])
+with app.route("About this", "/about"):
+    gr.Markdown(aboutme)
+app.launch()

bisnet/__init__.py ADDED Viewed

	@@ -0,0 +1,39 @@

+# MIT License
+#
+# Copyright (c) [2025] [[email protected]]
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+#######################################################################################
+#
+# This project is one of several repositories exploring image segmentation techniques.
+# All related projects and interactive demos can be found at:
+# https://huggingface.co/spaces/leonelhs/removatorsau
+# Self app: https://huggingface.co/spaces/leonelhs/rembg
+#
+# Source code is based on or inspired by several projects.
+# For more details and proper attribution, please refer to the following resources:
+#
+# - [face-makeup.PyTorch] - [https://github.com/zllrunning/face-makeup.PyTorch]
+# - [BiSeNet] [https://github.com/CoinCheung/BiSeNet]
+from .model import BiSeNet
+__version__ = "1.0.1"

bisnet/model.py ADDED Viewed

	@@ -0,0 +1,321 @@

+# MIT License
+#
+# Copyright (c) [2025] [[email protected]]
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+#######################################################################################
+#
+# This project is one of several repositories exploring image segmentation techniques.
+# All related projects and interactive demos can be found at:
+# https://huggingface.co/spaces/leonelhs/removators
+# Self app: https://huggingface.co/spaces/leonelhs/rembg
+#
+# Source code is based on or inspired by several projects.
+# For more details and proper attribution, please refer to the following resources:
+#
+# - [face-makeup.PyTorch] - [https://github.com/zllrunning/face-makeup.PyTorch]
+# - [BiSeNet] [https://github.com/CoinCheung/BiSeNet]
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from .resnet import Resnet18
+# from modules.bn import InPlaceABNSync as BatchNorm2d
+class ConvBNReLU(nn.Module):
+    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
+        super(ConvBNReLU, self).__init__()
+        self.conv = nn.Conv2d(in_chan,
+                              out_chan,
+                              kernel_size=ks,
+                              stride=stride,
+                              padding=padding,
+                              bias=False)
+        self.bn = nn.BatchNorm2d(out_chan)
+        self.init_weight()
+    def forward(self, x):
+        x = self.conv(x)
+        x = F.relu(self.bn(x))
+        return x
+    def init_weight(self):
+        for ly in self.children():
+            if isinstance(ly, nn.Conv2d):
+                nn.init.kaiming_normal_(ly.weight, a=1)
+                if ly.bias is not None:
+                    nn.init.constant_(ly.bias, 0)
+class BiSeNetOutput(nn.Module):
+    def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
+        super(BiSeNetOutput, self).__init__()
+        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
+        self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
+        self.init_weight()
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.conv_out(x)
+        return x
+    def init_weight(self):
+        for ly in self.children():
+            if isinstance(ly, nn.Conv2d):
+                nn.init.kaiming_normal_(ly.weight, a=1)
+                if ly.bias is not None:
+                    nn.init.constant_(ly.bias, 0)
+    def get_params(self):
+        wd_params, nowd_params = [], []
+        for name, module in self.named_modules():
+            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
+                wd_params.append(module.weight)
+                if module.bias is not None:
+                    nowd_params.append(module.bias)
+            elif isinstance(module, nn.BatchNorm2d):
+                nowd_params += list(module.parameters())
+        return wd_params, nowd_params
+class AttentionRefinementModule(nn.Module):
+    def __init__(self, in_chan, out_chan, *args, **kwargs):
+        super(AttentionRefinementModule, self).__init__()
+        self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
+        self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
+        self.bn_atten = nn.BatchNorm2d(out_chan)
+        self.sigmoid_atten = nn.Sigmoid()
+        self.init_weight()
+    def forward(self, x):
+        feat = self.conv(x)
+        atten = F.avg_pool2d(feat, feat.size()[2:])
+        atten = self.conv_atten(atten)
+        atten = self.bn_atten(atten)
+        atten = self.sigmoid_atten(atten)
+        return torch.mul(feat, atten)
+    def init_weight(self):
+        for ly in self.children():
+            if isinstance(ly, nn.Conv2d):
+                nn.init.kaiming_normal_(ly.weight, a=1)
+                if ly.bias is not None:
+                    nn.init.constant_(ly.bias, 0)
+class ContextPath(nn.Module):
+    def __init__(self, *args, **kwargs):
+        super(ContextPath, self).__init__()
+        self.resnet = Resnet18()
+        self.arm16 = AttentionRefinementModule(256, 128)
+        self.arm32 = AttentionRefinementModule(512, 128)
+        self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
+        self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
+        self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)
+        self.init_weight()
+    def forward(self, x):
+        H0, W0 = x.size()[2:]
+        feat8, feat16, feat32 = self.resnet(x)
+        H8, W8 = feat8.size()[2:]
+        H16, W16 = feat16.size()[2:]
+        H32, W32 = feat32.size()[2:]
+        avg = F.avg_pool2d(feat32, feat32.size()[2:])
+        avg = self.conv_avg(avg)
+        avg_up = F.interpolate(avg, (H32, W32), mode='nearest')
+        feat32_arm = self.arm32(feat32)
+        feat32_sum = feat32_arm + avg_up
+        feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest')
+        feat32_up = self.conv_head32(feat32_up)
+        feat16_arm = self.arm16(feat16)
+        feat16_sum = feat16_arm + feat32_up
+        feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest')
+        feat16_up = self.conv_head16(feat16_up)
+        return feat8, feat16_up, feat32_up  # x8, x8, x16
+    def init_weight(self):
+        for ly in self.children():
+            if isinstance(ly, nn.Conv2d):
+                nn.init.kaiming_normal_(ly.weight, a=1)
+                if ly.bias is not None:
+                    nn.init.constant_(ly.bias, 0)
+    def get_params(self):
+        wd_params, nowd_params = [], []
+        for name, module in self.named_modules():
+            if isinstance(module, (nn.Linear, nn.Conv2d)):
+                wd_params.append(module.weight)
+                if module.bias is not None:
+                    nowd_params.append(module.bias)
+            elif isinstance(module, nn.BatchNorm2d):
+                nowd_params += list(module.parameters())
+        return wd_params, nowd_params
+# This is not used, since I replace this with the resnet feature with the same size
+class SpatialPath(nn.Module):
+    def __init__(self, *args, **kwargs):
+        super(SpatialPath, self).__init__()
+        self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
+        self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
+        self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
+        self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
+        self.init_weight()
+    def forward(self, x):
+        feat = self.conv1(x)
+        feat = self.conv2(feat)
+        feat = self.conv3(feat)
+        feat = self.conv_out(feat)
+        return feat
+    def init_weight(self):
+        for ly in self.children():
+            if isinstance(ly, nn.Conv2d):
+                nn.init.kaiming_normal_(ly.weight, a=1)
+                if ly.bias is not None:
+                    nn.init.constant_(ly.bias, 0)
+    def get_params(self):
+        wd_params, nowd_params = [], []
+        for name, module in self.named_modules():
+            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
+                wd_params.append(module.weight)
+                if module.bias is not None:
+                    nowd_params.append(module.bias)
+            elif isinstance(module, nn.BatchNorm2d):
+                nowd_params += list(module.parameters())
+        return wd_params, nowd_params
+class FeatureFusionModule(nn.Module):
+    def __init__(self, in_chan, out_chan, *args, **kwargs):
+        super(FeatureFusionModule, self).__init__()
+        self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
+        self.conv1 = nn.Conv2d(out_chan,
+                               out_chan // 4,
+                               kernel_size=1,
+                               stride=1,
+                               padding=0,
+                               bias=False)
+        self.conv2 = nn.Conv2d(out_chan // 4,
+                               out_chan,
+                               kernel_size=1,
+                               stride=1,
+                               padding=0,
+                               bias=False)
+        self.relu = nn.ReLU(inplace=True)
+        self.sigmoid = nn.Sigmoid()
+        self.init_weight()
+    def forward(self, fsp, fcp):
+        fcat = torch.cat([fsp, fcp], dim=1)
+        feat = self.convblk(fcat)
+        atten = F.avg_pool2d(feat, feat.size()[2:])
+        atten = self.conv1(atten)
+        atten = self.relu(atten)
+        atten = self.conv2(atten)
+        atten = self.sigmoid(atten)
+        feat_atten = torch.mul(feat, atten)
+        feat_out = feat_atten + feat
+        return feat_out
+    def init_weight(self):
+        for ly in self.children():
+            if isinstance(ly, nn.Conv2d):
+                nn.init.kaiming_normal_(ly.weight, a=1)
+                if ly.bias is not None:
+                    nn.init.constant_(ly.bias, 0)
+    def get_params(self):
+        wd_params, nowd_params = [], []
+        for name, module in self.named_modules():
+            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
+                wd_params.append(module.weight)
+                if module.bias is not None:
+                    nowd_params.append(module.bias)
+            elif isinstance(module, nn.BatchNorm2d):
+                nowd_params += list(module.parameters())
+        return wd_params, nowd_params
+class BiSeNet(nn.Module):
+    def __init__(self, n_classes, *args, **kwargs):
+        super(BiSeNet, self).__init__()
+        self.cp = ContextPath()
+        # here self.sp is deleted
+        self.ffm = FeatureFusionModule(256, 256)
+        self.conv_out = BiSeNetOutput(256, 256, n_classes)
+        self.conv_out16 = BiSeNetOutput(128, 64, n_classes)
+        self.conv_out32 = BiSeNetOutput(128, 64, n_classes)
+        self.init_weight()
+    def forward(self, x):
+        H, W = x.size()[2:]
+        feat_res8, feat_cp8, feat_cp16 = self.cp(x)  # here return res3b1 feature
+        feat_sp = feat_res8  # use res3b1 feature to replace spatial path feature
+        feat_fuse = self.ffm(feat_sp, feat_cp8)
+        feat_out = self.conv_out(feat_fuse)
+        feat_out16 = self.conv_out16(feat_cp8)
+        feat_out32 = self.conv_out32(feat_cp16)
+        feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True)
+        feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True)
+        feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True)
+        return feat_out, feat_out16, feat_out32
+    def init_weight(self):
+        for ly in self.children():
+            if isinstance(ly, nn.Conv2d):
+                nn.init.kaiming_normal_(ly.weight, a=1)
+                if ly.bias is not None:
+                    nn.init.constant_(ly.bias, 0)
+    def get_params(self):
+        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
+        for name, child in self.named_children():
+            child_wd_params, child_nowd_params = child.get_params()
+            if isinstance(child, FeatureFusionModule) or isinstance(child, BiSeNetOutput):
+                lr_mul_wd_params += child_wd_params
+                lr_mul_nowd_params += child_nowd_params
+            else:
+                wd_params += child_wd_params
+                nowd_params += child_nowd_params
+        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
+if __name__ == "__main__":
+    net = BiSeNet(19)
+    net.cuda()
+    net.eval()
+    in_ten = torch.randn(16, 3, 640, 480).cuda()
+    out, out16, out32 = net(in_ten)
+    print(out.shape)
+    net.get_params()

bisnet/resnet.py ADDED Viewed

	@@ -0,0 +1,145 @@

+#######################################################################################
+#
+# MIT License
+#
+# Copyright (c) [2025] [[email protected]]
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+#######################################################################################
+#
+# This project is one of several repositories exploring image segmentation techniques.
+# All related projects and interactive demos can be found at:
+# https://huggingface.co/spaces/leonelhs/removators
+# Self app: https://huggingface.co/spaces/leonelhs/rembg
+#
+# Source code is based on or inspired by several projects.
+# For more details and proper attribution, please refer to the following resources:
+#
+# - [face-makeup.PyTorch] - [https://github.com/zllrunning/face-makeup.PyTorch]
+# - [BiSeNet] [https://github.com/CoinCheung/BiSeNet]
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from huggingface_hub import hf_hub_download
+# from modules.bn import InPlaceABNSync as BatchNorm2d
+REPO_ID = "leonelhs/faceparser"
+CKPT = "resnet18-5c106cde.pth"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+def conv3x3(in_planes, out_planes, stride=1):
+    """3x3 convolution with padding"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                     padding=1, bias=False)
+class BasicBlock(nn.Module):
+    def __init__(self, in_chan, out_chan, stride=1):
+        super(BasicBlock, self).__init__()
+        self.conv1 = conv3x3(in_chan, out_chan, stride)
+        self.bn1 = nn.BatchNorm2d(out_chan)
+        self.conv2 = conv3x3(out_chan, out_chan)
+        self.bn2 = nn.BatchNorm2d(out_chan)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = None
+        if in_chan != out_chan or stride != 1:
+            self.downsample = nn.Sequential(
+                nn.Conv2d(in_chan, out_chan,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(out_chan),
+            )
+    def forward(self, x):
+        residual = self.conv1(x)
+        residual = F.relu(self.bn1(residual))
+        residual = self.conv2(residual)
+        residual = self.bn2(residual)
+        shortcut = x
+        if self.downsample is not None:
+            shortcut = self.downsample(x)
+        out = shortcut + residual
+        out = self.relu(out)
+        return out
+def create_layer_basic(in_chan, out_chan, bnum, stride=1):
+    layers = [BasicBlock(in_chan, out_chan, stride=stride)]
+    for i in range(bnum - 1):
+        layers.append(BasicBlock(out_chan, out_chan, stride=1))
+    return nn.Sequential(*layers)
+class Resnet18(nn.Module):
+    def __init__(self):
+        super(Resnet18, self).__init__()
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
+                               bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
+        self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
+        self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
+        self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)
+        self.init_weight()
+    def forward(self, x):
+        x = self.conv1(x)
+        x = F.relu(self.bn1(x))
+        x = self.maxpool(x)
+        x = self.layer1(x)
+        feat8 = self.layer2(x)  # 1/8
+        feat16 = self.layer3(feat8)  # 1/16
+        feat32 = self.layer4(feat16)  # 1/32
+        return feat8, feat16, feat32
+    def init_weight(self):
+        checkpoint = hf_hub_download(repo_id=REPO_ID, filename=CKPT)
+        state_dict = torch.load(checkpoint, map_location=device, weights_only=False)
+        self_state_dict = self.state_dict()
+        for k, v in state_dict.items():
+            if 'fc' in k: continue
+            self_state_dict.update({k: v})
+        self.load_state_dict(self_state_dict)
+    def get_params(self):
+        wd_params, nowd_params = [], []
+        for name, module in self.named_modules():
+            if isinstance(module, (nn.Linear, nn.Conv2d)):
+                wd_params.append(module.weight)
+                if not module.bias is None:
+                    nowd_params.append(module.bias)
+            elif isinstance(module, nn.BatchNorm2d):
+                nowd_params += list(module.parameters())
+        return wd_params, nowd_params
+if __name__ == "__main__":
+    net = Resnet18()
+    x = torch.randn(16, 3, 224, 224)
+    out = net(x)
+    print(out[0].size())
+    print(out[1].size())
+    print(out[2].size())
+    net.get_params()

requirements.txt CHANGED Viewed

@@ -1,5 +1,7 @@
-torch>=2.0.1
-torchvision~=0.15.2
-pillow~=9.5.0
-bisnet~=1.0.1
-opencv-python

+torch>=2.8.0
+torchvision>=0.23.0
+opencv-python-headless>=4.12.0.88
+gradio~=5.46.1
+numpy~=2.1.2
+pillow~=11.0.0
+huggingface-hub~=0.35.0