Spaces:
Paused
Paused
unknown
committed on
Commit
·
93eb0ff
1
Parent(s):
be5b973
cuda
Browse files
app.py
CHANGED
|
@@ -136,7 +136,12 @@ class FoleyController:
|
|
| 136 |
cfg_scale_slider,
|
| 137 |
seed_textbox,
|
| 138 |
):
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
vision_transform_list = [
|
| 141 |
torchvision.transforms.Resize((128, 128)),
|
| 142 |
torchvision.transforms.CenterCrop((112, 112)),
|
|
@@ -153,7 +158,7 @@ class FoleyController:
|
|
| 153 |
frames, duration = read_frames_with_moviepy(input_video, max_frame_nums=max_frame_nums)
|
| 154 |
if duration >= 10:
|
| 155 |
duration = 10
|
| 156 |
-
time_frames = torch.FloatTensor(frames).permute(0, 3, 1, 2).to(
|
| 157 |
time_frames = video_transform(time_frames)
|
| 158 |
time_frames = {'frames': time_frames.unsqueeze(0).permute(0, 2, 1, 3, 4)}
|
| 159 |
preds = self.time_detector(time_frames)
|
|
@@ -165,7 +170,7 @@ class FoleyController:
|
|
| 165 |
# w -> b c h w
|
| 166 |
time_condition = torch.FloatTensor(time_condition).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(1, 1, 256, 1)
|
| 167 |
|
| 168 |
-
images = self.image_processor(images=frames, return_tensors="pt").to(
|
| 169 |
image_embeddings = self.image_encoder(**images).image_embeds
|
| 170 |
image_embeddings = torch.mean(image_embeddings, dim=0, keepdim=True).unsqueeze(0).unsqueeze(0)
|
| 171 |
neg_image_embeddings = torch.zeros_like(image_embeddings)
|
|
@@ -208,12 +213,6 @@ class FoleyController:
|
|
| 208 |
controller = FoleyController()
|
| 209 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 210 |
|
| 211 |
-
# move to gpu
|
| 212 |
-
controller.time_detector = controller.time_detector.to(device)
|
| 213 |
-
controller.pipeline = controller.pipeline.to(device)
|
| 214 |
-
controller.vocoder = controller.vocoder.to(device)
|
| 215 |
-
controller.image_encoder = controller.image_encoder.to(device)
|
| 216 |
-
|
| 217 |
with gr.Blocks(css=css) as demo:
|
| 218 |
gr.HTML(
|
| 219 |
'<h1 style="height: 136px; display: flex; align-items: center; justify-content: space-around;"><span style="height: 100%; width:136px;"><img src="file/foleycrafter.png" alt="logo" style="height: 100%; width:auto; object-fit: contain; margin: 0px 0px; padding: 0px 0px;"></span><strong style="font-size: 40px;">FoleyCrafter: Bring Silent Videos to Life with Lifelike and Synchronized Sounds</strong></h1>'
|
|
|
|
| 136 |
cfg_scale_slider,
|
| 137 |
seed_textbox,
|
| 138 |
):
|
| 139 |
+
device = 'cuda'
|
| 140 |
+
# move to gpu
|
| 141 |
+
controller.time_detector = controller.time_detector.to(device)
|
| 142 |
+
controller.pipeline = controller.pipeline.to(device)
|
| 143 |
+
controller.vocoder = controller.vocoder.to(device)
|
| 144 |
+
controller.image_encoder = controller.image_encoder.to(device)
|
| 145 |
vision_transform_list = [
|
| 146 |
torchvision.transforms.Resize((128, 128)),
|
| 147 |
torchvision.transforms.CenterCrop((112, 112)),
|
|
|
|
| 158 |
frames, duration = read_frames_with_moviepy(input_video, max_frame_nums=max_frame_nums)
|
| 159 |
if duration >= 10:
|
| 160 |
duration = 10
|
| 161 |
+
time_frames = torch.FloatTensor(frames).permute(0, 3, 1, 2).to(device)
|
| 162 |
time_frames = video_transform(time_frames)
|
| 163 |
time_frames = {'frames': time_frames.unsqueeze(0).permute(0, 2, 1, 3, 4)}
|
| 164 |
preds = self.time_detector(time_frames)
|
|
|
|
| 170 |
# w -> b c h w
|
| 171 |
time_condition = torch.FloatTensor(time_condition).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(1, 1, 256, 1)
|
| 172 |
|
| 173 |
+
images = self.image_processor(images=frames, return_tensors="pt").to(device)
|
| 174 |
image_embeddings = self.image_encoder(**images).image_embeds
|
| 175 |
image_embeddings = torch.mean(image_embeddings, dim=0, keepdim=True).unsqueeze(0).unsqueeze(0)
|
| 176 |
neg_image_embeddings = torch.zeros_like(image_embeddings)
|
|
|
|
| 213 |
controller = FoleyController()
|
| 214 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
with gr.Blocks(css=css) as demo:
|
| 217 |
gr.HTML(
|
| 218 |
'<h1 style="height: 136px; display: flex; align-items: center; justify-content: space-around;"><span style="height: 100%; width:136px;"><img src="file/foleycrafter.png" alt="logo" style="height: 100%; width:auto; object-fit: contain; margin: 0px 0px; padding: 0px 0px;"></span><strong style="font-size: 40px;">FoleyCrafter: Bring Silent Videos to Life with Lifelike and Synchronized Sounds</strong></h1>'
|