Spaces:
Running
Running
admin
committed on
Commit
·
d08ce40
1
Parent(s):
2f89051
fix frame display style
Browse files
app.py
CHANGED
|
@@ -105,6 +105,14 @@ def load(audio_path: str, converto="mel"):
|
|
| 105 |
return list(norm(Xtr_spec))
|
| 106 |
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
def infer(audio_path: str, log_name: str):
|
| 109 |
if not audio_path:
|
| 110 |
return None, "Please input an audio!"
|
|
@@ -113,6 +121,8 @@ def infer(audio_path: str, log_name: str):
|
|
| 113 |
spec = log_name.split("_")[-1]
|
| 114 |
try:
|
| 115 |
input = load(audio_path, converto=spec)
|
|
|
|
|
|
|
| 116 |
if "vit" in backbone or "swin" in backbone:
|
| 117 |
eval_net = t_EvalNet(
|
| 118 |
backbone,
|
|
@@ -140,9 +150,14 @@ def infer(audio_path: str, log_name: str):
|
|
| 140 |
for y in output:
|
| 141 |
preds = list(y.T)
|
| 142 |
for pred in preds:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
outputs.append(
|
| 144 |
{
|
| 145 |
-
"Frame":
|
| 146 |
"Tech": TRANSLATE[CLASSES[torch.argmax(pred).item()]],
|
| 147 |
}
|
| 148 |
)
|
|
|
|
| 105 |
return list(norm(Xtr_spec))
|
| 106 |
|
| 107 |
|
| 108 |
+
def format_second(seconds):
    """Format a duration given in seconds as ``HH:MM:SS.mmm``.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        A zero-padded timestamp string, e.g. ``1.5`` -> ``"00:00:01.500"``.
    """
    # Work in integer milliseconds so that rounding the fraction carries
    # correctly into the seconds field (e.g. 59.9996 -> 00:01:00.000) and
    # the fractional part is rendered as three plain digits rather than
    # as a "0.xxx" float appended after the dot.
    total_millis = int(round(float(seconds) * 1000))
    whole_seconds, millis = divmod(total_millis, 1000)
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}:{secs:02}.{millis:03}"
|
| 114 |
+
|
| 115 |
+
|
| 116 |
def infer(audio_path: str, log_name: str):
|
| 117 |
if not audio_path:
|
| 118 |
return None, "Please input an audio!"
|
|
|
|
| 121 |
spec = log_name.split("_")[-1]
|
| 122 |
try:
|
| 123 |
input = load(audio_path, converto=spec)
|
| 124 |
+
dur = librosa.get_duration(path=audio_path)
|
| 125 |
+
frames_per_3s = input[0].shape[1]
|
| 126 |
if "vit" in backbone or "swin" in backbone:
|
| 127 |
eval_net = t_EvalNet(
|
| 128 |
backbone,
|
|
|
|
| 150 |
for y in output:
|
| 151 |
preds = list(y.T)
|
| 152 |
for pred in preds:
|
| 153 |
+
start = index * TIME_LENGTH / frames_per_3s
|
| 154 |
+
if start > dur:
|
| 155 |
+
break
|
| 156 |
+
|
| 157 |
+
to = (index + 1) * TIME_LENGTH / frames_per_3s
|
| 158 |
outputs.append(
|
| 159 |
{
|
| 160 |
+
"Frame": f"{format_second(start)} - {format_second(to)}",
|
| 161 |
"Tech": TRANSLATE[CLASSES[torch.argmax(pred).item()]],
|
| 162 |
}
|
| 163 |
)
|