wissemkarous committed on
Commit
a43b81e
·
verified ·
1 Parent(s): 2cb7d2f
Files changed (2) hide show
  1. app.py +125 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tensorflow as tf
3
+ from typing import List
4
+ import os
5
+ import cv2
6
+ from tensorflow.keras.models import Sequential
7
+ from tensorflow.keras.optimizers import legacy
8
+ from tensorflow.keras.layers import Conv3D, LSTM, Dense, Dropout, Bidirectional, MaxPool3D, Activation, Reshape, SpatialDropout3D, BatchNormalization, TimeDistributed, Flatten
9
+ from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
10
+
11
+
12
+
13
def convert_mp4_to_mpg(input_file, output_file):
    """
    Convert an MP4 video file to an MPG video file using OpenCV.

    Frames are copied one by one from the input to the output at the
    input's own resolution. The output uses the frame rate reported by the
    input and falls back to 30 fps when OpenCV does not report a usable one.

    Args:
        input_file (str): Path to the input MP4 file.
        output_file (str): Path to the output MPG file.

    Returns:
        None

    Raises:
        Exception: If the input video cannot be opened, or if the output
            writer cannot be opened for ``output_file``.
    """
    cap = cv2.VideoCapture(input_file)
    out = None
    try:
        # Check if the video file was opened successfully
        if not cap.isOpened():
            raise Exception(f"Could not open video file: {input_file}")

        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )

        # Keep the source frame rate so playback speed is not altered;
        # the comparison is also False for NaN, which triggers the fallback.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):
            fps = 30.0

        # Define the codec and create a VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'mpg2')  # Use 'mpg2' codec for MPG files
        out = cv2.VideoWriter(output_file, fourcc, fps, frame_size)
        if not out.isOpened():
            # Without this check an unsupported codec/path silently
            # produces no output file at all.
            raise Exception(f"Could not open video writer for: {output_file}")

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            out.write(frame)
    finally:
        # Release the video objects even when an error interrupts the copy
        cap.release()
        if out is not None:
            out.release()
44
+
45
def load_video(video_path):
    """
    Read a video and return its cropped, grayscale, standardized frames.

    Every decoded frame is reduced to a single grayscale channel and cropped
    to rows 190-235 and columns 80-219. The stacked frames are then shifted
    by their mean and divided by their standard deviation.

    Args:
        video_path (str): Path to the video file to read.

    Returns:
        A float32 tensor holding one normalized crop per decoded frame.

    Raises:
        ValueError: If no frame could be read from ``video_path``.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        for _ in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))):
            ret, frame = cap.read()
            if not ret:
                # The reported frame count is not guaranteed to match the
                # number of frames that actually decode; stop at the first
                # failed read instead of passing None to TensorFlow.
                break
            frame = tf.image.rgb_to_grayscale(frame)
            frames.append(frame[190:236, 80:220, :])
    finally:
        # Always free the capture handle, even if a frame fails to process
        cap.release()

    if not frames:
        raise ValueError(f"Could not read any frames from video: {video_path}")

    # NOTE(review): the mean is taken over the frames in their decoded dtype
    # while the std is taken over float32 values. This is kept exactly as it
    # was; presumably it matches the preprocessing used at training time.
    mean = tf.math.reduce_mean(frames)
    std = tf.math.reduce_std(tf.cast(frames, tf.float32))
    return tf.cast((frames - mean), tf.float32) / std
58
+
59
def load_data(path: "tf.Tensor"):
    """
    Load the preprocessed frames of the dataset video that matches ``path``.

    Only the base file name of ``path`` (no directories, nothing after the
    first dot) is used; the frames are read from ``data/s1/<name>.mpg``.

    Args:
        path: Object whose ``.numpy()`` returns the file path as bytes
            (e.g. a scalar string tensor). Both forward-slash and backslash
            separators are accepted.

    Returns:
        The normalized frames returned by ``load_video``.
    """
    path = bytes.decode(path.numpy())
    # Treat Windows and POSIX separators alike so that directories never
    # leak into the file name, whichever platform produced the path.
    file_name = path.replace('\\', '/').split('/')[-1].split('.')[0]
    video_path = os.path.join('data', 's1', f'{file_name}.mpg')
    return load_video(video_path)
70
+
71
+
72
# Characters the model can emit: lowercase letters, the punctuation marks
# ' ? !, the digits 1-9 and the space character.
vocab = list("abcdefghijklmnopqrstuvwxyz'?!123456789 ")

# Forward lookup (character -> integer id); the empty string is the
# out-of-vocabulary token.
char_to_num = tf.keras.layers.StringLookup(vocabulary=vocab, oov_token="")

# Reverse lookup (integer id -> character) built from the same vocabulary.
num_to_char = tf.keras.layers.StringLookup(
    invert=True,
    oov_token="",
    vocabulary=char_to_num.get_vocabulary(),
)
77
+
78
# Network definition: three Conv3D/ReLU/MaxPool3D stages, a per-time-step
# flatten, two bidirectional LSTM layers with dropout, and a softmax over
# the vocabulary plus one extra class.
model = Sequential([
    # Convolutional stage 1 (input: 75 frames of 46x140 single-channel crops)
    Conv3D(128, 3, input_shape=(75, 46, 140, 1), padding='same'),
    Activation('relu'),
    MaxPool3D((1, 2, 2)),

    # Convolutional stage 2
    Conv3D(256, 3, padding='same'),
    Activation('relu'),
    MaxPool3D((1, 2, 2)),

    # Convolutional stage 3
    Conv3D(75, 3, padding='same'),
    Activation('relu'),
    MaxPool3D((1, 2, 2)),

    # Flatten each time step independently so the time axis is preserved
    TimeDistributed(Flatten()),

    # Recurrent stage
    Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)),
    Dropout(.5),
    Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)),
    Dropout(.5),

    # Per-time-step character distribution
    Dense(
        char_to_num.vocabulary_size() + 1,
        kernel_initializer='he_normal',
        activation='softmax',
    ),
])
model.summary()

# The legacy Adam optimizer is used here; swap it for whichever legacy
# optimizer was used during training if that differs.
optimizer = legacy.Adam(learning_rate=0.001)

model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Restore the trained weights from the checkpoint
model.load_weights('model/checkpoint')
108
+
109
def Predict(Video):
    """
    Run the lip-reading model on a video and return the decoded sentence.

    Args:
        Video (str): File path of the video handed over by the Gradio
            "video" input.

    Returns:
        str: The predicted text, obtained by greedy CTC decoding.

    Raises:
        ValueError: If no video was supplied.
    """
    if not Video:
        # Fail with a readable message instead of an opaque tensor
        # conversion error when nothing was supplied.
        raise ValueError("No video was provided.")

    sample = load_data(tf.convert_to_tensor(Video))
    yhat = model.predict(tf.expand_dims(sample, axis=0))

    # Take the sequence length from the prediction itself rather than
    # hard-coding the number of time steps.
    decoded = tf.keras.backend.ctc_decode(
        yhat, input_length=[yhat.shape[1]], greedy=True
    )[0][0].numpy()

    # Only one video is predicted, so only the first decoded sequence is used
    sentence = tf.strings.reduce_join(num_to_char(decoded[0]))
    return sentence.numpy().decode('utf-8')
116
+
117
+
118
# Gradio front end: a video input is passed to Predict and the returned
# text is shown as the output.
interface = gr.Interface(
    fn=Predict,
    inputs="video",
    outputs="text",
    title='Video Lip Reading',
    description="""Wlc this code developped by wissem karous with <3' """,
)

# Start the app with debug mode enabled
interface.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ tensorflow
2
+ opencv-python