Spaces:
Build error
Build error
| !pip install tensorflow tensorflow_hub tensorflow_text | |
| from huggingface_hub import from_pretrained_keras | |
| import numpy as np | |
| import pandas as pd | |
| import tensorflow as tf | |
| import tensorflow_hub as hub | |
| import tensorflow_text as text | |
| from tensorflow import keras | |
| import gradio as gr | |
def make_bert_preprocessing_model(sentence_features, seq_length=128):
    """Build a Keras model that maps raw string features to BERT inputs.

    Args:
      sentence_features: List of names for the string-valued input features.
        One scalar string input is created per name, in the given order.
      seq_length: Integer sequence length forwarded to the packing step.

    Returns:
      A Keras Model that can be called on a list or dict of string Tensors
      (ordered / named as in sentence_features) and returns a dict of
      tensors for input to BERT.
    """
    # One scalar string input per feature name.
    string_inputs = []
    for feature_name in sentence_features:
        string_inputs.append(
            tf.keras.layers.Input(shape=(), dtype=tf.string, name=feature_name)
        )

    # The preprocessing module is loaded from the module-level
    # bert_preprocess_path; it provides both the tokenizer and the packer.
    preprocessor = hub.load(bert_preprocess_path)

    # Tokenize every input to word pieces with one shared layer.
    tokenize_layer = hub.KerasLayer(preprocessor.tokenize, name="tokenizer")
    tokenized = [tokenize_layer(string_input) for string_input in string_inputs]

    # Pack the (untruncated) token segments into fixed-length BERT inputs.
    pack_layer = hub.KerasLayer(
        preprocessor.bert_pack_inputs,
        arguments={"seq_length": seq_length},
        name="packer",
    )
    packed = pack_layer(tokenized)

    return keras.Model(string_inputs, packed)
def preprocess_image(image_path, resize):
    """Read an image file, decode it with 3 channels and resize it.

    Args:
      image_path: Scalar string (or string tensor) holding the path of the
        image file. Files ending in ".jpg" / ".jpeg" (any letter case) are
        decoded as JPEG; everything else is decoded as PNG.
      resize: Target size passed straight to tf.image.resize.

    Returns:
      The decoded and resized image tensor.
    """
    # Split on "." (the default separator is whitespace, which would return
    # the whole path) and lower-case so ".JPG" / ".JPEG" are recognised too.
    extension = tf.strings.lower(tf.strings.split(image_path, sep=".")[-1])
    is_jpeg = tf.logical_or(
        tf.equal(extension, "jpg"),
        tf.equal(extension, "jpeg"),
    )

    image = tf.io.read_file(image_path)
    if is_jpeg:
        image = tf.image.decode_jpeg(image, 3)
    else:
        image = tf.image.decode_png(image, 3)
    return tf.image.resize(image, resize)
def preprocess_text(text_1, text_2):
    """Run a pair of texts through the module-level BERT preprocessing model.

    Args:
      text_1: First text of the pair.
      text_2: Second text of the pair.

    Returns:
      A dict with one entry per name in bert_input_features; each value is
      the corresponding preprocessing output after tf.squeeze.
    """
    # Wrap each text in a one-element list before converting to a tensor.
    first_batch = tf.convert_to_tensor([text_1])
    second_batch = tf.convert_to_tensor([text_2])

    packed = bert_preprocess_model([first_batch, second_batch])

    # Squeeze every requested feature.
    return {name: tf.squeeze(packed[name]) for name in bert_input_features}
def preprocess_text_and_image(sample, resize):
    """Preprocess both images and the text pair of one sample.

    Args:
      sample: Mapping with the keys 'image_1_path', 'image_2_path',
        'text_1' and 'text_2'.
      resize: Target image size forwarded to preprocess_image.

    Returns:
      A dict with the keys "image_1", "image_2" and "text" holding the two
      preprocessed images and the preprocessed text features.
    """
    return {
        "image_1": preprocess_image(sample["image_1_path"], resize),
        "image_2": preprocess_image(sample["image_2_path"], resize),
        "text": preprocess_text(sample["text_1"], sample["text_2"]),
    }
def classify_info(image_1, text_1, image_2, text_2):
    """Predict the relation label for one (image, text) pair of inputs.

    Args:
      image_1: Path of the first image.
      text_1: First text.
      image_2: Path of the second image.
      text_2: Second text.

    Returns:
      The entry of the module-level labels dict selected by the argmax of
      the model output.
    """
    record = {
        "image_1_path": image_1,
        "image_2_path": image_2,
        "text_1": text_1,
        "text_2": text_2,
    }
    frame = pd.DataFrame(record, index=[0])

    # The features are paired with a constant [0]; presumably a dummy label
    # so the dataset has a (features, label) layout -- confirm against the
    # model's training pipeline.
    dataset = tf.data.Dataset.from_tensor_slices((dict(frame), [0]))
    dataset = dataset.map(
        lambda features, target: (
            preprocess_text_and_image(features, resize),
            target,
        )
    )
    dataset = dataset.cache()
    dataset = dataset.batch(1).prefetch(tf.data.AUTOTUNE)

    scores = model.predict(dataset)
    return labels[np.argmax(scores)]
# Pretrained multimodal-entailment model fetched from the Hugging Face Hub.
model = from_pretrained_keras("keras-io/multimodal-entailment")

# Target size handed to tf.image.resize for both input images.
resize = (128, 128)

# Keys read from the BERT preprocessing output in preprocess_text.
bert_input_features = ["input_word_ids", "input_type_ids", "input_mask"]

# Not referenced in the visible part of this file; kept for other users.
bert_model_path = "https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-256_A-4/1"
bert_preprocess_path = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"

# Must stay below bert_preprocess_path: the builder reads that global.
bert_preprocess_model = make_bert_preprocessing_model(["text_1", "text_2"])

# Maps the argmax index of the model output to the label shown in the UI.
labels = {0: "Contradictory", 1: "Implies", 2: "No Entailment"}

# NOTE(review): gr.inputs / gr.outputs are the legacy Gradio namespaces;
# confirm the pinned Gradio version before moving to gr.Image / gr.Textbox /
# gr.Label.
image_1 = gr.inputs.Image(type="filepath")
image_2 = gr.inputs.Image(type="filepath")
text_1 = gr.inputs.Textbox(lines=5)
text_2 = gr.inputs.Textbox(lines=5)
label = gr.outputs.Label()

# The input order must match the parameter order of classify_info.
iface = gr.Interface(
    classify_info,
    inputs=[image_1, text_1, image_2, text_2],
    outputs=label,
)
iface.launch()