albertmartinez's picture
update mining
222cf81
raw
history blame contribute delete
5.39 kB
import multiprocessing
import threading
import gradio as gr
from mining import mining
from sts import sts
from utils import getDataFrame, save_to_csv, delete_folder_periodically
# Maximum number of simultaneous executions allowed for each CSV-upload event.
CONCURRENCY_LIMIT = 5

# Embedding models offered in both tabs; kept in one place so the two
# dropdowns cannot drift apart.
MODEL_CHOICES = [
    "Lajavaness/bilingual-embedding-large",
    "sentence-transformers/all-mpnet-base-v2",
    "intfloat/multilingual-e5-large-instruct",
]

# NOTE(review): the original indentation of this file was lost; the nesting
# below (in particular which container holds the download widgets) is
# reconstructed from the statement order -- confirm against the deployed layout.
with gr.Blocks() as demo:
    # ------------------------------------------------------------------
    # Tab 1: paraphrase mining inside a single uploaded sentence list.
    # ------------------------------------------------------------------
    with gr.Tab("Paraphrase Mining"):
        with gr.Row():
            gr.Markdown(
                "### Paraphrase mining is the task of finding paraphrases (texts with identical / similar meaning) in a large corpus of sentences")

        with gr.Row():
            with gr.Column():
                gr.Markdown("#### sentences")
                upload_button_sentences = gr.UploadButton(
                    label="upload sentences csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences = gr.Dataframe(
                    headers=["text"], col_count=1, label="sentences data"
                )
                # Preview the uploaded CSV in the dataframe next to the button.
                upload_button_sentences.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences,
                    outputs=output_data_sentences,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

        with gr.Row():
            with gr.Column():
                model = gr.Dropdown(MODEL_CHOICES, label="model", interactive=True)
                score_mining = gr.Number(label="score", value=0.96, interactive=True)
                submit_button_mining = gr.Button("Submit", variant="primary")

        with gr.Row():
            with gr.Column():
                output_mining = gr.Dataframe(
                    headers=["score", "sentence_1", "sentence_2"],
                    type="polars",
                    label="Mining",
                )

                # The uploaded file itself (not the preview dataframe) is
                # what gets passed to the mining function.
                submit_button_mining.click(
                    fn=mining,
                    inputs=[model, upload_button_sentences, score_mining],
                    outputs=output_mining,
                )

                download_button = gr.Button("Download Results as CSV", variant="huggingface")
                download_file = gr.File(label="Downloadable File")
                download_button.click(
                    fn=save_to_csv,
                    inputs=output_mining,
                    outputs=download_file,
                )

    # ------------------------------------------------------------------
    # Tab 2: semantic textual similarity between two uploaded lists.
    # ------------------------------------------------------------------
    with gr.Tab("Semantic Textual Similarity"):
        with gr.Row():  # Row for the title
            gr.Markdown(
                "### Semantic Textual Similarity (STS), we want to produce embeddings for all texts involved and calculate the similarities between them")

        with gr.Row():  # First row of two columns
            with gr.Column():
                gr.Markdown("#### sentences 1")
                upload_button_sentences1 = gr.UploadButton(
                    label="upload sentences 1 csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences1 = gr.Dataframe(
                    headers=["text"], col_count=1, label="sentences 1 data"
                )
                upload_button_sentences1.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences1,
                    outputs=output_data_sentences1,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

            with gr.Column():
                gr.Markdown("#### sentences 2")
                upload_button_sentences2 = gr.UploadButton(
                    label="upload sentences 2 csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences2 = gr.Dataframe(
                    headers=["text"], col_count=1, label="sentences 2 data"
                )
                upload_button_sentences2.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences2,
                    outputs=output_data_sentences2,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

        with gr.Row():
            with gr.Column():
                # Rebinds the module-level name `model`; the mining tab's
                # click handler already holds its own dropdown, so the two
                # tabs stay independent.
                model = gr.Dropdown(MODEL_CHOICES, label="model", interactive=True)
                score_sts = gr.Number(label="score", value=0.96, interactive=True)
                submit_button_sts = gr.Button("Submit", variant="primary")

        with gr.Row():
            with gr.Column():
                gr.Markdown("#### STS Results")
                output_sts = gr.Dataframe(
                    headers=["score", "sentence_1", "sentence_2"],
                    type="polars",
                    label="Semantic Textual Similarity",  # was misspelled "Similarit"
                )

                submit_button_sts.click(
                    fn=sts,
                    inputs=[model, upload_button_sentences1, upload_button_sentences2, score_sts],
                    outputs=output_sts,
                )

                download_button = gr.Button("Download Results as CSV", variant="huggingface")
                download_file = gr.File(label="Downloadable File")
                download_button.click(
                    fn=save_to_csv,
                    inputs=output_sts,
                    outputs=download_file,
                )
def main():
    """Start the background cleanup thread and launch the Gradio app."""
    # Must be selected before any worker process is created.
    multiprocessing.set_start_method("spawn")

    # Daemon thread so it never blocks interpreter shutdown.
    # Presumably wipes the "data" folder every 1800 seconds -- the exact
    # semantics live in utils.delete_folder_periodically.
    cleaner = threading.Thread(
        target=delete_folder_periodically,
        args=("data", 1800),
        daemon=True,
    )
    cleaner.start()

    # Log the installed Gradio version to ease debugging of deployments.
    print(gr.__version__)
    demo.launch()


if __name__ == "__main__":
    main()