# Corran's picture
# Update app.py
# 39db9fe verified
import gradio as gr
import numpy as np
from usearch.index import Index
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
from sentencex import segment
from usearch.index import search, MetricKind, Matches, BatchMatches
import csv
# Opening fragment of the HTML returned by return_rf_scores: the <html>/<head>
# tags, an inline stylesheet for the hover tooltips, and the opening <body>.
# The per-sentence markup and the closing </body></html> are appended later.
# NOTE(review): `width: "100%"` below is a quoted value, which does not look
# like valid CSS for `width` — confirm whether the quotes are intended.
HTML_Output = """<html><head><style>/* Tooltip container */
.tooltip {
position: relative;
width: 600px;
display: inline-block;
border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
}
/* Tooltip text */
.tooltip .tooltiptext {
visibility: hidden;
width: "100%";
background-color: #555;
color: #34e1eb;
text-align: center;
padding: 5px 0;
border-radius: 6px;
/* Position the tooltip text */
position: absolute;
z-index: 1;
top: 125%;
left: 50%;
margin-left: -60px;
/* Fade in tooltip */
opacity: 0;
transition: opacity 0.3s;
}
/* Tooltip arrow */
.tooltip .tooltiptext::before {
content: "";
position: absolute;
bottom: 100%;
left: 50%;
margin-left: -5px;
border-width: 5px;
border-style: solid;
border-color: #555 transparent transparent transparent;
}
/* Show the tooltip text when you mouse over the tooltip container */
.tooltip:hover .tooltiptext {
visibility: visible;
opacity: 1;
}</style></head><body>"""
# Sentence-embedding model; it encodes both the reference labels below and
# the sentences passed to get_matches.
model = SentenceTransformer("Corran/SciGenNomicEmbed", trust_remote_code=True)

# Rhetoric-function labels from the dataset's "train" split, as a plain list.
rf = list(load_dataset("Corran/RhetoricFunctionsList")["train"]["rhetoric_function"])

# Label embeddings, computed once at import time and searched on every request.
rf_emb = model.encode(rf)
def get_matches(inputs):
    """Find the three closest rhetoric functions for each input sentence.

    Every item of ``inputs`` is encoded on its own with the module-level
    ``model`` and compared against ``rf_emb`` with an exact squared-L2
    search.

    Args:
        inputs: Iterable of sentences (presumably ``str``; each item is
            handed to ``model.encode`` unchanged).

    Returns:
        A list with one entry per input, in input order. Each entry is a
        list of ``(label, distance)`` tuples, where ``label`` is looked up
        in ``rf`` by the match key and ``distance`` is the match distance
        rounded to two decimals and converted to ``str``. An empty
        ``inputs`` yields an empty list.
    """
    # No `global` statement: the module-level names are only read here, and
    # the previous declaration listed `index`, which this module never defines.
    paragraph_matches = []
    for sentence in inputs:
        embedding = model.encode(sentence, batch_size=128)
        matches = search(rf_emb, embedding, 3, MetricKind.L2sq, exact=True)
        paragraph_matches.append(
            [(rf[m.key], str(round(m.distance, 2))) for m in matches]
        )
    return paragraph_matches
def return_rf_scores(abstract):
    """Render an abstract as HTML with a rhetoric-function tooltip per sentence.

    The abstract is split into English sentences with ``segment``; the
    sentences are passed to ``get_matches`` and each one is wrapped in a
    ``tooltip`` div whose hidden span lists the matched labels and their
    distances, one ``label : distance<br>`` entry per match.

    Args:
        abstract: Free text entered by the user.

    Returns:
        A string that starts with ``HTML_Output``, contains one tooltip div
        per sentence, and ends with ``</body></html>``.
    """
    # Imported locally so the function carries its own dependency.
    import html

    sentences = list(segment("en", abstract))
    matches = get_matches(sentences)

    parts = [HTML_Output]
    for sentence, sentence_matches in zip(sentences, matches):
        # The sentence is user input and the labels come from an external
        # dataset; both are escaped so "<", ">" and "&" are shown literally
        # instead of being parsed as markup.
        tooltip = "\n".join(
            f"{html.escape(str(label))} : {distance}<br>"
            for label, distance in sentence_matches
        )
        parts.append(
            f'<div class="tooltip">{html.escape(sentence)}\n'
            f'<span class="tooltiptext">{tooltip}</span>\n'
            "</div><br>"
        )
    parts.append("</body></html>")
    return "".join(parts)
# Rows of examples.tsv (tab-separated, double-quote quoting) become the
# example inputs passed to the Gradio interface.
# newline="" lets the csv module handle line endings inside quoted fields
# itself; the explicit encoding keeps reading independent of the platform
# locale. Blank lines parse as empty rows and are skipped.
with open("examples.tsv", "r", encoding="utf-8", newline="") as ex:
    examples = [row for row in csv.reader(ex, delimiter="\t", quotechar='"') if row]
# Text-in / HTML-out interface around return_rf_scores, pre-populated with
# the rows loaded from examples.tsv; launched as soon as the module runs.
demo = gr.Interface(
    fn=return_rf_scores,
    inputs="text",
    outputs="html",
    examples=examples,
)
demo.launch()