|
import gradio as gr |
|
import numpy as np |
|
from usearch.index import Index |
|
from sentence_transformers import SentenceTransformer |
|
from datasets import load_dataset |
|
from sentencex import segment |
|
from usearch.index import search, MetricKind, Matches, BatchMatches |
|
import csv |
|
|
|
# Opening of the HTML document returned by `return_rf_scores`: it carries the
# stylesheet for the hover tooltips and leaves <body> open so that callers can
# append their content and close the document themselves.
#
# The tooltip box is placed *below* the hovered text (`top: 125%`), so the
# arrow drawn above the box must point up: only the bottom border of the
# zero-size arrow element is coloured.
#
# NOTE(review): `width: "100%"` is a quoted string, which is not a valid CSS
# width, so browsers are expected to drop that declaration. It is kept as-is
# to preserve the current layout - confirm the intended width before fixing.
HTML_Output = """<html><head><style>/* Tooltip container */
.tooltip {
position: relative;
width: 600px;
display: inline-block;
border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
}

/* Tooltip text */
.tooltip .tooltiptext {
visibility: hidden;
width: "100%";
background-color: #555;
color: #34e1eb;
text-align: center;
padding: 5px 0;
border-radius: 6px;

/* Position the tooltip text */
position: absolute;
z-index: 1;
top: 125%;
left: 50%;
margin-left: -60px;

/* Fade in tooltip */
opacity: 0;
transition: opacity 0.3s;
}

/* Tooltip arrow */
.tooltip .tooltiptext::before {
content: "";
position: absolute;
bottom: 100%;
left: 50%;
margin-left: -5px;
border-width: 5px;
border-style: solid;
border-color: transparent transparent #555 transparent;
}

/* Show the tooltip text when you mouse over the tooltip container */
.tooltip:hover .tooltiptext {
visibility: visible;
opacity: 1;
}</style></head><body>"""
|
|
|
# Sentence-embedding model; it encodes both the reference list below and the
# sentences submitted by the user.
model = SentenceTransformer("Corran/SciGenNomicEmbed", trust_remote_code=True)

# Reference rhetoric-function labels and their embeddings, computed once when
# the module is loaded.
rf = list(load_dataset("Corran/RhetoricFunctionsList")["train"]["rhetoric_function"])
rf_emb = model.encode(rf)
|
|
|
|
|
def get_matches(inputs):
    """Find the closest rhetoric functions for each input sentence.

    Every sentence is embedded with the module-level ``model`` and compared
    against the pre-computed ``rf_emb`` matrix with an exact squared-L2
    search that requests three results per sentence.

    Args:
        inputs: Iterable of sentences to look up.

    Returns:
        A list with one entry per sentence, in input order. Each entry is a
        list of ``(rhetoric_function, distance)`` tuples, where
        ``rhetoric_function`` is the matching item of ``rf`` and ``distance``
        is the match distance rounded to two decimals and converted to a
        string.
    """
    paragraph_matches = []
    # Sentences are encoded one at a time, as before, so `search` receives a
    # single query per call and the result is iterated match by match.
    for sentence in inputs:
        embedding = model.encode(sentence, batch_size=128)
        matches = search(rf_emb, embedding, 3, MetricKind.L2sq, exact=True)
        paragraph_matches.append(
            [(rf[match.key], str(round(match.distance, 2))) for match in matches]
        )
    return paragraph_matches
|
|
|
|
|
def return_rf_scores(abstract):
    """Render an abstract as HTML with per-sentence rhetoric-function tooltips.

    The abstract is split into English sentences with ``segment``; the
    sentences are passed to ``get_matches`` and each one is emitted as a
    ``tooltip`` block whose hover text lists its matches and distances.

    Args:
        abstract: Text to analyse (the value of the Gradio text input).

    Returns:
        An HTML document as a string: ``HTML_Output`` followed by one tooltip
        block per sentence and the closing ``</body></html>``.
    """
    # Imported locally so the function does not depend on a new file-level
    # import.
    import html

    sentences = list(segment("en", abstract))
    matches = get_matches(sentences)

    parts = [HTML_Output]
    for sentence, sentence_matches in zip(sentences, matches):
        # Labels and sentences are escaped: the sentence is user-supplied, and
        # characters such as "<" or "&" (e.g. "p < 0.05") would otherwise be
        # parsed as markup.
        tooltip = "\n".join(
            f"{html.escape(str(label))} : {distance}<br>"
            for label, distance in sentence_matches
        )
        parts.append(
            f'<div class="tooltip">{html.escape(sentence)}\n'
            f'<span class="tooltiptext">{tooltip}</span>\n'
            "</div><br>"
        )
    parts.append("</body></html>")

    return "".join(parts)
|
|
|
# Example inputs offered in the UI, read from a tab-separated file with one
# example per row.
# newline="" lets the csv module handle line endings itself (required for
# quoted fields that contain newlines).
# NOTE(review): assumes examples.tsv is UTF-8 encoded - TODO confirm.
with open("examples.tsv", "r", newline="", encoding="utf-8") as ex:
    rd = csv.reader(ex, delimiter="\t", quotechar='"')
    # csv.reader yields an empty list for a blank line; skip those so no
    # empty example row is produced.
    examples = [row for row in rd if row]
|
|
|
|
|
# Gradio front end: a text box in, the HTML produced by `return_rf_scores`
# out, with the rows of `examples` offered as ready-made inputs.
demo = gr.Interface(
    fn=return_rf_scores,
    inputs="text",
    outputs="html",
    examples=examples,
)

demo.launch()
|
|