File size: 2,610 Bytes
4ccdc70 192fef8 66779e6 192fef8 4ccdc70 df87262 4ccdc70 eec35d0 4ccdc70 eec35d0 4ccdc70 192fef8 4ccdc70 192fef8 4ccdc70 192fef8 4ccdc70 53fe126 4ccdc70 53fe126 4ccdc70 192fef8 4ccdc70 192fef8 4ccdc70 32b3519 192fef8 32b3519 39db9fe 4ccdc70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import csv
import html

import gradio as gr
import numpy as np
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sentencex import segment
from usearch.index import Index
from usearch.index import search, MetricKind, Matches, BatchMatches
# Opening HTML fragment (document head + tooltip stylesheet) that
# return_rf_scores() uses as the start of its output. The fragment ends with
# an open <body> tag; return_rf_scores() appends the closing tags.
#
# The stylesheet defines two classes used by the generated markup:
#   .tooltip      - the hoverable container wrapped around each sentence
#   .tooltiptext  - the hidden panel that fades in while hovering
#
# NOTE(review): `width: "100%";` below uses a quoted value, which is not a
# valid CSS length/percentage, so browsers will presumably ignore that
# declaration - confirm the intended tooltip width before changing it, since
# `margin-left: -60px` appears tuned for a narrower panel.
# NOTE(review): the arrow is placed above the panel (`bottom: 100%`) but
# colours its *top* border, which looks like it draws a downward-pointing
# triangle - confirm whether the bottom border was meant to be coloured.
HTML_Output = """<html><head><style>/* Tooltip container */
.tooltip {
position: relative;
width: 600px;
display: inline-block;
border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
}
/* Tooltip text */
.tooltip .tooltiptext {
visibility: hidden;
width: "100%";
background-color: #555;
color: #34e1eb;
text-align: center;
padding: 5px 0;
border-radius: 6px;
/* Position the tooltip text */
position: absolute;
z-index: 1;
top: 125%;
left: 50%;
margin-left: -60px;
/* Fade in tooltip */
opacity: 0;
transition: opacity 0.3s;
}
/* Tooltip arrow */
.tooltip .tooltiptext::before {
content: "";
position: absolute;
bottom: 100%;
left: 50%;
margin-left: -5px;
border-width: 5px;
border-style: solid;
border-color: #555 transparent transparent transparent;
}
/* Show the tooltip text when you mouse over the tooltip container */
.tooltip:hover .tooltiptext {
visibility: visible;
opacity: 1;
}</style></head><body>"""
# Embedding model shared by every request; loaded once at import time.
# NOTE(review): trust_remote_code=True presumably lets code shipped with the
# model repository run locally - confirm the repository is trusted/pinned.
model = SentenceTransformer("Corran/SciGenNomicEmbed", trust_remote_code=True)

# Candidate rhetoric-function labels, materialised as a plain list so they can
# be indexed by the integer keys that the nearest-neighbour search returns.
rf = list(
    load_dataset("Corran/RhetoricFunctionsList")["train"]["rhetoric_function"]
)

# Embeddings of all labels, computed once and reused as the search dataset.
rf_emb = model.encode(rf)
def get_matches(inputs):
    """Return the closest rhetoric functions for each input sentence.

    Args:
        inputs: Iterable of sentence strings.

    Returns:
        A list with one entry per sentence, in input order. Each entry is a
        list of ``(rhetoric_function, distance)`` tuples for the (up to)
        three nearest labels found by an exact squared-L2 search over
        ``rf_emb``; ``distance`` is rounded to two decimals and rendered as
        a string.
    """
    sentences = list(inputs)
    if not sentences:
        # Nothing to embed; avoid calling the model with an empty batch.
        return []

    # Encode every sentence in a single call so that batch_size actually
    # batches the work instead of running the model once per sentence.
    embeddings = model.encode(sentences, batch_size=128)

    paragraph_matches = []
    for embedding in embeddings:
        # Search one vector at a time: each row is 1-D, so the result is
        # always a single Matches object that iterates over Match entries.
        matches = search(rf_emb, embedding, 3, MetricKind.L2sq, exact=True)
        paragraph_matches.append(
            [
                (rf[match.key], str(round(match.distance, 2)))
                for match in matches
            ]
        )
    return paragraph_matches
def return_rf_scores(abstract):
    """Render an abstract as HTML with a rhetoric-function tooltip per sentence.

    The abstract is split into English sentences; each sentence is wrapped in
    a ``tooltip`` container whose hidden ``tooltiptext`` panel lists the
    matches returned by ``get_matches`` as ``label : distance`` lines.

    Args:
        abstract: Free text entered by the user.

    Returns:
        A complete HTML document string beginning with ``HTML_Output``.
    """
    sentences = list(segment("en", abstract))
    matches = get_matches(sentences)

    blocks = []
    for sentence, sentence_matches in zip(sentences, matches):
        # Labels are escaped as well as the sentence: both end up inside
        # markup and neither is guaranteed to be free of '<', '>' or '&'.
        tooltip = "\n".join(
            f"{html.escape(str(label))} : {score}<br>"
            for label, score in sentence_matches
        )
        # The sentence is user-supplied text, so escape it before embedding
        # it in the page to prevent markup injection and broken rendering.
        blocks.append(
            f'<div class="tooltip">{html.escape(sentence)}\n'
            f'<span class="tooltiptext">{tooltip}</span>\n'
            "</div><br>"
        )

    return HTML_Output + "".join(blocks) + "</body></html>"
# Load the example inputs shown under the interface: one example per
# tab-separated row of examples.tsv.
# newline="" lets the csv module handle line endings itself, which is needed
# for quoted fields that contain embedded newlines; the explicit encoding
# keeps the result independent of the platform default.
with open("examples.tsv", "r", newline="", encoding="utf-8") as ex:
    # Skip blank lines, which csv.reader yields as empty rows.
    examples = [
        row for row in csv.reader(ex, delimiter="\t", quotechar='"') if row
    ]

# Text in, rendered HTML out; launched at import time as the app entry point.
demo = gr.Interface(
    fn=return_rf_scores,
    inputs="text",
    outputs="html",
    examples=examples,
)
demo.launch()
|