# Hugging Face Space: KaiserML / Demo-Sci-Rhetoric-Classifier (status when captured: Sleeping)
# File size: 2,610 bytes
# Revisions that touched this file (from the page's blame gutter):
#   4ccdc70, 192fef8, 66779e6, df87262, eec35d0, 53fe126, 32b3519, 39db9fe

import csv
import html

import gradio as gr
import numpy as np
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sentencex import segment
from usearch.index import Index
from usearch.index import search, MetricKind, Matches, BatchMatches

# Opening fragment of the HTML document returned by return_rf_scores: an
# inline stylesheet for the hover tooltips (.tooltip / .tooltiptext) followed
# by the opening <body> tag. The fragment is deliberately left unclosed;
# return_rf_scores appends the per-sentence markup and "</body></html>".
# NOTE(review): `width: "100%"` below is a quoted string, which is not a valid
# CSS length, so browsers will presumably ignore that declaration — confirm
# whether an unquoted `width: 100%` was intended before changing it.
HTML_Output = """<html><head><style>/* Tooltip container */
.tooltip {
  position: relative;
  width: 600px;
  display: inline-block;
  border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
}

/* Tooltip text */
.tooltip .tooltiptext {
  visibility: hidden;
  width: "100%";
  background-color: #555;
  color: #34e1eb;
  text-align: center;
  padding: 5px 0;
  border-radius: 6px;

  /* Position the tooltip text */
  position: absolute;
  z-index: 1;
  top: 125%;
  left: 50%;
  margin-left: -60px;

  /* Fade in tooltip */
  opacity: 0;
  transition: opacity 0.3s;
}

/* Tooltip arrow */
.tooltip .tooltiptext::before {
  content: "";
  position: absolute;
  bottom: 100%;
  left: 50%;
  margin-left: -5px;
  border-width: 5px;
  border-style: solid;
  border-color: #555 transparent transparent transparent;
}

/* Show the tooltip text when you mouse over the tooltip container */
.tooltip:hover .tooltiptext {
  visibility: visible;
  opacity: 1;
}</style></head><body>"""

# Sentence-embedding model shared by the whole app. It is loaded once at
# import time; trust_remote_code=True is passed through to the loader as in
# the model's published usage.
model = SentenceTransformer(
    "Corran/SciGenNomicEmbed",
    trust_remote_code=True,
)

# Rhetoric-function labels (the "rhetoric_function" column of the "train"
# split), materialised as a plain list so they can be indexed by match key,
# and their embeddings, computed once up front.
rf = list(
    load_dataset("Corran/RhetoricFunctionsList")["train"]["rhetoric_function"]
)
rf_emb = model.encode(rf)


def get_matches(inputs):
    """Find the closest rhetoric-function labels for each input sentence.

    Every sentence is embedded on its own with the module-level ``model`` and
    compared against the precomputed label embeddings ``rf_emb`` using an
    exact ``search`` with ``MetricKind.L2sq``, asking for 3 results.

    Args:
        inputs: Iterable of sentence strings.

    Returns:
        A list with one entry per sentence, in input order. Each entry is a
        list of ``(label, distance)`` tuples where ``label`` is looked up in
        ``rf`` by the match key and ``distance`` is the match distance rounded
        to two decimals and converted to ``str``.
    """
    paragraph_matches = []

    for sentence in inputs:
        sentence_emb = model.encode(sentence, batch_size=128)
        nearest = search(rf_emb, sentence_emb, 3, MetricKind.L2sq, exact=True)
        paragraph_matches.append(
            [(rf[hit.key], str(round(hit.distance, 2))) for hit in nearest]
        )

    return paragraph_matches


def return_rf_scores(abstract):
    """Render an abstract as HTML with per-sentence rhetoric-function tooltips.

    The abstract is split into sentences with ``segment("en", ...)`` and the
    sentences are passed to ``get_matches``. Each sentence becomes a
    ``<div class="tooltip">`` whose ``tooltiptext`` span lists the matched
    labels and their distance strings, one ``label : distance<br>`` entry per
    match.

    Args:
        abstract: Free text entered by the user.

    Returns:
        A single HTML string: ``HTML_Output``, then one tooltip ``<div>`` per
        sentence, then the closing ``</body></html>`` tags.
    """
    sentences = list(segment("en", abstract))
    matches = get_matches(sentences)

    # Collect fragments and join once instead of growing a string with +=.
    parts = [HTML_Output]

    for sentence, sentence_matches in zip(sentences, matches):
        # Labels and sentence text are escaped before interpolation: the
        # sentence is user input and must not be interpreted as markup.
        tooltip = "\n".join(
            f"{html.escape(str(label))} : {html.escape(str(distance))}<br>"
            for label, distance in sentence_matches
        )
        parts.append(
            f"""<div class="tooltip">{html.escape(sentence)}
  <span class="tooltiptext">{tooltip}</span>
</div><br>"""
        )

    parts.append("</body></html>")

    return "".join(parts)

# Example inputs offered in the Gradio UI, one row per line of the
# tab-separated file examples.tsv.
examples = []

# newline="" is what the csv module asks for so that it can handle line
# endings (including newlines inside quoted fields) itself; the explicit
# encoding keeps non-ASCII characters from depending on the host locale.
with open("examples.tsv", "r", encoding="utf-8", newline="") as ex:
    rd = csv.reader(ex, delimiter="\t", quotechar='"')
    for row in rd:
        # csv.reader yields an empty list for a blank line; such a row
        # carries no example text, so it is skipped.
        if row:
            examples.append(row)
    

# Build the UI: a text box in, rendered HTML out, with the rows loaded from
# examples.tsv offered as clickable examples. The app is launched as soon as
# the module runs.
demo = gr.Interface(
    fn=return_rf_scores,
    inputs="text",
    outputs="html",
    examples=examples,
)
demo.launch()