Spaces:
Running
Running
Commit
·
f9dda4c
1
Parent(s):
a2556d4
replace by rapidfuzz
Browse files- app.py +14 -12
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
from datasets import load_dataset
|
|
|
3 |
from sentence_transformers import SentenceTransformer, util
|
4 |
|
5 |
# Load the dataset from Hugging Face
|
@@ -27,26 +28,27 @@ def embedding_search(input_text, top_x, embeddings, corpus, reference):
|
|
27 |
for result in results
|
28 |
]
|
29 |
|
30 |
-
def
|
31 |
"""
|
32 |
-
Perform word overlap search
|
|
|
33 |
"""
|
34 |
input_words = input_text.lower().split()
|
35 |
scores = []
|
36 |
|
37 |
for i, text in enumerate(corpus):
|
38 |
text_words = text.lower().split()
|
|
|
39 |
|
40 |
-
#
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
)
|
46 |
|
47 |
-
scores.append((i,
|
48 |
|
49 |
-
# Sort by
|
50 |
scores.sort(key=lambda x: x[1], reverse=True)
|
51 |
return [(reference[idx], corpus[idx], score) for idx, score in scores[:top_x]]
|
52 |
|
@@ -63,10 +65,10 @@ def recommend_proverbs_comparison(input_text, top_x):
|
|
63 |
)
|
64 |
|
65 |
# Word Overlap Similarity
|
66 |
-
semantic_results_overlap =
|
67 |
input_text, definitions, proverbs, top_x
|
68 |
)
|
69 |
-
literal_results_overlap =
|
70 |
input_text, proverbs, definitions, top_x
|
71 |
)
|
72 |
|
|
|
1 |
import gradio as gr
|
2 |
from datasets import load_dataset
|
3 |
+
from rapidfuzz.fuzz import ratio
|
4 |
from sentence_transformers import SentenceTransformer, util
|
5 |
|
6 |
# Load the dataset from Hugging Face
|
|
|
28 |
for result in results
|
29 |
]
|
30 |
|
31 |
+
def fuzzy_search(input_text, corpus, reference, top_x, threshold=70):
    """
    Rank corpus entries by fuzzy word overlap with input_text using RapidFuzz.

    Both the query and every corpus entry are lower-cased and split on
    whitespace. An entry's score is the number of (query word, entry word)
    pairs whose ``ratio`` similarity is at least ``threshold``. Every
    matching pair counts, so repeated words add to the score.

    Args:
        input_text: Query string.
        corpus: Sequence of strings to search.
        reference: Sequence parallel to ``corpus``; ``reference[i]`` is
            returned alongside ``corpus[i]``.
        top_x: Maximum number of results to return.
        threshold: Minimum ``ratio`` score (RapidFuzz uses a 0-100 scale)
            for two words to count as a match.

    Returns:
        A list of at most ``top_x`` tuples ``(reference[i], corpus[i], score)``
        sorted by descending score. Ties keep their corpus order, and
        entries with a score of 0 are not filtered out.
    """
    input_words = input_text.lower().split()
    scores = []

    for i, text in enumerate(corpus):
        text_words = text.lower().split()

        # score_cutoff lets RapidFuzz abandon a comparison as soon as the pair
        # cannot reach the threshold (it then returns 0), so the outcome of
        # the ">= threshold" test is unchanged while non-matches get cheaper.
        match_score = sum(
            1
            for input_word in input_words
            for text_word in text_words
            if ratio(input_word, text_word, score_cutoff=threshold) >= threshold
        )
        scores.append((i, match_score))

    # list.sort is stable, so equally scored entries stay in corpus order.
    scores.sort(key=lambda x: x[1], reverse=True)
    return [(reference[idx], corpus[idx], score) for idx, score in scores[:top_x]]
|
54 |
|
|
|
65 |
)
|
66 |
|
67 |
# Word Overlap Similarity
|
68 |
+
semantic_results_overlap = fuzzy_search(
|
69 |
input_text, definitions, proverbs, top_x
|
70 |
)
|
71 |
+
literal_results_overlap = fuzzy_search(
|
72 |
input_text, proverbs, definitions, top_x
|
73 |
)
|
74 |
|
requirements.txt
CHANGED
@@ -1 +1,2 @@
|
|
1 |
sentence_transformers
|
|
|
|
1 |
sentence_transformers
|
2 |
+
rapidfuzz
|