Spaces:
Runtime error
Runtime error
Commit
·
bf223f9
1
Parent(s):
a7babda
Upload 2 files
Browse files
- app.py +47 -0
- requirements.txt +2 -0
app.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from nltk.corpus import cmudict
|
3 |
+
from difflib import SequenceMatcher
|
4 |
+
|
5 |
+
# Load CMU Pronouncing Dictionary
|
6 |
+
# The corpus data is not bundled with the nltk package; on a fresh
# environment the first access raises LookupError. Download it on demand
# and retry so the app can start without a manual setup step.
try:
    d = cmudict.dict()
except LookupError:
    import nltk

    nltk.download("cmudict")
    d = cmudict.dict()
|
7 |
+
|
8 |
+
# Function to get phonetic transcription of a word
|
9 |
+
def phonetic_transcription(word):
    """Return the first pronunciation listed for *word* in ``d``.

    The word is lower-cased before the lookup. Returns ``None`` when the
    word has no entry in the dictionary.
    """
    key = word.lower()
    if key not in d:
        return None
    pronunciations = d[key]
    return pronunciations[0]
|
14 |
+
|
15 |
+
# Function to calculate phonetic similarity between two words
|
16 |
+
def phonetic_similarity(word1, word2):
    """Score how alike two words sound, as a ratio between 0 and 1.

    Both words are converted with ``phonetic_transcription`` and the two
    transcriptions are compared with ``difflib.SequenceMatcher``. If either
    word has no transcription, the score is 0.
    """
    first = phonetic_transcription(word1)
    second = phonetic_transcription(word2)
    if first is not None and second is not None:
        matcher = SequenceMatcher(None, first, second)
        return matcher.ratio()
    return 0
|
23 |
+
|
24 |
+
# User input for list of words and similarity threshold
|
25 |
+
words = st.text_input("Enter list of words (separated by commas):")
threshold = st.slider("Similarity threshold:", min_value=0.0, max_value=1.0, value=0.5)

if words:
    # Split on commas and drop blank entries (e.g. from a trailing comma),
    # which would otherwise be treated as words and shown in the output.
    words = [word.strip() for word in words.split(",") if word.strip()]
    n_words = len(words)

    # Calculate the symmetric phonetic similarity matrix. Only pairs of
    # distinct positions are scored; the diagonal stays at 0.
    similarity_matrix = [[0] * n_words for _ in range(n_words)]
    for i in range(n_words):
        for j in range(i + 1, n_words):
            similarity = phonetic_similarity(words[i], words[j])
            similarity_matrix[i][j] = similarity
            similarity_matrix[j][i] = similarity

    # For each word, collect (other word, score) pairs that reach the
    # threshold. Keeping the score next to the word avoids re-indexing the
    # matrix with a position from the filtered list, which would report the
    # wrong percentage. A word is never listed as similar to itself, since
    # its diagonal score is never computed.
    similar_words = [
        [
            (words[j], similarity_matrix[i][j])
            for j in range(n_words)
            if j != i and similarity_matrix[i][j] >= threshold
        ]
        for i in range(n_words)
    ]

    # Display similar words with matching score
    for word, matches in zip(words, similar_words):
        labels = [f"{other} ({int(score * 100)}%)" for other, score in matches]
        st.write(f"{word}: {labels}")
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
nltk
|