# wordsimilarity / app.py
# Uploaded by hectorduran -- commit e763fba ("Upload app.py")
import streamlit as st
import nltk
from nltk.corpus import cmudict
from difflib import SequenceMatcher
# Load CMU Pronouncing Dictionary.
# Download the corpus only when it is not already installed locally, so that
# repeated executions of this script do not contact the NLTK download server
# each time. nltk.data.find raises LookupError when the resource is missing.
try:
    nltk.data.find('corpora/cmudict')
except LookupError:
    nltk.download('cmudict')
# Lookup table keyed by lowercase word; see phonetic_transcription for usage.
d = cmudict.dict()
# Function to get phonetic transcription of a word
def phonetic_transcription(word):
    """Return the first transcription stored for *word*, or None if unknown.

    The lookup is case-insensitive: the word is lowercased before it is
    searched in the module-level dictionary ``d``.
    """
    pronunciations = d.get(word.lower())
    if pronunciations is None:
        return None
    # Only the first listed pronunciation is used.
    return pronunciations[0]
# Function to calculate phonetic similarity between two words
def phonetic_similarity(word1, word2):
    """Score how alike two words sound.

    Both words are transcribed with ``phonetic_transcription``. When either
    word has no transcription the score is 0; otherwise it is the
    ``SequenceMatcher`` ratio of the two transcriptions.
    """
    phonemes_a = phonetic_transcription(word1)
    phonemes_b = phonetic_transcription(word2)
    if phonemes_a is not None and phonemes_b is not None:
        matcher = SequenceMatcher(None, phonemes_a, phonemes_b)
        return matcher.ratio()
    # At least one word is missing from the dictionary.
    return 0
# User input for list of words and similarity threshold
words = st.text_input("Enter list of words (separated by commas):")
threshold = st.slider("Similarity threshold:", min_value=0.0, max_value=1.0, value=0.5)
if words:
    # Split on commas and drop empty entries left by stray or trailing commas.
    words = [word.strip() for word in words.split(",") if word.strip()]
    n_words = len(words)
    # Calculate phonetic similarity matrix. It is symmetric, so only the upper
    # triangle is computed and mirrored; the diagonal is left at 0.
    similarity_matrix = [[0 for _ in range(n_words)] for _ in range(n_words)]
    for i in range(n_words):
        for j in range(i + 1, n_words):
            similarity = phonetic_similarity(words[i], words[j])
            similarity_matrix[i][j] = similarity
            similarity_matrix[j][i] = similarity
    # Find similar words based on similarity threshold. The matching column
    # indices are kept so each displayed score is read from the correct cell of
    # the matrix; a word is never reported as similar to itself.
    similar_indices = [
        [j for j in range(n_words) if j != i and similarity_matrix[i][j] >= threshold]
        for i in range(n_words)
    ]
    similar_words = [[words[j] for j in indices] for indices in similar_indices]
    # Display similar words with matching score
    for i in range(n_words):
        labels = [
            f'{words[j]} ({int(similarity_matrix[i][j]*100)}%)'
            for j in similar_indices[i]
        ]
        st.write(f"{words[i]}: {labels}")