Spaces:
Runtime error
Runtime error
import streamlit as st | |
import nltk | |
from nltk.corpus import cmudict | |
from difflib import SequenceMatcher | |
# Load the CMU Pronouncing Dictionary.
# Streamlit re-executes this script from the top on every widget interaction,
# so only contact the NLTK download server when the corpus is not already
# available locally; nltk.data.find raises LookupError when it is missing.
try:
    nltk.data.find('corpora/cmudict')
except LookupError:
    nltk.download('cmudict')
# Mapping used by phonetic_transcription: lower-cased word -> pronunciations.
d = cmudict.dict()
def phonetic_transcription(word):
    """Return the first pronunciation stored for *word*, or None.

    The word is lower-cased before it is looked up in the module-level
    pronunciation dictionary ``d``. When the dictionary has no entry for
    the word, ``None`` is returned instead of raising.
    """
    pronunciations = d.get(word.lower())
    if pronunciations is None:
        return None
    # A word may have several pronunciations; only the first one is used.
    return pronunciations[0]
def phonetic_similarity(word1, word2):
    """Score how alike the pronunciations of two words are.

    Both words are transcribed with ``phonetic_transcription``. If both
    transcriptions exist, the result is the ``SequenceMatcher`` ratio of the
    two transcriptions (a float between 0 and 1). If either word has no
    transcription, the score is 0.
    """
    first = phonetic_transcription(word1)
    second = phonetic_transcription(word2)
    if first is not None and second is not None:
        return SequenceMatcher(None, first, second).ratio()
    # At least one word is unknown to the dictionary: treat as dissimilar.
    return 0
# User input: a comma-separated list of words and the minimum similarity
# (0.0-1.0) that a pair of words must reach to be reported.
words = st.text_input("Enter list of words (separated by commas):")
threshold = st.slider("Similarity threshold:", min_value=0.0, max_value=1.0, value=0.5)

if words:
    # Split on commas, trim surrounding whitespace, and drop empty entries so
    # that stray commas (e.g. "cat, , hat,") do not produce blank "words".
    words = [word.strip() for word in words.split(",")]
    words = [word for word in words if word]
    n_words = len(words)

    # Symmetric pairwise similarity matrix. Only the upper triangle is
    # computed; each score is mirrored into the lower triangle. The diagonal
    # is left at 0 because a word is never compared with itself.
    similarity_matrix = [[0] * n_words for _ in range(n_words)]
    for i in range(n_words):
        for j in range(i + 1, n_words):
            similarity = phonetic_similarity(words[i], words[j])
            similarity_matrix[i][j] = similarity
            similarity_matrix[j][i] = similarity

    # For every word, remember the matrix column indices of the other words
    # that reach the threshold. Keeping the original indices (rather than
    # only the words) lets the display step read the matching score from the
    # correct matrix cell. A word is never listed as similar to itself.
    similar_indices = [
        [
            j
            for j in range(n_words)
            if j != i and similarity_matrix[i][j] >= threshold
        ]
        for i in range(n_words)
    ]
    similar_words = [[words[j] for j in indices] for indices in similar_indices]

    # Display each word followed by its similar words and matching scores.
    for i in range(n_words):
        labels = [
            f"{words[j]} ({int(similarity_matrix[i][j] * 100)}%)"
            for j in similar_indices[i]
        ]
        st.write(f"{words[i]}: {labels}")