# Streamlit app: given a comma-separated list of words, show for each word
# the other words whose CMUdict pronunciation is at least as similar as a
# user-chosen threshold, together with the similarity score as a percentage.

from difflib import SequenceMatcher

import nltk
import streamlit as st
from nltk.corpus import cmudict

# Load CMU Pronouncing Dictionary
nltk.download('cmudict')
d = cmudict.dict()


def phonetic_transcription(word):
    """Return the first pronunciation listed for *word* in the CMU dictionary.

    The lookup is case-insensitive (the word is lower-cased first).

    Returns:
        The first pronunciation entry stored for the word, or ``None`` when
        the word is not in the dictionary or has no pronunciations.
    """
    pronunciations = d.get(word.lower())
    return pronunciations[0] if pronunciations else None


def phonetic_similarity(word1, word2):
    """Return a similarity score between the pronunciations of two words.

    The score is ``SequenceMatcher(None, pt1, pt2).ratio()`` computed on the
    two transcriptions returned by ``phonetic_transcription``.

    Returns:
        The matcher ratio, or ``0`` when either word has no transcription.
    """
    pt1 = phonetic_transcription(word1)
    pt2 = phonetic_transcription(word2)
    if pt1 is None or pt2 is None:
        return 0
    return SequenceMatcher(None, pt1, pt2).ratio()


def _parse_words(raw):
    """Split comma-separated *raw* text into stripped, non-empty words."""
    stripped = (part.strip() for part in raw.split(","))
    # Drop blanks so inputs like "cat, , dog" or a trailing comma do not
    # produce empty "words" in the results.
    return [word for word in stripped if word]


def _build_similarity_matrix(word_list):
    """Return a symmetric matrix of pairwise phonetic similarity scores.

    The diagonal is left at 0: a word is never compared with itself.
    """
    size = len(word_list)
    matrix = [[0] * size for _ in range(size)]
    for i in range(size):
        # Only the upper triangle is computed; each score is mirrored.
        for j in range(i + 1, size):
            score = phonetic_similarity(word_list[i], word_list[j])
            matrix[i][j] = score
            matrix[j][i] = score
    return matrix


def _similar_to(index, word_list, matrix, min_score):
    """Return ``(word, score)`` pairs for words similar to ``word_list[index]``.

    A word qualifies when its score against the word at *index* is at least
    *min_score*. The word at *index* itself is always excluded.
    """
    row = matrix[index]
    return [
        (other, row[j])
        for j, other in enumerate(word_list)
        if j != index and row[j] >= min_score
    ]


# User input for list of words and similarity threshold
words = st.text_input("Enter list of words (separated by commas):")
threshold = st.slider("Similarity threshold:", min_value=0.0, max_value=1.0, value=0.5)

if words:
    words = _parse_words(words)
    n_words = len(words)

    # Calculate phonetic similarity matrix
    similarity_matrix = _build_similarity_matrix(words)

    # Find similar words based on similarity threshold. Each match keeps its
    # own score so the percentage shown always belongs to the word next to it
    # (indexing the matrix by position in the filtered list would not).
    similar_words = [
        _similar_to(i, words, similarity_matrix, threshold)
        for i in range(n_words)
    ]

    # Display similar words with matching score
    for word, matches in zip(words, similar_words):
        st.write(f"{word}: {[f'{other} ({int(score*100)}%)' for other, score in matches]}")