hectorduran committed on
Commit
bf223f9
·
1 Parent(s): a7babda

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +47 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from nltk.corpus import cmudict
3
+ from difflib import SequenceMatcher
4
+
5
+ # Load CMU Pronouncing Dictionary
6
# The corpus data is not shipped with the nltk package itself, so on a fresh
# environment the first lookup raises LookupError. Fetch it once and retry
# instead of crashing the app at import time.
try:
    d = cmudict.dict()
except LookupError:
    import nltk

    nltk.download("cmudict")
    d = cmudict.dict()
7
+
8
+ # Function to get phonetic transcription of a word
9
def phonetic_transcription(word):
    """Return the first entry stored for ``word`` in the dictionary ``d``.

    The word is lower-cased before it is used as the lookup key.

    Args:
        word: The word to look up.

    Returns:
        The first element of the dictionary entry for the word, or ``None``
        when the lower-cased word is not a key of ``d``.
    """
    key = word.lower()
    try:
        entries = d[key]
    except KeyError:
        # Unknown word: signal "no transcription" rather than raising.
        return None
    return entries[0]
14
+
15
+ # Function to calculate phonetic similarity between two words
16
def phonetic_similarity(word1, word2):
    """Score how alike the transcriptions of two words are.

    Both words are looked up with ``phonetic_transcription`` and the two
    results are compared with ``difflib.SequenceMatcher``.

    Args:
        word1: First word to compare.
        word2: Second word to compare.

    Returns:
        The ``SequenceMatcher`` ratio of the two transcriptions, or ``0``
        when either word has no transcription.
    """
    first = phonetic_transcription(word1)
    second = phonetic_transcription(word2)
    if first is not None and second is not None:
        return SequenceMatcher(None, first, second).ratio()
    # At least one word is missing from the dictionary.
    return 0
23
+
24
+ # User input for list of words and similarity threshold
25
# User input: a comma-separated word list and the minimum similarity to report.
words = st.text_input("Enter list of words (separated by commas):")
threshold = st.slider("Similarity threshold:", min_value=0.0, max_value=1.0, value=0.5)

if words:
    # Drop empty entries so stray or trailing commas do not become "words".
    words = [word.strip() for word in words.split(",") if word.strip()]
    n_words = len(words)

    # Symmetric pairwise similarity matrix. Only the upper triangle is
    # computed and then mirrored; the diagonal is left at 0.
    similarity_matrix = [[0 for _ in range(n_words)] for _ in range(n_words)]
    for i in range(n_words):
        for j in range(i + 1, n_words):
            similarity = phonetic_similarity(words[i], words[j])
            similarity_matrix[i][j] = similarity
            similarity_matrix[j][i] = similarity

    # For each word, collect (other word, score) pairs that meet the
    # threshold. The score is stored alongside the word so the display step
    # never has to re-index the matrix with a position from the filtered
    # list (which previously showed scores belonging to the wrong words).
    # A word is not reported as a match for itself.
    similar_words = [
        [
            (words[j], similarity_matrix[i][j])
            for j in range(n_words)
            if j != i and similarity_matrix[i][j] >= threshold
        ]
        for i in range(n_words)
    ]

    # Display similar words with matching score
    for i in range(n_words):
        matches = [f"{word} ({int(score * 100)}%)" for word, score in similar_words[i]]
        st.write(f"{words[i]}: {matches}")
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ streamlit
2
+ nltk