Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -15,15 +15,18 @@ from nltk.tokenize import sent_tokenize
 nltk.download('wordnet')
 from nltk.corpus import wordnet
 import random
-
+from sense2vec import Sense2Vec
+import sense2vec
 # Load spaCy model
 nlp = spacy.load("en_core_web_sm")
+# s2v = Sense2Vec.from_disk(self=Sense2Vec,path='s2v_old')
 
+s2v = sense2vec.Sense2Vec().from_disk('s2v_old')
 # Initialize Wikipedia API with a user agent
 user_agent = 'QGen/1.0 ([email protected])'
 wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
 
-@st.cache_resource
+@st.cache_resource
 def load_model():
     model_name = "DevBM/t5-large-squad"
     model = T5ForConditionalGeneration.from_pretrained(model_name)
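The new s2v = sense2vec.Sense2Vec().from_disk('s2v_old') call expects a directory of pretrained sense2vec vectors named s2v_old next to app.py; if no setup step provides it, this line raises at startup, which would be consistent with the Runtime error status shown above. A minimal sketch of such a setup step, assuming the Reddit 2015 vectors from the explosion/sense2vec GitHub releases (the release URL and extracted folder name below are assumptions, not part of this commit):

import os
import tarfile
import urllib.request

# Sketch of a one-time setup step: download and unpack pretrained sense2vec
# vectors so that from_disk('s2v_old') succeeds. Adjust the URL and folder
# name to whichever archive from github.com/explosion/sense2vec you use.
if not os.path.isdir("s2v_old"):
    url = "https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz"  # assumed
    urllib.request.urlretrieve(url, "s2v.tar.gz")
    with tarfile.open("s2v.tar.gz") as tar:
        tar.extractall(".")

The commented-out line in the diff (Sense2Vec.from_disk(self=Sense2Vec, path='s2v_old')) records an earlier attempt: from_disk is an instance method, so the working version instantiates Sense2Vec() first, and the from sense2vec import Sense2Vec import ends up unused.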
@@ -55,20 +58,19 @@ def extract_keywords(text):
 # Load spaCy model (medium-sized model with word vectors)
 nlp = spacy.load("en_core_web_md")
 
-def get_similar_words(word, context, n=3):
-    # Get words similar to the given word using spaCy word vectors
-    similar_words = []
+def get_similar_words_sense2vec(word, n=3):
+    # Try to find the word with its most likely part-of-speech
+    word_with_pos = word + "|NOUN"
+    if word_with_pos in s2v:
+        similar_words = s2v.most_similar(word_with_pos, n=n)
+        return [word.split("|")[0] for word, _ in similar_words]
 
-    # Compare the word against tokens from the context
-    doc = nlp(context)
-    for w in doc:
-        if w.is_alpha and w.text.lower() != word.lower():
-            similarity = nlp(w.text).similarity(nlp(word))
-            similar_words.append((w.text, similarity))
+    # If not found, try without POS
+    if word in s2v:
+        similar_words = s2v.most_similar(word, n=n)
+        return [word.split("|")[0] for word, _ in similar_words]
 
-    # Sort by similarity and return the top n words
-    similar_words.sort(key=lambda x: x[1], reverse=True)
-    return [word for word, _ in similar_words[:n]]
+    return []
 
 def get_synonyms(word, n=3):
     synonyms = []
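For readers unfamiliar with sense2vec: keys pair a token with a coarse sense tag ("duck|NOUN" vs. "duck|VERB"), and multi-word entries join tokens with underscores, so multi-word answers coming straight from the keyword extractor can miss even when the concept is in the vectors. A small sketch of the lookup the new function relies on (the s2v_old path and the example key are assumptions):

from sense2vec import Sense2Vec

# Sketch: the sense2vec lookups used by get_similar_words_sense2vec.
s2v = Sense2Vec().from_disk("s2v_old")          # path assumed, as in app.py
query = "machine_learning|NOUN"                 # example key, assumed present
if query in s2v:
    for key, score in s2v.most_similar(query, n=3):
        print(key.split("|")[0], float(score))  # strip the sense tag

Hard-coding "|NOUN" is a reasonable first guess for answer-style keywords; the library's own way to pick the most frequent sense instead is s2v.get_best_sense(word).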
@@ -83,8 +85,8 @@ def get_synonyms(word, n=3):
 def generate_options(answer, context, n=3):
     options = [answer]
 
-    # Try to get similar words based on word vectors
-    similar_words = get_similar_words(answer, context, n)
+    # Try to get similar words based on sense2vec
+    similar_words = get_similar_words_sense2vec(answer, n)
     options.extend(similar_words)
 
     # If we don't have enough options, try synonyms
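One behavior worth noting: sense2vec neighbors often include casing or pluralization variants of the answer itself, so options can hold near-duplicates before the synonym fallback even runs. A hypothetical helper (not in the app) that filters candidates case-insensitively:

# Hypothetical helper: keep the answer first, drop case-insensitive
# duplicates among candidate distractors, stop once n options are collected.
def dedupe_options(answer, candidates, n=4):
    options, seen = [answer], {answer.lower()}
    for c in candidates:
        if c.lower() not in seen:
            options.append(c)
            seen.add(c.lower())
        if len(options) == n:
            break
    return options

print(dedupe_options("Paris", ["paris", "Lyon", "PARIS", "Marseille"]))
# ['Paris', 'Lyon', 'Marseille']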
@@ -138,6 +140,7 @@ def entity_linking(keyword):
     return None
 
 # Function to generate questions using beam search
+@st.cache_data
 def generate_question(context, answer, num_beams):
     input_text = f"<context> {context} <answer> {answer}"
     input_ids = tokenizer.encode(input_text, return_tensors='pt')
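The new @st.cache_data decorator memoizes generate_question by its argument values (all hashable strings and ints here), so regenerating questions for the same text skips the beam search; it complements the existing @st.cache_resource on load_model. The split Streamlit recommends, in outline (function bodies elided):

import streamlit as st

@st.cache_resource     # one shared, unpicklable object (the model) per process
def load_model():
    ...

@st.cache_data         # memoized per (context, answer, num_beams) call
def generate_question(context, answer, num_beams):
    ...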
@@ -147,7 +150,7 @@ def generate_question(context, answer, num_beams):
 
 # Function to export questions to CSV
 def export_to_csv(data):
-    df = pd.DataFrame(data, columns=["Context", "Answer", "Question"])
+    df = pd.DataFrame(data, columns=["Context", "Answer", "Question", "Options"])
     csv = df.to_csv(index=False,encoding='utf-8')
     return csv
 
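Since each row's Options value is a Python list, pandas serializes it into the CSV as its repr, e.g. "['Paris', 'Lyon']", which round-trips poorly in spreadsheets. A variant that joins the options first (an illustration, not what this commit does):

import pandas as pd

# Variant of export_to_csv: flatten the options list into one readable cell.
def export_to_csv(data):
    rows = [(c, a, q, "; ".join(o)) for c, a, q, o in data]
    df = pd.DataFrame(rows, columns=["Context", "Answer", "Question", "Options"])
    return df.to_csv(index=False, encoding="utf-8")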
@@ -157,7 +160,7 @@ def export_to_pdf(data):
     pdf.add_page()
     pdf.set_font("Arial", size=12)
 
-    for context, answer, question in data:
+    for context, answer, question, options in data:
         pdf.multi_cell(0, 10, f"Context: {context}")
         pdf.multi_cell(0, 10, f"Answer: {answer}")
         pdf.multi_cell(0, 10, f"Question: {question}")
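The PDF loop now unpacks options, but the visible context never writes it, so unless lines below this hunk do, the exported PDF still omits the choices. A sketch of the loop extended to print them, mirroring the lettered on-screen list (sample data invented for illustration):

from fpdf import FPDF

# Sketch: render each question's options as lettered choices in the PDF.
data = [("Paris is the capital of France.", "Paris",
         "What is the capital of France?", ["Paris", "Lyon", "Marseille"])]
pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size=12)
for context, answer, question, options in data:
    pdf.multi_cell(0, 10, f"Context: {context}")
    pdf.multi_cell(0, 10, f"Answer: {answer}")
    pdf.multi_cell(0, 10, f"Question: {question}")
    for j, option in enumerate(options):
        pdf.multi_cell(0, 10, f"Option {chr(65 + j)}: {option}")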
@@ -199,13 +202,13 @@ if st.button("Generate Questions"):
             st.write(f"**Answer:** {keyword}")
             st.write(f"**Question:** {question}")
             st.write(f"**Options:**")
-            for j, option in options:
+            for j, option in enumerate(options):
                 st.write(f"{chr(65+j)}. {option}")
 
             if linked_entity:
                 st.write(f"**Entity Link:** {linked_entity}")
             st.write("---")
-            data.append((context, keyword, question))
+            data.append((context, keyword, question, options))
 
     # Add the data to session state
     st.session_state.data = data
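The enumerate change here repairs a real bug: iterating for j, option in options: over a list of strings tries to unpack each string into two variables and raises a ValueError. With the index restored, chr(65 + j) maps 0, 1, 2, ... to the option letters:

options = ["Paris", "Lyon", "Marseille"]
for j, option in enumerate(options):
    print(f"{chr(65 + j)}. {option}")
# A. Paris
# B. Lyon
# C. Marseille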
@@ -224,4 +227,4 @@ if st.button("Generate Questions"):
 
 
 else:
-    st.write("Please enter some text to generate questions.")
+    st.write("Please enter some text to generate questions.")