Spaces:

DevBM
/

QGen

Runtime error

App Files Files Community

DevBM

AneriThakkar committed on Jul 9, 2024

Commit

472f4fa

verified ·

1 Parent(s): dbb2b74

added email mechanism (#1)

Browse files

- added email mechanism (126de2b4015235a33370aa63ed153ac9e37cefd2)

Co-authored-by: Thakkar Aneri Pareshkumar <[email protected]>

Files changed (1) hide show

app.py +84 -89

app.py CHANGED Viewed

@@ -11,8 +11,7 @@ from functools import lru_cache
 nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('brown')
-from nltk.tokenize import sent_tokenize, word_tokenize
-from nltk.tag import pos_tag
 nltk.download('wordnet')
 from nltk.corpus import wordnet
 import random
@@ -31,6 +30,13 @@ import uuid
 import time
 import asyncio
 import aiohttp
 print("***************************************************************")
 st.set_page_config(
@@ -107,7 +113,6 @@ elif select_model == "T5-small":
 nlp, s2v = load_nlp_models()
 similarity_model, spell = load_qa_models()
 context_model = similarity_model
-sentence_model = similarity_model
 model, tokenizer = load_model(modelname)
 # Info Section
 def display_info():
@@ -148,6 +153,7 @@ def get_pdf_text(pdf_file):
         page = doc.load_page(page_num)
         text += page.get_text()
     return text
 def save_feedback(question, answer, rating, options, context):
     feedback_file = 'question_feedback.json'
     if os.path.exists(feedback_file):
@@ -164,10 +170,38 @@ def save_feedback(question, answer, rating, options, context):
     }
     # feedback_data[question] = rating
     feedback_data.append(tpl)
     with open(feedback_file, 'w') as f:
         json.dump(feedback_data, f)
 # Function to clean text
 def clean_text(text):
@@ -253,7 +287,7 @@ def get_synonyms(word, n=3):
                     return synonyms
     return synonyms
-def get_fallback_options(answer, context, n=3):
     options = [answer]
     # Add contextually relevant words using a pre-trained model
@@ -294,84 +328,6 @@ def get_fallback_options(answer, context, n=3):
     return options
-def get_semantic_similarity(word1, word2):
-    embeddings = sentence_model.encode([word1, word2])
-    return util.pytorch_cos_sim(embeddings[0], embeddings[1]).item()
-def ensure_grammatical_consistency(question, answer, option):
-    question_pos = pos_tag(word_tokenize(question))
-    answer_pos = pos_tag(word_tokenize(answer))
-    option_pos = pos_tag(word_tokenize(option))
-    # Check if the answer and option have the same part of speech
-    if answer_pos[-1][1] != option_pos[-1][1]:
-        return False
-    # Check if the option fits grammatically in the question
-    question_template = question.replace(answer, "PLACEHOLDER")
-    option_question = question_template.replace("PLACEHOLDER", option)
-    option_question_pos = pos_tag(word_tokenize(option_question))
-    return question_pos == option_question_pos
-def get_word_type(word):
-    doc = nlp(word)
-    return doc[0].pos_
-async def generate_options_async(answer, context, question, n=4):
-    try:
-        options = [answer]
-        # Get context words
-        doc = nlp(context)
-        context_words = [token.text for token in doc if token.is_alpha and token.text.lower() != answer.lower()]
-        # Get answer type
-        answer_type = get_word_type(answer)
-        print(answer_type,"\n")
-        # Get semantically similar words
-        similar_words = []
-        for word in context_words:
-            if get_word_type(word) == answer_type:
-                similarity = get_semantic_similarity(answer, word)
-                if 0.3 < similarity < 0.8:  # Adjust these thresholds as needed
-                    similar_words.append((word, similarity))
-        # Sort by similarity (descending) and take top n-1
-        similar_words.sort(key=lambda x: x[1], reverse=True)
-        top_similar_words = [word for word, _ in similar_words[:n-1]]
-        # Ensure grammatical consistency
-        consistent_options = []
-        for word in top_similar_words:
-            if ensure_grammatical_consistency(question, answer, word):
-                consistent_options.append(word)
-            if len(consistent_options) == n-1:
-                break
-        options.extend(consistent_options)
-        # If we don't have enough options, fall back to original method
-        while len(options) < n:
-            fallback_options = get_fallback_options(answer, context, 3)
-            for option in fallback_options:
-                if option not in options and ensure_grammatical_consistency(question, answer, option):
-                    options.append(option)
-                    break
-        # Shuffle the options
-        random.shuffle(options)
-        print(options)
-        st.write("All possibel options are: ", options, "\n")
-        return options
-    except Exception as e:
-        raise QuestionGenerationError(f"Error in generating options: {str(e)}")
 # Function to map keywords to sentences with customizable context window size
 def map_keywords_to_sentences(text, keywords, context_window_size):
     sentences = sent_tokenize(text)
@@ -411,8 +367,38 @@ async def generate_question_async(context, answer, num_beams):
     except Exception as e:
         raise QuestionGenerationError(f"Error in question generation: {str(e)}")
 # Function to generate questions using beam search
@@ -451,7 +437,7 @@ async def process_batch(batch, keywords, context_window_size, num_beams):
         keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
         for keyword, context in keyword_sentence_mapping.items():
             question = await generate_question_async(context, keyword, num_beams)
-            options = await generate_options_async(keyword, context, question)
             overall_score, relevance_score, complexity_score, spelling_correctness = assess_question_quality(context, question, keyword)
             if overall_score >= 0.5:
                 questions.append({
@@ -548,7 +534,6 @@ def main():
         num_beams = st.slider("Select number of beams for question generation", min_value=2, max_value=10, value=2)
         context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
         num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
-        use_llm_for_options = st.toggle("Use AI for Advanced option generation", False)
         col1, col2 = st.columns(2)
         with col1:
             extract_all_keywords = st.toggle("Extract Max Keywords",value=False)
@@ -569,7 +554,7 @@ def main():
     if text:
         text = clean_text(text)
     generate_questions_button = st.button("Generate Questions")
-    # st.markdown('<span aria-label="Generate questions button">Above is the generate questions button</span>', unsafe_allow_html=True)
     # if generate_questions_button:
     if generate_questions_button and text:
@@ -625,10 +610,20 @@ def main():
                 q['question'] = st.text_input(f"Edit Question {i+1}:", value=q['question'], key=f"question_{i}")
                 q['rating'] = st.select_slider(f"Rate this question (1-5)", options=[1, 2, 3, 4, 5], key=f"rating_{i}")
                 if st.button(f"Submit Feedback for Question {i+1}", key=f"submit_{i}"):
-                    save_feedback(q['question'], q['answer'], q['rating'], q['options'], q['context'])
                     st.success(f"Feedback submitted for Question {i+1}")
             st.write("---")
         # Export buttons
         # if st.session_state.generated_questions:
         if state['generated_questions']:

 nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('brown')
+from nltk.tokenize import sent_tokenize
 nltk.download('wordnet')
 from nltk.corpus import wordnet
 import random
 import time
 import asyncio
 import aiohttp
+# '-----------------'
+import smtplib
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+from email.mime.base import MIMEBase
+from email import encoders
+# '------------------'
 print("***************************************************************")
 st.set_page_config(
 nlp, s2v = load_nlp_models()
 similarity_model, spell = load_qa_models()
 context_model = similarity_model
 model, tokenizer = load_model(modelname)
 # Info Section
 def display_info():
         page = doc.load_page(page_num)
         text += page.get_text()
     return text
 def save_feedback(question, answer, rating, options, context):
     feedback_file = 'question_feedback.json'
     if os.path.exists(feedback_file):
     }
     # feedback_data[question] = rating
     feedback_data.append(tpl)
+    print(feedback_data)
     with open(feedback_file, 'w') as f:
         json.dump(feedback_data, f)
+    return feedback_file
+# -----------------------------------------------------------------------------------------
+def send_email_with_attachment(email_subject, email_body, recipient_emails, sender_email, sender_password, attachment_path):
+    msg = MIMEMultipart()
+    msg['From'] = sender_email
+    msg['To'] = ", ".join(recipient_emails)  # Join the list of recipients with commas
+    msg['Subject'] = email_subject
+    msg.attach(MIMEText(email_body, 'plain'))
+    attachment = open(attachment_path, 'rb')
+    part = MIMEBase('application', 'octet-stream')
+    part.set_payload(attachment.read())
+    encoders.encode_base64(part)
+    part.add_header('Content-Disposition', f'attachment; filename={os.path.basename(attachment_path)}')
+    msg.attach(part)
+    attachment.close()
+    with smtplib.SMTP('smtp.gmail.com', 587) as server:
+        server.starttls()
+        print(sender_email)
+        print(sender_password)
+        server.login(sender_email, sender_password)
+        text = msg.as_string()
+        server.sendmail(sender_email, recipient_emails, text)
+# ----------------------------------------------------------------------------------
 # Function to clean text
 def clean_text(text):
                     return synonyms
     return synonyms
+def generate_options(answer, context, n=3):
     options = [answer]
     # Add contextually relevant words using a pre-trained model
     return options
 # Function to map keywords to sentences with customizable context window size
 def map_keywords_to_sentences(text, keywords, context_window_size):
     sentences = sent_tokenize(text)
     except Exception as e:
         raise QuestionGenerationError(f"Error in question generation: {str(e)}")
+async def generate_options_async(answer, context, n=3):
+    try:
+        options = [answer]
+        # Add contextually relevant words using a pre-trained model
+        context_embedding = await asyncio.to_thread(context_model.encode, context)
+        answer_embedding = await asyncio.to_thread(context_model.encode, answer)
+        context_words = [token.text for token in nlp(context) if token.is_alpha and token.text.lower() != answer.lower()]
+        # Compute similarity scores and sort context words
+        similarity_scores = [util.pytorch_cos_sim(await asyncio.to_thread(context_model.encode, word), answer_embedding).item() for word in context_words]
+        sorted_context_words = [word for _, word in sorted(zip(similarity_scores, context_words), reverse=True)]
+        options.extend(sorted_context_words[:n])
+        # Try to get similar words based on sense2vec
+        similar_words = await asyncio.to_thread(get_similar_words_sense2vec, answer, n)
+        options.extend(similar_words)
+        # If we don't have enough options, try synonyms
+        if len(options) < n + 1:
+            synonyms = await asyncio.to_thread(get_synonyms, answer, n - len(options) + 1)
+            options.extend(synonyms)
+        # Ensure we have the correct number of unique options
+        options = list(dict.fromkeys(options))[:n+1]
+        # Shuffle the options
+        random.shuffle(options)
+        return options
+    except Exception as e:
+        raise QuestionGenerationError(f"Error in generating options: {str(e)}")
 # Function to generate questions using beam search
         keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
         for keyword, context in keyword_sentence_mapping.items():
             question = await generate_question_async(context, keyword, num_beams)
+            options = await generate_options_async(keyword, context)
             overall_score, relevance_score, complexity_score, spelling_correctness = assess_question_quality(context, question, keyword)
             if overall_score >= 0.5:
                 questions.append({
         num_beams = st.slider("Select number of beams for question generation", min_value=2, max_value=10, value=2)
         context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
         num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
         col1, col2 = st.columns(2)
         with col1:
             extract_all_keywords = st.toggle("Extract Max Keywords",value=False)
     if text:
         text = clean_text(text)
     generate_questions_button = st.button("Generate Questions")
+    st.markdown('<span aria-label="Generate questions button">Above is the generate questions button</span>', unsafe_allow_html=True)
     # if generate_questions_button:
     if generate_questions_button and text:
                 q['question'] = st.text_input(f"Edit Question {i+1}:", value=q['question'], key=f"question_{i}")
                 q['rating'] = st.select_slider(f"Rate this question (1-5)", options=[1, 2, 3, 4, 5], key=f"rating_{i}")
                 if st.button(f"Submit Feedback for Question {i+1}", key=f"submit_{i}"):
+                    feedback_file=save_feedback(q['question'], q['answer'], q['rating'], q['options'], q['context'])
                     st.success(f"Feedback submitted for Question {i+1}")
+                    pswd = st.secrets['EMAIL_PASSWORD']
+                    send_email_with_attachment(
+    email_subject='feedback from QGen',
+    email_body='Please find the attached feedback JSON file.',
+    recipient_emails=['[email protected]', '[email protected]'],
+    sender_email='[email protected]',
+    sender_password=pswd,
+    attachment_path=feedback_file)
+                    st.write("Feedback sent to admin")
             st.write("---")
         # Export buttons
         # if st.session_state.generated_questions:
         if state['generated_questions']: