Spaces:

DevBM
/

QGen

Runtime error

App Files Files Community

DevBM committed on Jul 3, 2024

Commit

84c3fd4

verified ·

1 Parent(s): 5e04f07

Update app.py

Browse files

Files changed (1) hide show

app.py +190 -63

app.py CHANGED Viewed

@@ -19,21 +19,25 @@ from sense2vec import Sense2Vec
 import sense2vec
 from wordcloud import WordCloud
 import matplotlib.pyplot as plt
 print("***************************************************************")
 st.set_page_config(
     page_title="Question Generator",
     initial_sidebar_state="collapsed",
 )
-# Load spaCy model
-nlp = spacy.load("en_core_web_md")
-# s2v = Sense2Vec.from_disk(self=Sense2Vec,path='s2v_old')
-s2v = sense2vec.Sense2Vec().from_disk('s2v_old')
 # Initialize Wikipedia API with a user agent
 user_agent = 'QGen/1.0 ([email protected])'
 wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
 @st.cache_resource
 def load_model():
     model_name = "DevBM/t5-large-squad"
@@ -41,6 +45,46 @@ def load_model():
     tokenizer = T5Tokenizer.from_pretrained(model_name)
     return model, tokenizer
 # Function to extract keywords using combined techniques
 def extract_keywords(text, extract_all):
     doc = nlp(text)
@@ -162,8 +206,10 @@ def generate_question(context, answer, num_beams):
 # Function to export questions to CSV
 def export_to_csv(data):
-    df = pd.DataFrame(data, columns=["Context", "Answer", "Question", "Options"])
-    csv = df.to_csv(index=False,encoding='utf-8')
     return csv
 # Function to export questions to PDF
@@ -172,14 +218,15 @@ def export_to_pdf(data):
     pdf.add_page()
     pdf.set_font("Arial", size=12)
-    for context, answer, question, options in data:
-        pdf.multi_cell(0, 10, f"Context: {context}")
-        pdf.multi_cell(0, 10, f"Answer: {answer}")
-        pdf.multi_cell(0, 10, f"Question: {question}")
         pdf.ln(10)
-    # pdf.output("questions.pdf")
-    return pdf.output(name='questions.pdf',dest='S').encode('latin1')
 def display_word_cloud(generated_questions):
     word_frequency = {}
@@ -194,74 +241,154 @@ def display_word_cloud(generated_questions):
     plt.axis('off')
     st.pyplot()
-if 'data' not in st.session_state:
-    st.session_state.data = None
-# Streamlit interface
-st.title(":blue[Question Generator from Text]")
-text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.")
-with st.sidebar:
-    st.subheader("Customization Options")
-    # Customization options
-    num_beams = st.slider("Select number of beams for question generation", min_value=1, max_value=10, value=5)
-    context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
-    num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
-    with st.expander("Choose the Additional Elements to show"):
-        show_context = st.checkbox("Context",True)
-        show_answer = st.checkbox("Answer",True)
-        show_options = st.checkbox("Options",False)
-        show_entity_link = st.checkbox("Enitity Link For Wikipedia",True)
-    extract_all_keywords = st.toggle("Extract max Keywords",value=False)
-if st.button("Generate Questions"):
-    if text:
-        model, tokenizer = load_model()
-        keywords = extract_keywords(text,extract_all_keywords)
         print(f"\n\nFinal Keywords in Main Function: {keywords}\n\n")
         keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
-        st.subheader("Generated Questions:",divider='blue')
-        data = []
         for i, (keyword, context) in enumerate(keyword_sentence_mapping.items()):
             if i >= num_questions:
                 break
-            linked_entity = entity_linking(keyword)
             question = generate_question(context, keyword, num_beams=num_beams)
-            options = generate_options(keyword, context)
-            st.subheader(body=f":orange[Q{i+1}:] {question}")
             if show_context is True:
-                st.write(f"**Context:** {context}")
             if show_answer is True:
-                st.write(f"**Answer:** {keyword}")
             if show_options is True:
                 st.write(f"**Options:**")
-                for j, option in enumerate(options):
                     st.write(f"{chr(65+j)}. {option}")
             if show_entity_link is True:
                 if linked_entity:
                     st.write(f"**Entity Link:** {linked_entity}")
-            st.write("---")
-            data.append((context, keyword, question, options))
-        # Add the data to session state
-        st.session_state.data = data
-        # display_word_cloud()
-        print(data)
         # Export buttons
-        if st.session_state.data is not None:
             with st.sidebar:
-                st.subheader('Download Content')
-                csv_data = export_to_csv(data)
-                st.download_button(label="CSV Format", data=csv_data, file_name='questions.csv', mime='text/csv')
-                pdf_data = export_to_pdf(data)
-                st.download_button(label="PDF Format", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
-        if st.session_state.data is not None:
-            st.markdown("You can download the data from the sidebar.")
-    else:
-        st.write("Please enter some text to generate questions.")
-    print("********************************************************************************")

 import sense2vec
 from wordcloud import WordCloud
 import matplotlib.pyplot as plt
+import json
+import os
+from sentence_transformers import SentenceTransformer, util
+import textstat
+import language_tool_python
+from transformers import pipeline
 print("***************************************************************")
 st.set_page_config(
     page_title="Question Generator",
     initial_sidebar_state="collapsed",
 )
 # Initialize Wikipedia API with a user agent
 user_agent = 'QGen/1.0 ([email protected])'
 wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
 @st.cache_resource
 def load_model():
     model_name = "DevBM/t5-large-squad"
     tokenizer = T5Tokenizer.from_pretrained(model_name)
     return model, tokenizer
+# Load Spacy Model
@st.cache_resource
def load_nlp_models():
    """Load the spaCy pipeline and the sense2vec vectors.

    Decorated with ``st.cache_resource`` so Streamlit can hand back the same
    loaded objects instead of reading them from disk again.

    Returns:
        A ``(nlp, s2v)`` tuple: the ``en_core_web_md`` spaCy pipeline and the
        ``Sense2Vec`` object restored from the local ``s2v_old`` directory.
    """
    spacy_pipeline = spacy.load("en_core_web_md")
    sense_vectors = sense2vec.Sense2Vec().from_disk('s2v_old')
    return spacy_pipeline, sense_vectors
+# Load Quality Assurance Models
@st.cache_resource
def load_qa_models():
    """Load the models used for quality assurance of generated questions.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    objects rather than constructing them again.

    Returns:
        A ``(similarity_model, language_tool)`` tuple: the
        ``all-MiniLM-L6-v2`` sentence-transformer and an ``en-US``
        LanguageTool grammar checker.
    """
    # Sentence-embedding model for sentence similarity.
    sentence_encoder = SentenceTransformer('all-MiniLM-L6-v2')
    # Grammar checker for US English.
    grammar_checker = language_tool_python.LanguageTool('en-US')
    return sentence_encoder, grammar_checker
+nlp, s2v = load_nlp_models()
+model, tokenizer = load_model()
+similarity_model, language_tool = load_qa_models()
def save_feedback(question, answer, rating):
    """Append one feedback record to ``question_feedback.json``.

    The file (relative to the current working directory) holds a JSON list of
    ``{'question', 'answer', 'rating'}`` objects. It is created on first use.

    If the existing file cannot be parsed as JSON, or does not contain a list,
    it is moved aside to ``question_feedback.json.corrupt`` and a fresh list is
    started, so one damaged file cannot block every later submission.

    Args:
        question: Text of the question being rated.
        answer: Answer associated with the question.
        rating: Rating chosen by the user; must be JSON-serialisable.
    """
    feedback_file = 'question_feedback.json'
    feedback_data = []

    if os.path.exists(feedback_file):
        try:
            with open(feedback_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except json.JSONDecodeError:
            # Empty or truncated file, e.g. from an interrupted write.
            loaded = None

        if isinstance(loaded, list):
            feedback_data = loaded
        else:
            # Keep the unusable content for manual inspection instead of
            # silently overwriting it.
            os.replace(feedback_file, feedback_file + '.corrupt')

    feedback_data.append({
        'question': question,
        'answer': answer,
        'rating': rating,
    })

    with open(feedback_file, 'w', encoding='utf-8') as f:
        json.dump(feedback_data, f)
 # Function to extract keywords using combined techniques
 def extract_keywords(text, extract_all):
     doc = nlp(text)
 # Function to export questions to CSV
 def export_to_csv(data):
+    # df = pd.DataFrame(data, columns=["Context", "Answer", "Question", "Options"])
+    df = pd.DataFrame(data)
+    # csv = df.to_csv(index=False,encoding='utf-8')
+    csv = df.to_csv(index=False)
     return csv
 # Function to export questions to PDF
     pdf.add_page()
     pdf.set_font("Arial", size=12)
+    for item in data:
+        pdf.multi_cell(0, 10, f"Context: {item['context']}")
+        pdf.multi_cell(0, 10, f"Question: {item['question']}")
+        pdf.multi_cell(0, 10, f"Answer: {item['answer']}")
+        pdf.multi_cell(0, 10, f"Options: {', '.join(item['options'])}")
+        pdf.multi_cell(0, 10, f"Overall Score: {item['overall_score']:.2f}")
         pdf.ln(10)
+    return pdf.output(dest='S').encode('latin-1')
 def display_word_cloud(generated_questions):
     word_frequency = {}
     plt.axis('off')
     st.pyplot()
def assess_question_quality(context, question, answer):
    """Score a generated question on relevance, complexity and grammar.

    Args:
        context: Source passage the question was generated from.
        question: Generated question text.
        answer: Expected answer; accepted for interface compatibility but not
            used in the scoring.

    Returns:
        A tuple ``(overall_score, relevance_score, complexity_score,
        grammatical_correctness)``. The overall score is the weighted sum
        ``0.4 * relevance + 0.3 * complexity + 0.3 * grammar``. A question
        with no tokens scores ``0.0`` on every component.
    """
    question_doc = nlp(question)
    token_count = len(question_doc)
    if token_count == 0:
        # Nothing to score; also avoids dividing by zero in the grammar
        # metric below.
        return 0.0, 0.0, 0.0, 0.0

    # Relevance: spaCy document similarity between context and question.
    context_doc = nlp(context)
    relevance_score = context_doc.similarity(question_doc)

    # Complexity: token count as a simple proxy, capped at 1 from 20 tokens on.
    complexity_score = min(token_count / 20, 1)

    # Grammar: share of tokens not flagged by LanguageTool. Clamp at 0 because
    # the checker can report more issues than there are tokens.
    errors = language_tool.check(question)
    grammatical_correctness = max(0.0, 1 - (len(errors) / token_count))

    # Weighted overall score (weights can be adjusted as needed).
    overall_score = (
        0.4 * relevance_score +
        0.3 * complexity_score +
        0.3 * grammatical_correctness
    )
    return overall_score, relevance_score, complexity_score, grammatical_correctness
def _show_feedback_statistics(feedback_file='question_feedback.json'):
    """Render aggregate statistics for the feedback stored in *feedback_file*."""
    try:
        with open(feedback_file, 'r', encoding='utf-8') as f:
            feedback_data = json.load(f)
    except FileNotFoundError:
        st.write("No feedback data available yet.")
        return
    except json.JSONDecodeError:
        # A damaged file should not take the whole page down.
        st.warning("Feedback file could not be read; statistics are unavailable.")
        return

    if not isinstance(feedback_data, list):
        st.warning("Feedback file could not be read; statistics are unavailable.")
        return

    # Ignore entries that lack the fields displayed below.
    required_keys = {'question', 'answer', 'rating'}
    feedback_data = [
        entry for entry in feedback_data
        if isinstance(entry, dict) and required_keys <= entry.keys()
    ]

    st.subheader("Feedback Statistics")
    # Calculate average rating
    ratings = [feedback['rating'] for feedback in feedback_data]
    avg_rating = sum(ratings) / len(ratings) if ratings else 0
    st.write(f"Average Question Rating: {avg_rating:.2f}")
    # Show distribution of ratings
    rating_counts = {i: ratings.count(i) for i in range(1, 6)}
    st.bar_chart(rating_counts)
    # Show some highly rated questions
    st.subheader("Highly Rated Questions")
    sorted_feedback = sorted(feedback_data, key=lambda x: x['rating'], reverse=True)
    for feedback in sorted_feedback[:5]:
        st.write(f"Question: {feedback['question']}")
        st.write(f"Answer: {feedback['answer']}")
        st.write(f"Rating: {feedback['rating']}")
        st.write("---")


def main():
    """Build the Streamlit page: inputs, generation, display, export, feedback.

    Generated questions are kept in ``st.session_state.generated_questions``
    as a list of dicts (question, context, answer, options and the quality
    scores). The display, export and feedback sections are rendered only
    while that list is non-empty.
    """
    # Streamlit interface
    st.title(":blue[Question Generator System]")

    # Initialize session state
    if 'generated_questions' not in st.session_state:
        st.session_state.generated_questions = []

    text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.")
    with st.sidebar:
        st.subheader("Customization Options")
        # Customization options
        num_beams = st.slider("Select number of beams for question generation", min_value=1, max_value=10, value=5)
        context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
        num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
        with st.expander("Choose the Additional Elements to show"):
            show_context = st.checkbox("Context", True)
            show_answer = st.checkbox("Answer", True)
            show_options = st.checkbox("Options", False)
            show_entity_link = st.checkbox("Entity Link For Wikipedia", True)
            show_qa_scores = st.checkbox("QA Score", False)
        col1, col2 = st.columns(2)
        with col1:
            extract_all_keywords = st.toggle("Extract Max Keywords", value=False)
        with col2:
            enable_feedback_mode = st.toggle("Enable Feedback Mode", False)

    generate_questions_button = st.button("Generate Questions")
    if generate_questions_button and text:
        st.session_state.generated_questions = []
        keywords = extract_keywords(text, extract_all_keywords)
        print(f"\n\nFinal Keywords in Main Function: {keywords}\n\n")
        keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
        for i, (keyword, context) in enumerate(keyword_sentence_mapping.items()):
            if i >= num_questions:
                break
            question = generate_question(context, keyword, num_beams=num_beams)
            options = generate_options(keyword, context)
            overall_score, relevance_score, complexity_score, grammatical_correctness = assess_question_quality(context, question, keyword)
            st.session_state.generated_questions.append({
                "question": question,
                "context": context,
                "answer": keyword,
                "options": options,
                "overall_score": overall_score,
                "relevance_score": relevance_score,
                "complexity_score": complexity_score,
                "grammatical_correctness": grammatical_correctness,
            })
    elif generate_questions_button:
        # The button was pressed with an empty text box: tell the user why
        # nothing was generated instead of silently doing nothing.
        st.write("Please enter some text to generate questions.")

    # Everything below is shown only once questions exist.
    if not st.session_state.generated_questions:
        return

    # Display generated questions
    st.header("Generated Questions:", divider='blue')
    for i, q in enumerate(st.session_state.generated_questions):
        st.subheader(body=f":orange[Q{i+1}:] {q['question']}")
        if show_context:
            st.write(f"**Context:** {q['context']}")
        if show_answer:
            st.write(f"**Answer:** {q['answer']}")
        if show_options:
            st.write("**Options:**")
            for j, option in enumerate(q['options']):
                st.write(f"{chr(65+j)}. {option}")
        if show_entity_link:
            linked_entity = entity_linking(q['answer'])
            if linked_entity:
                st.write(f"**Entity Link:** {linked_entity}")
        if show_qa_scores:
            st.write(f"**Overall Quality Score:** {q['overall_score']:.2f}")
            st.write(f"**Relevance Score:** {q['relevance_score']:.2f}")
            st.write(f"**Complexity Score:** {q['complexity_score']:.2f}")
            st.write(f"**Grammatical Correctness:** {q['grammatical_correctness']:.2f}")
        if enable_feedback_mode:
            # Edits and ratings are written back into the stored record.
            q['question'] = st.text_input(f"Edit Question {i+1}:", value=q['question'], key=f"question_{i}")
            q['rating'] = st.selectbox("Rate this question (1-5)", options=[1, 2, 3, 4, 5], key=f"rating_{i}")
            if st.button(f"Submit Feedback for Question {i+1}", key=f"submit_{i}"):
                save_feedback(q['question'], q['answer'], q['rating'])
                st.success(f"Feedback submitted for Question {i+1}")

    # Export buttons
    with st.sidebar:
        csv_data = export_to_csv(st.session_state.generated_questions)
        st.download_button(label="Download CSV", data=csv_data, file_name='questions.csv', mime='text/csv')
        pdf_data = export_to_pdf(st.session_state.generated_questions)
        st.download_button(label="Download PDF", data=pdf_data, file_name='questions.pdf', mime='application/pdf')

    # View Feedback Statistics
    with st.expander("View Feedback Statistics"):
        _show_feedback_statistics()

    print("********************************************************************************")
+if __name__ == '__main__':
+    main()