import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer
import spacy
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from rake_nltk import Rake
import pandas as pd
from fpdf import FPDF
import wikipediaapi

# Download the NLTK data that RAKE and sent_tokenize depend on
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('brown')
from nltk.tokenize import sent_tokenize

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Initialize Wikipedia API with a user agent
user_agent = 'QGen/1.0 ([email protected])'
wiki_wiki = wikipediaapi.Wikipedia(user_agent=user_agent, language='en')

def load_model():
    model_name = "DevBM/t5-large-squad"
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    return model, tokenizer
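
# Note: this downloads the full t5-large checkpoint (roughly 3 GB) the first
# time it runs. The app caches the result per session via st.session_state
# below; on Streamlit >= 1.18, decorating load_model with @st.cache_resource
# would instead share a single copy across all sessions.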

# Initialize session state for model and tokenizer
if 'model' not in st.session_state:
    st.session_state.model, st.session_state.tokenizer = load_model()

# Use the model and tokenizer from session state
model = st.session_state.model
tokenizer = st.session_state.tokenizer

# Function to extract keywords using combined techniques
def extract_keywords(text):
    # Use RAKE
    rake = Rake()
    rake.extract_keywords_from_text(text)
    rake_keywords = set(rake.get_ranked_phrases())

    # Use spaCy for NER and POS tagging
    doc = nlp(text)
    spacy_keywords = set(ent.text for ent in doc.ents)
    spacy_keywords.update(token.text for token in doc if token.pos_ in ["NOUN", "PROPN", "VERB", "ADJ"])

    # Use TF-IDF
    vectorizer = TfidfVectorizer(stop_words='english')
    vectorizer.fit_transform([text])
    tfidf_keywords = set(vectorizer.get_feature_names_out())

    # Combine all keywords
    combined_keywords = rake_keywords.union(spacy_keywords).union(tfidf_keywords)
    return list(combined_keywords)
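
# Illustrative behaviour (not executed by the app): for the default input text,
# RAKE tends to surface multi-word phrases ("weak wicket"), spaCy adds named
# entities and content words ("Joe Biden", "reelection"), and TF-IDF adds its
# unigram vocabulary. The union is deliberately broad; later steps narrow it
# down by sentence matching and the requested question count.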

# Function to map keywords to sentences with customizable context window size
def map_keywords_to_sentences(text, keywords, context_window_size):
    sentences = sent_tokenize(text)
    keyword_sentence_mapping = {}
    for keyword in keywords:
        for i, sentence in enumerate(sentences):
            if keyword in sentence:
                # Combine current sentence with surrounding sentences for context
                start = max(0, i - context_window_size)
                end = min(len(sentences), i + context_window_size + 1)
                context = ' '.join(sentences[start:end])
                if keyword not in keyword_sentence_mapping:
                    keyword_sentence_mapping[keyword] = context
                else:
                    keyword_sentence_mapping[keyword] += ' ' + context
    return keyword_sentence_mapping
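
# Worked example: with context_window_size=1 and a keyword found in sentence i,
# the stored context is sentences[i-1:i+2] joined together. A keyword that
# occurs in several sentences has its contexts concatenated, so overlapping
# windows can repeat sentences in the final context string.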

# Function to perform entity linking using Wikipedia API
def entity_linking(keyword):
    page = wiki_wiki.page(keyword)
    if page.exists():
        return page.fullurl
    return None
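
# Lookup is by exact page title, so clean entities like "Donald Trump" resolve,
# while RAKE-style phrases usually will not; a missing page simply yields no
# link rather than an error.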

# Function to generate questions using beam search
def generate_question(context, answer, num_beams):
    input_text = f"<context> {context} <answer> {answer}"
    # Truncate long inputs so large context windows still fit the model
    input_ids = tokenizer.encode(input_text, return_tensors='pt', max_length=512, truncation=True)
    outputs = model.generate(input_ids, num_beams=num_beams, early_stopping=True)
    question = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return question
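
# The "<context> ... <answer> ..." layout matches the prompt format the
# DevBM/t5-large-squad checkpoint appears to have been fine-tuned on. Beam
# search keeps the num_beams most likely partial questions at each decoding
# step and returns the best complete one, trading speed for output quality.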

# Function to export questions to CSV
def export_to_csv(data):
    df = pd.DataFrame(data, columns=["Context", "Answer", "Question"])
    csv = df.to_csv(index=False, encoding='utf-8')
    return csv
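
# The returned CSV string can be handed straight to st.download_button;
# pandas takes care of quoting commas and newlines inside the contexts.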

# Function to export questions to PDF
def export_to_pdf(data):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for context, answer, question in data:
        pdf.multi_cell(0, 10, f"Context: {context}")
        pdf.multi_cell(0, 10, f"Answer: {answer}")
        pdf.multi_cell(0, 10, f"Question: {question}")
        pdf.ln(10)
    # dest='S' returns the document as a string; encode it to bytes for download
    return pdf.output(dest='S').encode('latin1')
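
# Caveat: classic fpdf only supports latin-1 with its core fonts, so input
# containing curly quotes or non-Latin characters will raise a
# UnicodeEncodeError here; fpdf2 with a registered Unicode TTF font is the
# usual fix.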

if 'data' not in st.session_state:
    st.session_state.data = None

# Streamlit interface
st.title(":blue[Question Generator from Text]")
text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.")

with st.sidebar:
    st.subheader("Customization Options")
    num_beams = st.slider("Select number of beams for question generation", min_value=1, max_value=10, value=5)
    context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
    num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
    # Note: complexity is collected here but not yet used by the generation code
    question_complexity = st.selectbox("Select question complexity", ["Simple", "Intermediate", "Complex"])

if st.button("Generate Questions"):
    if text:
        # The model and tokenizer are already loaded into session state above
        keywords = extract_keywords(text)
        keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
        st.subheader("Generated Questions:")
        data = []
        for i, (keyword, context) in enumerate(keyword_sentence_mapping.items()):
            if i >= num_questions:
                break
            linked_entity = entity_linking(keyword)
            question = generate_question(context, keyword, num_beams=num_beams)
            st.write(f"**Context:** {context}")
            st.write(f"**Answer:** {keyword}")
            st.write(f"**Question:** {question}")
            if linked_entity:
                st.write(f"**Entity Link:** {linked_entity}")
            st.write("---")
            data.append((context, keyword, question))
        # Keep the results in session state so they survive reruns
        st.session_state.data = data
    else:
        st.write("Please enter some text to generate questions.")

# Export buttons: read from session state, outside the button block, so the
# downloads still work after the rerun that clicking a download button triggers
if st.session_state.data is not None:
    with st.sidebar:
        st.subheader('Download Content')
        csv_data = export_to_csv(st.session_state.data)
        st.download_button(label="CSV Format", data=csv_data, file_name='questions.csv', mime='text/csv')
        pdf_data = export_to_pdf(st.session_state.data)
        st.download_button(label="PDF Format", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
