Rehman1603 committed on
Commit
ee8a640
·
verified ·
1 Parent(s): 53b6561

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +104 -0
main.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Dependency installation and model-asset download (runs at import time). ---
# NOTE(review): installing packages and downloading assets via unchecked
# os.system() calls at import time is fragile (no exit-code handling, silent
# failure, shell surface); consider moving this into a setup script/Dockerfile.
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import time
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
import random
import spacy
import zipfile
import os

os.system('pip install git+https://github.com/boudinfl/pke.git')
os.system('python -m nltk.downloader universal_tagset')
os.system('python -m spacy download en')
os.system('wget https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz')
os.system('tar -xvf s2v_reddit_2015_md.tar.gz')
os.system('python -m spacy download en_core_web_sm')

import git
import json
from sense2vec import Sense2Vec
import requests
from collections import OrderedDict
import string
import pke
import nltk
import numpy
import en_core_web_sm
from nltk import FreqDist

# Corpora needed for stopword filtering and word-frequency statistics.
nltk.download('brown', quiet=True, force=True)
nltk.download('stopwords', quiet=True, force=True)
nltk.download('popular', quiet=True, force=True)

from nltk.corpus import stopwords
from nltk.corpus import brown
from similarity.normalized_levenshtein import NormalizedLevenshtein
from nltk.tokenize import sent_tokenize
from flashtext import KeywordProcessor
from encoding import beam_search_decoding
from mcq import tokenize_sentences
from mcq import get_keywords
from mcq import get_sentences_for_keyword
from mcq import generate_questions_mcq
from mcq import generate_normal_questions

# --- Module-level singletons shared by the prediction functions below. ---
# (Removed a duplicate `import time` and the no-op self-assignments
# `device = device` / `model = model` present in the original.)
tokenizer = T5Tokenizer.from_pretrained('t5-large')
model = T5ForConditionalGeneration.from_pretrained('Parth/result')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# model.eval()

nlp = spacy.load('en_core_web_sm')
s2v = Sense2Vec().from_disk('s2v_old')  # sense2vec vectors unpacked by the tar step above
fdist = FreqDist(brown.words())  # word-frequency distribution over the Brown corpus
normalized_levenshtein = NormalizedLevenshtein()
def set_seed(seed):
    """Seed every random number generator the pipeline relies on.

    Args:
        seed: Integer seed applied to Python's ``random`` module, NumPy,
            and PyTorch (CPU and, when CUDA is available, all GPU devices).

    The original version skipped ``random.seed`` even though the file
    imports ``random`` for stochastic choices; that gap is fixed here so
    runs are fully reproducible.
    """
    random.seed(seed)
    numpy.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


set_seed(42)
59
+
60
+
61
+
62
def predict_mcq(payload):
    """Generate multiple-choice questions from a passage of text.

    Args:
        payload: Dict with keys:
            - "input_text" (str): the source passage.
            - "max_questions" (int, optional): cap on generated questions,
              default 10.

    Returns:
        Dict with "statement" (the sentence-normalized text), "questions"
        (the list produced by ``generate_questions_mcq``), and "time_taken"
        (wall-clock seconds). Returns an empty dict when no keywords are
        found or question generation fails.
    """
    start = time.time()
    inp = {
        "input_text": payload.get("input_text"),
        "max_questions": payload.get("max_questions", 10)
    }

    text = inp['input_text']
    sentences = tokenize_sentences(text)
    modified_text = " ".join(sentences)

    keywords = get_keywords(nlp, modified_text, inp['max_questions'], s2v,
                            fdist, normalized_levenshtein, len(sentences))

    keyword_sentence_mapping = get_sentences_for_keyword(keywords, sentences)

    # Keep at most the first three sentences per keyword as generation context.
    for k in keyword_sentence_mapping.keys():
        keyword_sentence_mapping[k] = " ".join(keyword_sentence_mapping[k][:3])

    final_output = {}

    if len(keyword_sentence_mapping.keys()) == 0:
        return final_output

    try:
        generated_questions = generate_questions_mcq(
            keyword_sentence_mapping, device, tokenizer, model, s2v,
            normalized_levenshtein)
    except Exception:
        # Best-effort: generation can fail on odd inputs; return empty rather
        # than crash the caller. (The original used a bare ``except:``, which
        # also swallowed KeyboardInterrupt/SystemExit.)
        return final_output

    end = time.time()

    final_output["statement"] = modified_text
    final_output["questions"] = generated_questions["questions"]
    final_output["time_taken"] = end - start

    # BUG FIX: the original wrote ``if torch.device=='cuda':`` which compares
    # the torch.device *class* to a string — always False — so the CUDA cache
    # was never actually cleared. Check the active device's type instead.
    if device.type == 'cuda':
        torch.cuda.empty_cache()

    return final_output