ismot vinayakdev committed on
Commit
e052b6e
·
0 Parent(s):

Duplicate from vinayakdev/qa-generator

Browse files

Co-authored-by: Vinayak Dev <[email protected]>

Files changed (12) hide show
  1. .DS_Store +0 -0
  2. .gitattributes +38 -0
  3. .streamlit/config.toml +3 -0
  4. README.md +14 -0
  5. app.py +21 -0
  6. electra_model.sav +3 -0
  7. generator.py +140 -0
  8. hfmodel.sav +3 -0
  9. main.py +8 -0
  10. model.sav +3 -0
  11. requirements.txt +5 -0
  12. t5_model.sav +3 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitattributes ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ model.sav filter=lfs diff=lfs merge=lfs -text
36
+ hfmodel.sav filter=lfs diff=lfs merge=lfs -text
37
+ electra_model.sav filter=lfs diff=lfs merge=lfs -text
38
+ t5_model.sav filter=lfs diff=lfs merge=lfs -text
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base="light"
3
+ backgroundColor="#FFFFFF"
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Qa Generator
3
+ emoji: 😻
4
+ colorFrom: indigo
5
+ colorTo: yellow
6
+ sdk: streamlit
7
+ sdk_version: 1.15.2
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ duplicated_from: vinayakdev/qa-generator
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import generator
3
+ from generator import *
4
+ import string
5
+
6
# Streamlit front end: take a paragraph, generate question/answer pairs for it
# with generator.creator, and render each pair inside a container.
st.info("On a large paragraph, less questions will be produced, for increased accuracy.\n")
text_ar = st.text_area("Enter text:")
# Three columns so the button sits in the (narrower) middle one.
col1, col2, col3 = st.columns([2, 1, 2])
x = st.container()
if col2.button("Generate!"):
    # The generator works on a single line of text.
    text_ar = text_ar.replace("\n", " ")
    if not text_ar.strip():
        # Nothing was entered: skip the (slow) model calls instead of
        # generating questions from an empty prompt.
        st.warning("Please enter some text first.")
    else:
        pairs = creator(text_ar)
        for pair in pairs:
            x.text(pair)
        # NOTE(review): the scraped diff lost its indentation; the balloons are
        # assumed to fire once per successful generation - confirm.
        st.balloons()
16
+
17
+
18
+
19
+
20
+
21
+
electra_model.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d99ce35fd30c98ca681be026c3ddb54f725e5a343535b503c07032611f1b72a
3
+ size 54032204
generator.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import transformers
2
+ from transformers import (
3
+ # Text2TextGenerationPipeline,
4
+ AutoModelForSeq2SeqLM as alwm,
5
+ # TokenClassificationPipeline,
6
+ # AutoModelForTokenClassification,
7
+ AutoModelForQuestionAnswering as amqa,
8
+ AutoTokenizer as att,
9
+ # BertTokenizer,
10
+ AlbertTokenizer,
11
+ # BertForQuestionAnswering,
12
+ # AlbertForQuestionAnswering,
13
+ # T5Config,
14
+ # T5ForConditionalGeneration,
15
+ T5TokenizerFast,
16
+ PreTrainedTokenizer,
17
+ PreTrainedModel,
18
+ ElectraTokenizer as et,
19
+ # ElectraForQuestionAnswering
20
+ )
21
+ import torch
22
+ import sentencepiece
23
+ import string
24
+ import numpy as np
25
+ from transformers import pipeline
26
+ # from transformers.pipelines import pipeline
27
+ import pickle
28
+ import streamlit as st
29
+
30
+ # sq_tokenizer = att.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
31
+ # sq_model = alwm.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
32
+ # text= "The abolition of feudal privileges by the National Constituent Assembly on 4 August 1789 and the Declaration \\nof the Rights of Man and of the Citizen (La Déclaration des Droits de l'Homme et du Citoyen), drafted by Lafayette \\nwith the help of Thomas Jefferson and adopted on 26 August, paved the way to a Constitutional Monarchy \\n(4 September 1791 – 21 September 1792). Despite these dramatic changes, life at the court continued, while the situation \\nin Paris was becoming critical because of bread shortages in September. On 5 October 1789, a crowd from Paris descended upon Versailles \\nand forced the royal family to move to the Tuileries Palace in Paris, where they lived under a form of house arrest under \\nthe watch of Lafayette's Garde Nationale, while the Comte de Provence and his wife were allowed to reside in the \\nPetit Luxembourg, where they remained until they went into exile on 20 June 1791."
33
+ # hftokenizer = pickle.load(open('models/hftokenizer.sav', 'rb'))
34
+ # hfmodel = pickle.load(open('models/hfmodel.sav', 'rb'))
35
+
36
def load_model():
    """Load the models and tokenizers used by this module.

    Returns:
        A 4-tuple ``(hfm, hft, tok, model)``:

        * ``hfm``   - object unpickled from ``hfmodel.sav``; ``run_model``
          calls ``generate`` on it.
        * ``hft``   - the ``t5-base`` ``T5TokenizerFast``.
        * ``tok``   - the ``ahotrod/albert_xxlargev1_squad2_512``
          ``AlbertTokenizer``.
        * ``model`` - object unpickled from ``model.sav``; ``QA`` calls it and
          reads ``start_logits`` / ``end_logits`` from its output.

    Raises:
        FileNotFoundError: if ``hfmodel.sav`` or ``model.sav`` is missing from
            the current working directory.
    """
    # NOTE(security): pickle.load executes arbitrary code embedded in the file.
    # Only ship .sav files from a trusted source; consider save_pretrained /
    # from_pretrained instead of pickling whole model objects.
    # Context managers make sure the file handles are closed after loading.
    with open('hfmodel.sav', 'rb') as hfm_file:
        hfm = pickle.load(hfm_file)
    hft = T5TokenizerFast.from_pretrained("t5-base")
    with open('model.sav', 'rb') as model_file:
        model = pickle.load(model_file)
    tok = AlbertTokenizer.from_pretrained("ahotrod/albert_xxlargev1_squad2_512")
    return hfm, hft, tok, model


# Loaded once at import time and shared by every function below.
hfmodel, hftokenizer, tokenizer, model = load_model()
45
+
46
def run_model(input_string, **generator_args):
    """Generate questions for ``input_string`` with the seq2seq model.

    Args:
        input_string: Text to generate questions from. It is wrapped as
            ``"generate questions: <text> </s>"`` before encoding.
        **generator_args: Optional keyword arguments forwarded to
            ``hfmodel.generate``. They override the defaults below.

    Returns:
        A list with one entry per decoded sequence; each entry is the decoded
        string split on the literal ``"<sep>"`` marker (a list of strings).
    """
    # Defaults come first so caller-supplied values win. Previously the
    # caller's keyword arguments were silently replaced by this dict.
    generation_kwargs = {
        "max_length": 256,
        "num_beams": 4,
        "length_penalty": 1.5,
        "no_repeat_ngram_size": 3,
        "early_stopping": True,
        **generator_args,
    }
    input_string = "generate questions: " + input_string + " </s>"
    input_ids = hftokenizer.encode(input_string, return_tensors="pt")
    res = hfmodel.generate(input_ids, **generation_kwargs)
    output = hftokenizer.batch_decode(res, skip_special_tokens=True)
    return [item.split("<sep>") for item in output]
61
+
62
+
63
+
64
+ # al_tokenizer = att.from_pretrained("deepset/electra-base-squad2")
65
+ # al_model = amqa.from_pretrained("deepset/electra-base-squad2")
66
+ # al_model = pickle.load(open('models/al_model.sav', 'rb'))
67
+ # al_tokenizer = pickle.load(open('models/al_tokenizer.sav', 'rb'))
68
def QA(question, context):
    """Answer ``question`` from ``context`` with the extractive QA model.

    Args:
        question: The question text.
        context: The passage the answer is extracted from.

    Returns:
        ``"Q. <question> \\n Ans. <answer>"`` when a non-blank answer span is
        decoded, otherwise a message saying the answer could not be generated.
    """
    # NOTE(review): the inputs are not truncated; presumably a very long
    # context can exceed what the model accepts - confirm and add truncation.
    encoded = tokenizer(question, context, return_tensors="pt")

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        prediction = model(**encoded)

    # Positions with the highest start / end scores delimit the answer span.
    span_start = torch.argmax(prediction.start_logits)
    span_end = torch.argmax(prediction.end_logits)

    # Convert the selected tokens back into a string.
    answer_ids = encoded.input_ids[0, span_start : span_end + 1]
    answer = string.capwords(
        tokenizer.decode(answer_ids, skip_special_tokens=True)
    )

    if answer and not answer.isspace():
        return f"Q. {question} \n Ans. {answer}"
    return f"Possible question : {question}\n Answer could not be generated accurately."
95
+ # QA("What was the first C program","The first prgram written in C was Hello World")
96
+
97
def gen_question(inputs):
    """Return the questions ``run_model`` generates for ``inputs``.

    Thin wrapper: the result is whatever ``run_model(inputs)`` returns, with
    its default generation settings.
    """
    return run_model(inputs)
102
+
103
+ # string_query = "Hello World"
104
+ # gen_question(f"answer: {string_query} context: The first C program said {string_query} "). #The format of the query to generate questions
105
+
106
def tokenize(inputs):
    """Batch-encode ``inputs`` with ``hftokenizer`` into fixed-length tensors.

    Args:
        inputs: The batch of texts handed to ``batch_encode_plus``.

    Returns:
        The encoding returned by ``hftokenizer.batch_encode_plus``, as PyTorch
        tensors, truncated and padded to 512 tokens.
    """
    return hftokenizer.batch_encode_plus(
        inputs,
        max_length=512,
        add_special_tokens=True,
        truncation=True,
        # Replaces the deprecated ``pad_to_max_length=True``; with an explicit
        # max_length both pad every sequence to that length.
        padding="max_length",
        return_tensors="pt",
    )
117
+
118
def read_file(filepath_name):
    """Read a text file and return its contents on a single line.

    Args:
        filepath_name: Path of the text file to read.

    Returns:
        The file contents with every newline replaced by a space.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
    """
    # The original opened the undefined name ``text`` instead of the
    # parameter, which raised NameError on every call.
    with open(filepath_name, "r") as infile:
        contents = infile.read()
    return contents.replace("\n", " ")
123
+
124
def create_string_for_generator(context):
    """Generate questions for ``context`` and return them as separate strings.

    Only the first segment of the first generated sequence is used; it is
    split into individual questions on ``"? "``.

    Args:
        context: The passage to generate questions from.

    Returns:
        A list of non-blank question strings.
    """
    gen_list = gen_question(context)
    fragments = gen_list[0][0].split('? ')
    # str.split consumes the "? " delimiter, so every fragment except the last
    # lost its question mark; restore it. Blank fragments are dropped.
    questions = [
        fragment.strip() + '?'
        for fragment in fragments[:-1]
        if fragment.strip()
    ]
    # The last fragment was not followed by the delimiter: keep it as it is.
    last = fragments[-1].strip()
    if last:
        questions.append(last)
    return questions
127
+
128
def creator(context):
    """Build question/answer strings for ``context``.

    Questions come from ``create_string_for_generator`` and each one is
    answered against the same ``context`` with ``QA``.

    Args:
        context: The passage to generate questions from and answer against.

    Returns:
        A list with one formatted string per non-blank question, as returned
        by ``QA``.
    """
    pairs = []
    for ques in create_string_for_generator(context):
        # Skip blank questions. The original tested the QA result instead,
        # but QA always returns a non-empty string, so nothing was filtered.
        if not ques.strip():
            continue
        pairs.append(QA(ques, context))
    return pairs
138
+ # creator(""""Hello, World!" program by Brian Kernighan (1978).
139
+ # A "Hello, World!" program is generally a computer program that ignores any input and outputs or displays a message similar to "Hello, World!". A small piece of code in most general-purpose programming languages, this program is used to illustrate a language's basic syntax. "Hello, World!" programs are often the first a student learns to write in a given language,[1] and they can also be used as a sanity check to ensure computer software intended to compile or run source code is correctly installed, and that its operator understands how to use it.
140
+ # """)
hfmodel.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60b8a0eaae4db9a1d63b7ae641f8a81a31776ac27ee326016d006a8f4a3f9265
3
+ size 891674698
main.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import generator
2
+ from generator import *
3
+
4
+
5
+
6
# Command-line entry point: read a paragraph, generate question/answer pairs
# for it and print them.
context = input("Enter paragraph\n")
# str.replace returns a new string; the original call discarded the result.
context = context.replace("\n", " ")
# The original dropped creator()'s return value, so nothing was ever shown.
for pair in creator(context):
    print(pair)
model.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69d343950c24e17fefdac3a0b288306de41127b9097774b01ff7fd0d5887ff6d
3
+ size 823312585
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ numpy==1.24.1
2
+ sentencepiece==0.1.97
3
+ streamlit==1.16.0
4
+ torch==1.13.1
5
+ transformers==4.25.1
t5_model.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd85d72d50394f86c47cde298e698cdd1f6ef2c43267f40e2719bb3b5f9278ec
3
+ size 242049035