Alexander Casimir Fischer committed
Commit 3e72a37 · 1 Parent(s): 11d5492

new file: __pycache__/common.cpython-311.pyc
new file: __pycache__/main.cpython-311.pyc
new file: common.py
new file: main.py
new file: qc_run.py
new file: requirements.txt

__pycache__/common.cpython-311.pyc ADDED
Binary file (16 kB)
 
__pycache__/main.cpython-311.pyc ADDED
Binary file (17.3 kB)
 
common.py ADDED
@@ -0,0 +1,301 @@
+ #Central storage for variables, objects, templates used by both apps
+ import os
+
+ #Importing dependencies
+ from langchain.chat_models import ChatOpenAI
+ from langchain.prompts import PromptTemplate
+ from langchain.chains import LLMChain
+ from langchain.tools import WikipediaQueryRun
+ from langchain.utilities import WikipediaAPIWrapper
+
+ #Prompt variables dictionary: definitions of 4th grade Common Core Standards according to the source provided by Crossover
+ standard_definition_dict = {
+     "CCSS.ELA-LITERACY.W.4.1": "Write opinion pieces on topics or texts, supporting a point of view with reasons and information",
+     "CCSS.ELA-LITERACY.W.4.2": "Write informative/explanatory texts to examine a topic and convey ideas and information clearly",
+     "CCSS.ELA-LITERACY.W.4.3": "Write narratives to develop real or imagined experiences or events using effective technique, descriptive details, and clear event sequences",
+     "CCSS.ELA-LITERACY.W.4.4": "Produce clear and coherent writing in which the development and organization are appropriate to task, purpose, and audience",
+     "CCSS.ELA-LITERACY.W.4.5": "Develop and strengthen writing as needed by planning, revising, and editing - with guidance and support from peers and adults",
+     "CCSS.ELA-LITERACY.W.4.6": "Use technology, including the Internet, to produce and publish writing as well as to interact and collaborate with others; demonstrate sufficient command of keyboarding skills to type a minimum of one page in a single sitting - with some guidance and support from adults",
+     "CCSS.ELA-LITERACY.W.4.7": "Conduct short research projects that build knowledge through investigation of different aspects of a topic",
+     "CCSS.ELA-LITERACY.W.4.8": "Recall relevant information from experiences or gather relevant information from print and digital sources; take notes and categorize information, and provide a list of sources",
+     "CCSS.ELA-LITERACY.W.4.9": "Draw evidence from literary or informational texts to support analysis, reflection, and research",
+     "CCSS.ELA-LITERACY.W.4.10": "Write routinely over extended time frames (time for research, reflection, and revision) and shorter time frames (a single sitting or a day or two) for a range of discipline-specific tasks, purposes, and audiences"
+ }
+
+ #Prompt variables dictionary: defines how the AI's FRQ output must be formulated depending on the CCS selected,
+ #since an FRQ is not always a question in the grammatical sense
+ question_or_task_dict = {
+     "CCSS.ELA-LITERACY.W.4.1": "writing task",
+     "CCSS.ELA-LITERACY.W.4.2": "writing task",
+     "CCSS.ELA-LITERACY.W.4.3": "creative writing task",
+     "CCSS.ELA-LITERACY.W.4.4": "writing task. Also define the purpose and the supposed audience of the student's writing",
+     "CCSS.ELA-LITERACY.W.4.5": "writing task",
+     "CCSS.ELA-LITERACY.W.4.6": "research and writing task. The student may use the internet for research and ask adults for some guidance",
+     "CCSS.ELA-LITERACY.W.4.7": "research and writing task with the goal of expanding the student's knowledge",
+     "CCSS.ELA-LITERACY.W.4.8": "scientific writing task",
+     "CCSS.ELA-LITERACY.W.4.9": "free response question",
+     "CCSS.ELA-LITERACY.W.4.10": "homework writing task"
+ }
+
+ #Rubric dictionary: selects the rubric to be displayed in the expander after the evaluation
+ rubric_dict = {
+     "CCSS.ELA-LITERACY.W.4.1": """
+     • Write opinion pieces on topics or texts,
+       supporting a point of view with reasons and information.
+     • Introduce a topic or text clearly, state an opinion, and
+       create an organizational structure in which related ideas
+       are grouped to support the writer's purpose.
+     • Provide reasons that are supported by facts and details.
+     • Link opinion and reasons using words and phrases (e.g.,
+       for instance, in order to, in addition).
+     • Provide a concluding statement or section related to the
+       opinion presented.
+     """,
+     "CCSS.ELA-LITERACY.W.4.2": """
+     • Write informative/explanatory texts to examine a topic
+       and convey ideas and information clearly.
+     • Introduce a topic clearly and group related information
+       in paragraphs and sections; include formatting (e.g.,
+       headings), illustrations, and multimedia when useful to
+       aiding comprehension.
+     • Develop the topic with facts, definitions, concrete
+       details, quotations, or other information and examples
+       related to the topic.
+     • Link ideas within categories of information using words
+       and phrases (e.g., another, for example, also, because).
+     • Use precise language and domain-specific vocabulary to
+       inform about or explain the topic.
+     • Provide a concluding statement or section related to the
+       information or explanation presented.
+     """,
+     "CCSS.ELA-LITERACY.W.4.3": """
+     • Write narratives to develop real or imagined experiences
+       or events using effective technique, descriptive details,
+       and clear event sequences.
+     • Orient the reader by establishing a situation and
+       introducing a narrator and/or characters; organize an
+       event sequence that unfolds naturally.
+     • Use dialogue and description to develop experiences and
+       events or show the responses of characters to situations.
+     • Use a variety of transitional words and phrases to manage
+       the sequence of events.
+     • Use concrete words and phrases and sensory details to
+       convey experiences and events precisely.
+     • Provide a conclusion that follows from the narrated
+       experiences or events.
+     """,
+     "CCSS.ELA-LITERACY.W.4.4": """
+     Produce clear and coherent writing in which the development
+     and organization are appropriate to task, purpose, and
+     audience. (Grade-specific expectations for writing types
+     are defined in standards 1-3 above.)
+     """,
+     "CCSS.ELA-LITERACY.W.4.5": """
+     With guidance and support from peers and adults, develop
+     and strengthen writing as needed by planning, revising,
+     and editing. (Editing for conventions should demonstrate
+     command of Language standards 1-3 up to and including
+     grade 4 here.)
+     """,
+     "CCSS.ELA-LITERACY.W.4.6": """
+     With some guidance and support from adults, use technology,
+     including the Internet, to produce and publish writing as
+     well as to interact and collaborate with others;
+     demonstrate sufficient command of keyboarding skills to
+     type a minimum of one page in a single sitting.
+     """,
+     "CCSS.ELA-LITERACY.W.4.7": """
+     Conduct short research projects that build knowledge
+     through investigation of different aspects of a topic.
+     """,
+     "CCSS.ELA-LITERACY.W.4.8": """
+     Recall relevant information from experiences or gather
+     relevant information from print and digital sources; take
+     notes and categorize information, and provide a list of
+     sources.
+     """,
+     "CCSS.ELA-LITERACY.W.4.9": """
+     • Draw evidence from literary or informational texts to
+       support analysis, reflection, and research.
+     • Apply grade 4 Reading standards to literature (e.g.,
+       "Describe in depth a character, setting, or event in a
+       story or drama, drawing on specific details in the text
+       [e.g., a character's thoughts, words, or actions].").
+     • Apply grade 4 Reading standards to informational texts
+       (e.g., "Explain how an author uses reasons and evidence
+       to support particular points in a text").
+     """,
+     "CCSS.ELA-LITERACY.W.4.10": """
+     Write routinely over extended time frames (time for
+     research, reflection, and revision) and shorter time
+     frames (a single sitting or a day or two) for a range of
+     discipline-specific tasks, purposes, and audiences
+     """
+ }
+
+ #Prompt templates: where the actual prompt engineering happens. See Google doc for further reference
+ prompt_context = PromptTemplate(
+     input_variables=["chosen_topic", "wikitext"],
+     template="You are a writer at a school book publishing company. \
+ You will be given a certain topic, then your task is to write a school book article about it. \
+ Choose your words in accordance with the teachings of Wikipedia, precisely the explanations you find at the end of this prompt. \
+ Please only include full sentences in your answer. \
+ The article should have a clearly defined thread that is easy to follow. \
+ Please do not give the article a title, only include the body in your answer. \
+ Please write in a style that is fun to read and understandable for 12 year old kids. \
+ The article should be around 10-15 sentences long. \n\
+ \n\
+ Topic: {chosen_topic} \n\
+ \n\
+ Wikipedia: \n\
+ {wikitext}"
+ )
+ prompt_frq = PromptTemplate(
+     input_variables=["context", "standard_definition", "question_or_task"],
+     template="You are a 4th grade school teacher. \
+ You will be given an input text, which is a short article on a certain topic. \
+ Based on the content of the input text and addressing a 4th grade student, you will then generate a {question_or_task}. \
+ Do not include a title such as 'Question:' or 'Writing task:' in your output. \
+ Your output must be formulated in such a way that it can be used to test a person's ability to {standard_definition}. \n\
+ \n\
+ Input text: \n\
+ \n\
+ {context}"
+ )
+ prompt_evaluation = PromptTemplate(
+     input_variables=["context", "rubric",
+                      "frq", "chosen_answer"],
+     template="You are a 4th grade school teacher. \
+ You will be given four inputs: An article on a certain topic. \
+ A task or question related to said article, meant to be solved by a 4th grade student. \
+ The answer to said task or question, written by one of your students. \
+ A standard for evaluating said answer. \
+ Your output should be an objective evaluation of the student's answer, taking all inputs into account. \
+ Here are your inputs: \n\
+ \n\
+ The article: \n\
+ \n\
+ {context} \n\
+ \n\
+ The task or question: \n\
+ \n\
+ {frq} \n\
+ \n\
+ The student's answer: \n\
+ \n\
+ {chosen_answer} \n\
+ \n\
+ The standard: \
+ \n\
+ The student should be able to {rubric}. \n\
+ \n\
+ Now please take some time to formulate your output, by thoroughly looking at each input. Also think about whether \
+ the student has showcased the required ability or not. In other words, whether they passed or failed the test, and why. \
+ Structure your output like this: \n\
+ \n\
+ 'PASSED' or 'FAILED'\n\
+ Evaluation / Explanation"
+ )
+ prompt_topic_rand = PromptTemplate(input_variables=["var"], template="{var} give me a random \
+ writing topic in one or at most two words. Please nothing about sex, drugs, alcohol or violence.")
+ prompt_answer_good = PromptTemplate(input_variables=["context", "frq", "standard"],
+     template="You are a Senior Test Manager at a successful software company and a very smart person. \
+ Your job is to test a new educational software. \
+ Please read the text presented to you below, then answer the task or question that follows. \
+ Please do not simply copy whole sentences from the text. Just behave like you were back in junior high, \
+ where you always had the best English grades of the entire school. \
+ Please formulate your answer in a way that shows off your ability to {standard}. \
+ The purpose of the software is to measure this ability in a student. \n\n\
+ Here is the text: \n\
+ {context} \n\n\
+ Here is the task or question: \n\
+ {frq} \n\n\
+ Please take some time to think, then give it your best shot.")
+ prompt_answer_bad = PromptTemplate(input_variables=["context", "frq"],
+     template="You are a 10 year old student taking an English exam. You are not particularly intelligent. \
+ Unfortunately you also have trouble focusing on tasks and you happen to get the worst English grades of the entire class. \
+ Quiet reading and homework just do not feel natural to you. You do not even mind, since you are good at a lot of other things. \
+ You are, for example, an incredibly creative person and like to tell stories. \
+ Please read the text presented to you below, then answer the task or question that follows. \
+ \n\n\
+ Here is the text: \n\
+ {context} \n\n\
+ Here is the task or question: \n\
+ {frq} \n\n\
+ Please remember: you will NOT perform well on this task. Create a poorly formulated answer, \
+ using a bit of bad grammar, and also make some logical mistakes, \
+ clearly indicating that you do not possess the skills being tested. \
+ You must try hard but still fail at this exam.")
+ prompt_qc_run = PromptTemplate(input_variables=["context", "frq", "rubric",
+     "answer_good", "evaluation_good", "answer_bad", "evaluation_bad"],
+     template="You are a Senior Test Manager with 15 years of experience at a successful software company. \
+ Your daily business is to test educational AI software. You also have a degree in linguistics and love logic puzzles. \
+ Please have a look at 7 pieces of text, which will be given to you at the end of this prompt. \
+ Here is a description of each of the 7 texts: \
+ 1. an article on a certain topic, given by the software \n\
+ 2. a free-response question on this article, given by the software \n\
+ 3. a certain educational standard rubric that is used to evaluate the answers to this free-response question \n\
+ 4. the answer to the free-response question, given by a strong 4th grade student \n\
+ 5. the evaluation of the strong answer, given by the software \n\
+ 6. the answer to the free-response question, given by a weak 4th grade student \n\
+ 7. the evaluation of the weak answer, given by the software \n\
+ Your task today is the following: please have a critical look at the output of the software. \
+ Take your time on each of the 7 texts, then give critical feedback on any shortcomings of the software's AI. \
+ Give recommendations on how to further improve the quality of texts number 1., 2., 5. and 7., \
+ by fine-tuning the AI instructions or prompts. \
+ Please be rather critical.\n\n\
+ {context}\n\n\
+ {frq}\n\n\
+ {rubric}\n\n\
+ {answer_good}\n\n\
+ {evaluation_good}\n\n\
+ {answer_bad}\n\n\
+ {evaluation_bad}")
+ prompt_qc_grade = PromptTemplate(input_variables=["qc_report"],
+     template="You will be given a precise report that was written to evaluate a new software's performance. \
+ Take a good look at the report and decide on an overall evaluation grade that aligns with the entire report's sentiment. \
+ The grade should be a percentage (integer between 1 and 100 with a percent symbol) and correctly represent and support the report. \
+ Only give an integer and '%' as an output, nothing else.\n\n\
+ Here is the report:\n\n\
+ {qc_report}")
+
+ #Defining LLMs, sources and chains
+ llm = ChatOpenAI(model="gpt-4", temperature=0.5)
+ precise = ChatOpenAI(model="gpt-4", temperature=0.0)
+ random = ChatOpenAI(model="gpt-4", temperature=0.9)
+ wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
+ context_chain = LLMChain(llm=llm, prompt=prompt_context)
+ frq_chain = LLMChain(llm=llm, prompt=prompt_frq)
+ evaluation_chain = LLMChain(llm=llm, prompt=prompt_evaluation)
+ qc_answer_good_chain = LLMChain(llm=precise, prompt=prompt_answer_good)
+ qc_answer_bad_chain = LLMChain(llm=random, prompt=prompt_answer_bad)
+ topic_rand = LLMChain(llm=random, prompt=prompt_topic_rand)
+ var = "Please"  #dummy input for the single-variable topic prompt
+ qc_run_chain = LLMChain(llm=precise, prompt=prompt_qc_run)
+ qc_grade_chain = LLMChain(llm=precise, prompt=prompt_qc_grade)
+
+ #Tiny helper function that is necessary to keep the Wikipedia page metadata and/or
+ #unrelated following pages from spilling into the LLM's output
+ def trim_text(input_string):
+     keyword1 = "Summary:"
+     keyword2 = "Page:"
+     index1 = input_string.find(keyword1)
+     if index1 == -1:
+         new_string = input_string
+     else:
+         new_string = input_string[index1 + len(keyword1):].strip()
+     index2 = new_string.find(keyword2)
+     if index2 == -1:
+         return new_string
+     else:
+         return new_string[:index2].strip()
+
+ #Function for detecting copy/pasted parts of the context used as the answer
+ def plagiate(context, answer):
+     if answer in context:
+         return True
+     #Slide a 30-character window over the answer; any window that appears
+     #verbatim in the context counts as plagiarism
+     for i in range(len(answer) - 29):
+         substring = answer[i:i + 30]
+         if substring in context:
+             return True
+     return False
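
For reference, a minimal sketch of how the two helpers behave, on made-up inputs (editorial example, not part of the commit):

    from common import trim_text, plagiate

    #WikipediaQueryRun returns blocks like "Page: <title>\nSummary: <text>";
    #trim_text keeps only the first summary
    raw = "Page: Honey bee\nSummary: Honey bees are social insects.\nPage: Beekeeping"
    print(trim_text(raw))  #-> "Honey bees are social insects."

    #plagiate flags any 30-character run copied verbatim from the context
    context = "Honey bees are social insects that live in colonies."
    print(plagiate(context, "Honey bees are social insects that live in hives."))  #-> True
    print(plagiate(context, "Bees cooperate closely."))  #-> False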
main.py ADDED
@@ -0,0 +1,174 @@
+ #To access my personal API key from operating system environment variables.
+ #Inside the HuggingFace Space, this will be substituted by HF's "Secret" variable option.
+ #Feel free to use the tool as is (via my personal API key) for the time of my evaluation.
+ import os
+ import time
+
+ #Loading Streamlit for GUI
+ import streamlit as st
+
+ #Session variables - kept in st.session_state so results survive the rerun Streamlit triggers on every user input
+ if "standard_set" not in st.session_state:
+     st.session_state.standard_set = False
+ if "topic_set" not in st.session_state:
+     st.session_state.topic_set = False
+ if "content_set" not in st.session_state:
+     st.session_state.content_set = False
+ if "answer_set" not in st.session_state:
+     st.session_state.answer_set = False
+ if "evaluated" not in st.session_state:
+     st.session_state.evaluated = False
+ if "chosen_standard" not in st.session_state:
+     st.session_state.chosen_standard = ""
+ if "standard_definition" not in st.session_state:
+     st.session_state.standard_definition = ""
+ if "question_or_task" not in st.session_state:
+     st.session_state.question_or_task = ""
+ if "chosen_topic" not in st.session_state:
+     st.session_state.chosen_topic = ""
+ if "wikitext" not in st.session_state:
+     st.session_state.wikitext = ""
+ if "context" not in st.session_state:
+     st.session_state.context = ""
+ if "frq" not in st.session_state:
+     st.session_state.frq = ""
+ if "chosen_answer" not in st.session_state:
+     st.session_state.chosen_answer = ""
+ answer_logged = False
+
+
+ #Importing variables, objects, templates from "common"
+ from common import llm, wikipedia  #language models
+ from common import standard_definition_dict, question_or_task_dict, rubric_dict  #dictionaries
+ from common import prompt_context, prompt_frq, prompt_evaluation  #prompt templates
+ from common import context_chain, frq_chain, evaluation_chain  #prompting chains
+ from common import trim_text, plagiate  #custom functions
+
+
+ #Setting up streamlit UI, intro
+ st.set_page_config(page_title="FRQ Generator", page_icon="🎓",
+     menu_items={"About": "Version 1.0 \n\n Not for commercial use.",
+                 "Get help": "https://www.linkedin.com/in/alex-c-fischer"})
+ st.title("🎓Common Core FRQ Generator")
+ with st.sidebar:
+     st.title("Menu")
+     st.link_button(label="Admin", url="https://www.google.com")
+     st.link_button(label="Contact", url="https://www.linkedin.com/in/alex-c-fischer/")
+ st.write("This little tool automatically generates free-response questions (FRQs) \
+ to evaluate a 4th grade student's command of a given Common Core Writing Standard \
+ by having them read and write about a topic of their own choice. \
+ After the FRQ is answered, an evaluation will be provided.")
+ st.write("(The language processing is done by an AI model, \
+ yet the facts are sourced from the topic's Wikipedia page, to keep the content grounded and up to date.)")
+
+
+
+ #FRQ based on standard, student input and prompting engine
+ with st.form("standard_form"):
+     st.session_state.chosen_standard = st.selectbox(
+         "Choose 4th Grade Common Core Writing standard:",
+         ("CCSS.ELA-LITERACY.W.4.1", "CCSS.ELA-LITERACY.W.4.2", "CCSS.ELA-LITERACY.W.4.3", "CCSS.ELA-LITERACY.W.4.4",
+          "CCSS.ELA-LITERACY.W.4.5", "CCSS.ELA-LITERACY.W.4.6", "CCSS.ELA-LITERACY.W.4.7", "CCSS.ELA-LITERACY.W.4.8",
+          "CCSS.ELA-LITERACY.W.4.9", "CCSS.ELA-LITERACY.W.4.10")
+     )
+     st.session_state.standard_definition = standard_definition_dict[st.session_state.chosen_standard]
+     st.session_state.question_or_task = question_or_task_dict[st.session_state.chosen_standard]
+     subm_standard = st.form_submit_button("Set")
+     if subm_standard:
+         st.session_state.standard_set = True
+         st.write("We will test your ability to:")
+         st.write(f"📜{st.session_state.standard_definition}.")
+ if st.session_state.standard_set:
+     with st.form("topic_form"):
+         st.session_state.chosen_topic = st.text_input("Type in a topic of your interest, then click 'Submit'.")
+         subm_topic = st.form_submit_button("Submit")
+ if st.session_state.standard_set and subm_topic:
+     st.empty()
+     with st.spinner('🤖Browsing Wikipedia...'):
+         if st.session_state.wikitext == "":
+             wikitext = trim_text(wikipedia.run(st.session_state.chosen_topic))
+             if wikitext == "No good Wikipedia Search Result was found":
+                 st.write(f"🤖Sorry - I can't find anything in Wikipedia on '{st.session_state.chosen_topic}'. \
+ I would love to make something up, but I can't do that in here. Please try something else.")
+                 got_it = st.button("Got it")
+                 st.session_state.topic_set = False
+                 st.stop()
+             else:
+                 st.session_state.wikitext = wikitext
+                 st.session_state.topic_set = True
+                 st.success("Article found")
+     with st.spinner('🤖So interesting! Now please give me a few seconds to create the context and FRQ.'):
+         if st.session_state.context == "":
+             st.session_state.context = context_chain.run(
+                 chosen_topic=st.session_state.chosen_topic,
+                 wikitext=st.session_state.wikitext
+             )
+         if st.session_state.frq == "":
+             st.session_state.frq = frq_chain.run(
+                 context=st.session_state.context,
+                 standard_definition=st.session_state.standard_definition,
+                 question_or_task=st.session_state.question_or_task
+             )
+         st.success("Content and FRQ created")
+ if st.session_state.topic_set:
+     with st.form("content_form"):
+         st.write("🤖Here we go - that was quick, wasn't it?")
+         st.subheader("Context required to answer the FRQ:")
+         st.write(st.session_state.context)
+         st.subheader("Free Response Question:")
+         st.write(st.session_state.frq)
+         st.write("🤖Read all of the above? Great! Continue with the assignment at your own pace.")
+         next_step = st.form_submit_button("Continue")
+         if next_step:
+             st.session_state.content_set = True
+ if st.session_state.content_set:
+     with st.form("answer_form"):
+         st.session_state.chosen_answer = st.text_area("Type in your answer, then click 'Submit'. Please do not simply copy/paste from above.")
+         subm_answer = st.form_submit_button("Submit")
+ if st.session_state.content_set and subm_answer:
+     with st.spinner('🤖Logging...'):
+         pass
+     if plagiate(context=st.session_state.context, answer=st.session_state.chosen_answer):
+         st.session_state.content_set = False
+         st.write("🤖Using Ctrl+C/V defeats the purpose of this test, young friend.")
+         time.sleep(0.1)
+         st.write("🤖Those are the rules. Please overwrite the above answer in your own words - \
+ trust me, this is a great way to learn interesting new things.")
+         got_it = st.button("Got it")
+         st.stop()
+     else:
+         st.subheader("Answer submitted")
+         answer_logged = True
+ if answer_logged:
+     st.session_state.answer_set = True
+ if st.session_state.answer_set:
+     with st.form("evaluation_form"):
+         st.subheader("Evaluation")
+         with st.spinner("🤖Let me see how you did today."):
+             evaluation = evaluation_chain.run(
+                 context=st.session_state.context,
+                 rubric=rubric_dict[st.session_state.chosen_standard],
+                 frq=st.session_state.frq,
+                 chosen_answer=st.session_state.chosen_answer)
+             st.write(evaluation)
+         def clear_form():
+             st.session_state.answer_set = False
+         st.form_submit_button(label="Reformulate answer", on_click=clear_form)
+         st.empty()
+     with st.expander("Show Evaluation & Feedback Rubric"):
+         st.write("AI evaluated the student's ability to:")
+         st.text(rubric_dict[st.session_state.chosen_standard])
+     rerun = st.button("Rerun")
+     if rerun:
+         for key in list(st.session_state.keys()):  #copy keys first to avoid mutating while iterating
+             del st.session_state[key]
+         st.rerun()
+ st.divider()
+ st.write("Admin area: clicking below will open a new app")
+ st.link_button(label="QC Test run - let GPT-4 take this test", url="https://www.google.com")
+
+
+
+
+
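
The staged flow above leans on one recurring Streamlit pattern: a form submission triggers a rerun of the whole script, so each stage persists a boolean flag in st.session_state and the next stage is gated on that flag. A minimal, self-contained sketch of just that pattern (names are illustrative, not from the commit):

    import streamlit as st

    #Flags in session_state survive the rerun that every widget interaction triggers
    if "stage_done" not in st.session_state:
        st.session_state.stage_done = False

    with st.form("stage_form"):
        topic = st.text_input("Some input")
        if st.form_submit_button("Submit"):
            st.session_state.stage_done = True  #remembered on subsequent reruns

    #This block only renders once the flag has been set
    if st.session_state.stage_done:
        st.write(f"Next stage unlocked with: {topic}")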
qc_run.py ADDED
@@ -0,0 +1,118 @@
+ import os
+ import random as rand
+ import pandas as pd
+ import time
+ import datetime
+ import base64
+
+ import streamlit as st
+
+ #Importing variables, objects, templates from "common"
+ from common import llm, precise, random, wikipedia  #language models
+ from common import standard_definition_dict, question_or_task_dict, rubric_dict  #dictionaries
+ from common import prompt_answer_good, prompt_answer_bad, prompt_qc_run, prompt_qc_grade  #prompt templates
+ from common import context_chain, frq_chain, evaluation_chain, topic_rand, var  #prompting chains
+ from common import qc_answer_good_chain, qc_answer_bad_chain, qc_run_chain, qc_grade_chain  #prompting chains
+ from common import trim_text  #custom function
+
+ #script:
+ st.set_page_config(page_title="QC Test Run FRQ Generator", page_icon="⚙️",
+     menu_items={"About": "Version 1.0 \n\n Not for commercial use.",
+                 "Get help": "https://www.linkedin.com/in/alex-c-fischer"})
+ st.title("Automated QC Testing Script for the Common Core FRQ Generator")
+ st.write("The original test will now be taken by GPT-4.")
+ mode = st.radio("Choose Mode", ["Single Launch (+live generating)", "Serial Launch (+CSV download)"])
+ launch_qc = st.button("Launch")
+ if mode=="Single Launch (+live generating)" and launch_qc:
+     topic_qc = topic_rand.run(var=var)
+     numb_qc = rand.randint(1, 10)
+     standard_qc = "CCSS.ELA-LITERACY.W.4." + str(numb_qc)
+     st.divider()
+     st.subheader("Random topic:")
+     st.write(topic_qc)
+     st.subheader("Random CC standard:")
+     st.write(standard_qc)
+     standard_definition_qc = standard_definition_dict[standard_qc]
+     question_or_task_qc = question_or_task_dict[standard_qc]
+     wikitext_qc = trim_text(wikipedia.run(topic_qc))
+     st.divider()
+     st.subheader("Context:")
+     context_qc = context_chain.run(chosen_topic=topic_qc, wikitext=wikitext_qc)
+     st.write(context_qc)
+     st.divider()
+     st.subheader("Free Response Question:")
+     frq_qc = frq_chain.run(context=context_qc, standard_definition=standard_definition_qc, question_or_task=question_or_task_qc)
+     st.write(frq_qc)
+     st.divider()
+     st.subheader("Good Answer, according to GPT-4:")
+     answer_good_qc = qc_answer_good_chain.run(context=context_qc, frq=frq_qc, standard=standard_definition_qc)
+     st.write(answer_good_qc)
+     st.divider()
+     st.subheader("Evaluation on 'Good Answer':")
+     evaluation_good_qc = evaluation_chain.run(
+         context=context_qc, rubric=rubric_dict[standard_qc],
+         frq=frq_qc, chosen_answer=answer_good_qc
+     )
+     st.write(evaluation_good_qc)
+     st.divider()
+     st.subheader("Bad Answer, according to GPT-4:")
+     answer_bad_qc = qc_answer_bad_chain.run(context=context_qc, frq=frq_qc, standard=standard_definition_qc)
+     st.write(answer_bad_qc)
+     st.divider()
+     st.subheader("Evaluation on 'Bad Answer':")
+     evaluation_bad_qc = evaluation_chain.run(
+         context=context_qc, rubric=rubric_dict[standard_qc],
+         frq=frq_qc, chosen_answer=answer_bad_qc
+     )
+     st.write(evaluation_bad_qc)
+     st.divider()
+     st.subheader("Quality Control Report:")
+     qc_report = qc_run_chain.run(
+         context=context_qc, frq=frq_qc, rubric=rubric_dict[standard_qc],
+         answer_good=answer_good_qc, evaluation_good=evaluation_good_qc,
+         answer_bad=answer_bad_qc, evaluation_bad=evaluation_bad_qc)
+     st.write(qc_report)
+     with st.form("Overall Accuracy"):
+         st.header("Overall grading of generated content:")
+         qc_grade = qc_grade_chain.run(qc_report=qc_report)
+         st.header(qc_grade)
+         st.write("Want to save this run?")
+         st.write("Menu in upper right corner > Print > PDF")
+         st.form_submit_button("Clear All & Rerun")
+ if mode=="Serial Launch (+CSV download)":
+     batch = st.number_input("Number of reruns", min_value=1, max_value=20, value=1, step=1)
+     comment = st.text_input("Comment - note your prompt fine-tunings here, to track and analyse their effects")
+     if launch_qc:
+         df = pd.DataFrame(columns=["Round", "Comment", "Standard", "Topic", "Context", "FRQ", "Good Answer", "Good Evaluation", "Bad Answer", "Bad Evaluation", "Quality Control Report", "Overall Accuracy"])
+         progress = st.progress(0)
+         for i in range(batch):
+             progress.progress((i + 1) / batch)
+             topic_qc = topic_rand.run(var=var)
+             numb_qc = rand.randint(1, 10)
+             standard_qc = "CCSS.ELA-LITERACY.W.4." + str(numb_qc)
+             standard_definition_qc = standard_definition_dict[standard_qc]
+             question_or_task_qc = question_or_task_dict[standard_qc]
+             wikitext_qc = trim_text(wikipedia.run(topic_qc))
+             context_qc = context_chain.run(chosen_topic=topic_qc, wikitext=wikitext_qc)
+             frq_qc = frq_chain.run(context=context_qc, standard_definition=standard_definition_qc, question_or_task=question_or_task_qc)
+             answer_good_qc = qc_answer_good_chain.run(context=context_qc, frq=frq_qc, standard=standard_definition_qc)
+             evaluation_good_qc = evaluation_chain.run(context=context_qc, rubric=rubric_dict[standard_qc], frq=frq_qc, chosen_answer=answer_good_qc)
+             answer_bad_qc = qc_answer_bad_chain.run(context=context_qc, frq=frq_qc, standard=standard_definition_qc)
+             evaluation_bad_qc = evaluation_chain.run(context=context_qc, rubric=rubric_dict[standard_qc], frq=frq_qc, chosen_answer=answer_bad_qc)
+             qc_report = qc_run_chain.run(context=context_qc, frq=frq_qc, rubric=rubric_dict[standard_qc], answer_good=answer_good_qc, evaluation_good=evaluation_good_qc, answer_bad=answer_bad_qc, evaluation_bad=evaluation_bad_qc)
+             qc_grade = qc_grade_chain.run(qc_report=qc_report)
+             df.loc[len(df.index)] = {"Round": i+1, "Comment": comment, "Standard": standard_qc,
+                 "Topic": topic_qc, "Context": context_qc, "FRQ": frq_qc,
+                 "Good Answer": answer_good_qc, "Good Evaluation": evaluation_good_qc,
+                 "Bad Answer": answer_bad_qc, "Bad Evaluation": evaluation_bad_qc,
+                 "Quality Control Report": qc_report, "Overall Accuracy": qc_grade}
+             time.sleep(0.1)
+         progress.empty()
+         csv = df.to_csv(index=False)
+         b64 = base64.b64encode(csv.encode()).decode()
+         now = datetime.datetime.now()
+         timestamp_str = now.strftime("%Y-%m-%d_%H-%M-%S")
+         filename = f"{timestamp_str}_testruns_{batch}_rows.csv"
+         href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">Download Results CSV</a>'
+         st.markdown(href, unsafe_allow_html=True)
+
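
The serial mode assembles its download link by hand as a base64 data URI in raw HTML. For reference, a sketch of the same result using Streamlit's built-in download widget (assuming a reasonably recent Streamlit version; df and filename as built above):

    #Equivalent to the manual base64 anchor, without raw HTML
    st.download_button(
        label="Download Results CSV",
        data=df.to_csv(index=False).encode("utf-8"),
        file_name=filename,
        mime="text/csv",
    )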
requirements.txt ADDED
Binary file (11.7 kB)