Alexander Casimir Fischer
commited on
new file: __pycache__/common.cpython-311.pyc
Browse filesnew file: __pycache__/main.cpython-311.pyc
new file:
new file:
new file:
new file: requirements.txt
- __pycache__/common.cpython-311.pyc +0 -0
- __pycache__/main.cpython-311.pyc +0 -0
- +301 -0
- +174 -0
- +118 -0
- requirements.txt +0 -0
Binary file (16 kB). View file
Binary file (17.3 kB). View file
@@ -0,0 +1,301 @@
1 |
#Central storage for variables, objects, templates used by both apps
2 |
import os
3 |
4 |
#Importing dependencies
5 |
from langchain.chat_models import ChatOpenAI
6 |
from langchain.prompts import PromptTemplate
7 |
from langchain.chains import LLMChain
8 |
from import WikipediaQueryRun
9 |
from langchain.utilities import WikipediaAPIWrapper
10 |
11 |
#Prompt variables dictionary: Definitions of 4th grade Common Core Standards according to source provided by Crossover
12 |
#Keys are the Common Core standard identifiers; each value completes the phrase
#"... ability to {standard_definition}" inside the prompt templates.
standard_definition_dict = {
    "CCSS.ELA-LITERACY.W.4.1":"Write opinion pieces on topics or texts, supporting a point of view with reasons and information",
    "CCSS.ELA-LITERACY.W.4.2":"Write informative/explanatory texts to examine a topic and convey ideas and information clearly",
    "CCSS.ELA-LITERACY.W.4.3":"Write narratives to develop real or imagined experiences or events using effective technique, descriptive details, and clear event sequences",
    "CCSS.ELA-LITERACY.W.4.4":"Produce clear and coherent writing in which the development and organization are appropriate to task, purpose, and audience",
    "CCSS.ELA-LITERACY.W.4.5":"Develop and strengthen writing as needed by planning, revising, and editing - with guidance and support from peers and adults",
    "CCSS.ELA-LITERACY.W.4.6":"Use technology, including the Internet, to produce and publish writing as well as to interact and collaborate with others; demonstrate sufficient command of keyboarding skills to type a minimum of one page in a single sitting - with some guidance and support from adults",
    "CCSS.ELA-LITERACY.W.4.7":"Conduct short research projects that build knowledge through investigation of different aspects of a topic",
    "CCSS.ELA-LITERACY.W.4.8":"Recall relevant information from experiences or gather relevant information from print and digital sources; take notes and categorize information, and provide a list of sources",
    "CCSS.ELA-LITERACY.W.4.9":"Draw evidence from literary or informational texts to support analysis, reflection, and research",
    "CCSS.ELA-LITERACY.W.4.10":"Write routinely over extended time frames (time for research, reflection, and revision) and shorter time frames (a single sitting or a day or two) for a range of discipline-specific tasks, purposes, and audiences",
}
24 |
25 |
#Prompt variables dictionary: Definitions on how the AI's FRQ output must be formulated depending on the CCS selected,
26 |
#since a FRQ is not always a question in the grammatical sense
27 |
#Keys are the Common Core standard identifiers; each value is substituted into
#"... you will then generate a {question_or_task}." in the FRQ prompt template.
question_or_task_dict = {
    "CCSS.ELA-LITERACY.W.4.1": "writing task",
    "CCSS.ELA-LITERACY.W.4.2": "writing task",
    "CCSS.ELA-LITERACY.W.4.3": "creative writing task",
    "CCSS.ELA-LITERACY.W.4.4": "writing task. Also define the purpose and the supposed audience of the student's writing",
    "CCSS.ELA-LITERACY.W.4.5": "writing task",
    "CCSS.ELA-LITERACY.W.4.6": "research and writing task. The student may use the internet for research and ask adults for some guidance",
    "CCSS.ELA-LITERACY.W.4.7": "research and writing task with the goal of expanding the students knowledge",
    "CCSS.ELA-LITERACY.W.4.8": "scientific writing task",
    "CCSS.ELA-LITERACY.W.4.9": "free response question",
    "CCSS.ELA-LITERACY.W.4.10": "homework writing task",
}
39 |
40 |
#Rubric dictionary: chooses the rubric to be displayed in the expander after the evaluation
41 |
rubric_dict = {
42 |
43 |
• Write opinion pieces on topics or texts,
44 |
supporting a point of view with reasons and information.
45 |
• Introduce a topic or text clearly, state an opinion, and
46 |
create an organizational structure in which related ideas
47 |
are grouped to support the writer's purpose.
48 |
• Provide reasons that are supported by facts and details.
49 |
• Link opinion and reasons using words and phrases (e.g.,
50 |
for instance, in order to, in addition).
51 |
• Provide a concluding statement or section related to the
52 |
opinion presented.
53 |
54 |
55 |
• Write informative/explanatory texts to examine a topic
56 |
and convey ideas and information clearly.
57 |
• Introduce a topic clearly and group related information
58 |
in paragraphs and sections; include formatting (e.g.,
59 |
headings), illustrations, and multimedia when useful to
60 |
aiding comprehension.
61 |
• Develop the topic with facts, definitions, concrete
62 |
details, quotations, or other information and examples
63 |
related to the topic.
64 |
• Link ideas within categories of information using words
65 |
and phrases (e.g., another, for example, also, because).
66 |
• Use precise language and domain-specific vocabulary to
67 |
inform about or explain the topic.
68 |
• Provide a concluding statement or section related to the
69 |
information or explanation presented.
70 |
71 |
72 |
• Write narratives to develop real or imagined experiences
73 |
or events using effective technique, descriptive details,
74 |
and clear event sequences.
75 |
• Orient the reader by establishing a situation and
76 |
introducing a narrator and/or characters; organize an
77 |
event sequence that unfolds naturally.
78 |
• Use dialogue and description to develop experiences and
79 |
events or show the responses of characters to situations.
80 |
• Use a variety of transitional words and phrases to manage
81 |
the sequence of events.
82 |
• Use concrete words and phrases and sensory details to
83 |
convey experiences and events precisely.
84 |
• Provide a conclusion that follows from the narrated
85 |
experiences or events.
86 |
87 |
88 |
Produce clear and coherent writing in which the development
89 |
and organization are appropriate to task, purpose, and
90 |
audience. (Grade-specific expectations for writing types
91 |
are defined in standards 1-3 above.)
92 |
93 |
94 |
With guidance and support from peers and adults, develop
95 |
and strengthen writing as needed by planning, revising,
96 |
and editing. (Editing for conventions should demonstrate
97 |
command of Language standards 1-3 up to and including
98 |
grade 4 here.)
99 |
100 |
101 |
With some guidance and support from adults, use technology,
102 |
including the Internet, to produce and publish writing as
103 |
well as to interact and collaborate with others;
104 |
demonstrate sufficient command of keyboarding skills to
105 |
type a minimum of one page in a single sitting.
106 |
107 |
108 |
Conduct short research projects that build knowledge
109 |
through investigation of different aspects of a topic.
110 |
111 |
112 |
Recall relevant information from experiences or gather
113 |
relevant information from print and digital sources; take
114 |
notes and categorize information, and provide a list of
115 |
116 |
117 |
118 |
• Draw evidence from literary or informational texts to
119 |
support analysis, reflection, and research.
120 |
• Apply grade 4 Reading standards to literature (e.g.,
121 |
"Describe in depth a character, setting, or event in a
122 |
story or drama, drawing on specific details in the text
123 |
[e.g., a character's thoughts, words, or actions].").
124 |
• Apply grade 4 Reading standards to informational texts
125 |
(e.g., "Explain how an author uses reasons and evidence
126 |
to support particular points in a text").
127 |
128 |
129 |
Write routinely over extended time frames (time for
130 |
research, reflection, and revision) and shorter time
131 |
frames (a single sitting or a day or two) for a range of
132 |
discipline-specific tasks, purposes, and audiences
133 |
134 |
135 |
136 |
#Prompt templates: where the actual prompt engineering is happening. See google doc for further reference
137 |
prompt_context = PromptTemplate(
138 |
input_variables=["chosen_topic", "wikitext"],
139 |
template="You are a writer at a school book publishing company. \
140 |
You will be given a certain topic, then your task is to write a school book article about it. \
141 |
Choose your words in accordance to the teachings of Wikipedia, precisely the explanations you find at the end of this prompt. \
142 |
Please do only include full sentences in your answer. \
143 |
The article should have a clearly defined thread that is easy to follow. \
144 |
Please do not give the article a title, only include the body in your answer. \
145 |
Please write in a style that is fun to read and understandable by 12 year old kids. \
146 |
The article should be around 10-15 sentences long. \n\
147 |
148 |
Topic: {chosen_topic} \n\
149 |
150 |
Wikipedia: \n\
151 |
152 |
153 |
prompt_frq = PromptTemplate(
154 |
input_variables=["context", "standard_definition", "question_or_task"],
155 |
template="You are a 4th grade school teacher. \
156 |
You will be given an input text, which is a short article on a certain topic. \
157 |
Thematizing the content of the input text and adressing a 4th grade student, you will then generate a {question_or_task}. \
158 |
Do not include a title such as 'Question:' or 'Writing task:' in your output. \
159 |
Your output must be formulated in such a way that it can be used to test a person's ability to {standard_definition}. \n\
160 |
161 |
Input text: \n\
162 |
163 |
164 |
165 |
prompt_evaluation = PromptTemplate(
166 |
input_variables=["context", "rubric",
167 |
"frq", "chosen_answer"],
168 |
template="You are a 4th grade school teacher. \
169 |
You will be given four inputs: An article on a certain topic. \
170 |
A task or question related to said article, meant to be solved by a 4th grade student. \
171 |
The answer to said task or question, written by one of your students. \
172 |
A standard for evaluating said answer.\
173 |
Your output should be an objective evaluation of the student's answer, taking all inputs into account. \
174 |
Here are your inputs: \n\
175 |
176 |
The article: \n\
177 |
178 |
{context} \n\
179 |
180 |
The task or question: \n\
181 |
182 |
{frq} \n\
183 |
184 |
The student's answer: \n\
185 |
186 |
{chosen_answer} \n\
187 |
188 |
The standard:\
189 |
190 |
The student should be able to {rubric}. \n\
191 |
192 |
Now please take some time to formulate your output, by thoroughly looking at each input. Also think about whether \
193 |
the student has showcased the required ability or not. In other words, whether he passed or failed the test, and why. \
194 |
Structure your output like this: \n\
195 |
196 |
197 |
Evaluation / Explaination"
198 |
199 |
prompt_topic_rand = PromptTemplate(input_variables=["var"], template="{var} give me a random \
200 |
writing topic in one or maximum two words. Please nothing about sex, drugs, alcohol or violence.")
201 |
prompt_answer_good = PromptTemplate(input_variables=["context", "frq", "standard"],
202 |
template="You are a Senior Test Manager at a successful software company and a very smart person. \
203 |
Your job is to test a new educational software. \
204 |
Please read this text presented to you below, then answer the task or question that follows. \
205 |
Please do not simply copy whole sentences from the text. Just behave like you were back in junior high, \
206 |
where you always had the best English grades of the entire school. \
207 |
Please formulate your answer in a way, that shows off your ability to {standard}. \
208 |
The purpose of the software is to measure this ability in a student. \n\n\
209 |
Here is the text: \n\
210 |
{context} \n\n\
211 |
Here is the task or question: \n\
212 |
{frq} \n\n\
213 |
Please take some time to think, then give it your best shot.")
214 |
prompt_answer_bad = PromptTemplate(input_variables=["context", "frq"],
215 |
template="You are a 10 year old student taking an English exam. You are not particularly intelligent. \
216 |
Unfortunately you also have troubling focusing on tasks and you happen to get the worst English grades of the entire class. \
217 |
Quit reading and homework does just not feel natural for you. You do not even mind, since you are good at a lot of other things. \
218 |
You are, for example, an incredibly creative person and like to tell stories. \
219 |
Please read this text presented to you below, then answer the task or question that follows. \
220 |
221 |
Here is the text: \n\
222 |
{context} \n\n\
223 |
Here is the task or question: \n\
224 |
{frq} \n\n\
225 |
Please remember: you will NOT perform well on this task. Create a poorly formulated answer, \
226 |
using a bit of bad grammar and also make some logical mistakes. \
227 |
Clearly indicating that you do not possess the skills being tested.\
228 |
You must try hard but still fail at this exam.")
229 |
prompt_qc_run = PromptTemplate(input_variables=["context", "frq", "rubric", \
230 |
"answer_good", "evaluation_good", "answer_bad", "evaluation_bad"],
231 |
template="You are a Senior Test Manager with 15 years of experience at a successful software company. \
232 |
Your daily business is to test educational KI software. You also have a degree in linguistics and love logic puzzles. \
233 |
Please have a look at 7 pieces of text, which will be given to you at the end of this prompt. \
234 |
Here are the 7 descriptions: \
235 |
1. an article on a certain topic, given by the software \n\
236 |
2. a free-response question on this article, given by the software \n\
237 |
3. a certain educational standard rubric, that is used to evaluate the answer on this free-response question \n\
238 |
4. the answer to the free-response question, given by a strong 4th grade student \n\
239 |
5. the evaluation of the strong answer, given by the software \n\
240 |
6. the answer to the free-response question, given by a weak 4th grade student \n\
241 |
7. the evaluation of the weak answer, given by the software \n\
242 |
Your task today is the following: please have a critical look at the output of the software. \
243 |
Take your time on each of the 7 texts, then give critical feedback on any shortcomings of the software's KI. \
244 |
Give recommendations on how to further improve the quality of texts number 1., 2., 5. and 7., \
245 |
by fine-tuning the KI instructions or prompts. \
246 |
Please be rather critical.\n\n\
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
prompt_qc_grade = PromptTemplate(input_variables=["qc_report"],
255 |
template="You will be given a precise report that was written to evaluate a new software's performance. \
256 |
Take a good look at the report and decide on an overall evaluation grade that aligns with the entire report's sentiment. \
257 |
The grade should be a percentage (integer between 1 and 100 with a percent symbol) and correctly represent and support the report. \
258 |
Only give an integer and '%' as an output, nothing else.\n\n\
259 |
Here is the report:\n\n\
260 |
261 |
262 |
#Defining LLMs, sources and Chains
263 |
llm = ChatOpenAI(model="gpt-4", temperature=0.5)
264 |
precise = ChatOpenAI(model="gpt-4", temperature=0.0)
265 |
random = ChatOpenAI(model="gpt-4", temperature=0.9)
266 |
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
267 |
context_chain = LLMChain(llm=llm, prompt=prompt_context)
268 |
frq_chain = LLMChain(llm=llm, prompt=prompt_frq, )
269 |
evaluation_chain = LLMChain(llm=llm, prompt=prompt_evaluation)
270 |
qc_answer_good_chain = LLMChain(llm=precise, prompt=prompt_answer_good)
271 |
qc_answer_bad_chain = LLMChain(llm=random, prompt=prompt_answer_bad)
272 |
topic_rand = LLMChain(llm=random, prompt=prompt_topic_rand)
273 |
274 |
qc_run_chain = LLMChain(llm=precise, prompt=prompt_qc_run)
275 |
qc_grade_chain = LLMChain(llm=precise, prompt=prompt_qc_grade)
276 |
277 |
#Tiny helper function that is necessary to keep the wikipedia page metadata and/or
278 |
#unrelated following pages from spilling into the LLM's output
279 |
def trim_text(input_string):
    """Cut a Wikipedia lookup result down to the first summary it contains.

    Everything up to and including the first "Summary:" marker is dropped,
    and everything from the next "Page:" marker onwards is dropped as well,
    so page metadata and any following pages do not reach the LLM prompt.

    Args:
        input_string: Raw text returned by the Wikipedia lookup.

    Returns:
        The stripped text between the first "Summary:" and the following
        "Page:". If "Summary:" is absent the input is used as-is (so fixed
        messages such as a "nothing found" notice pass through unchanged);
        if no "Page:" follows, the text runs to the end.
    """
    summary_marker = "Summary:"
    page_marker = "Page:"

    _, found_summary, after_summary = input_string.partition(summary_marker)
    # Without a summary marker the text must stay untouched; slicing here
    # would chop the first characters off messages the caller compares against.
    text = after_summary.strip() if found_summary else input_string

    before_next_page, found_page, _ = text.partition(page_marker)
    return before_next_page.strip() if found_page else text
292 |
293 |
#Function for detecting the use of copy/pasted parts of the context as answer
294 |
def plagiate(context, answer, window=30):
    """Report whether the answer contains text copied from the context.

    The answer counts as copied when it appears verbatim in the context as a
    whole, or when any run of ``window`` consecutive characters of the answer
    appears verbatim in the context.

    Args:
        context: The article text the student was asked to read.
        answer: The student's answer.
        window: Length of the copied run that triggers detection
            (defaults to 30 characters).

    Returns:
        True if copied text was found, otherwise False. Note that an empty
        answer yields True, because the empty string is a substring of any
        text.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")
    if answer in context:
        return True
    # Slide over every full-length window, including the one that ends on the
    # answer's last character (the final start index is len(answer) - window).
    last_start = len(answer) - window
    return any(
        answer[start:start + window] in context
        for start in range(last_start + 1)
    )
@@ -0,0 +1,174 @@
1 |
#To access my personal API key from operating system environment variables.
2 |
#Inside the HuggingFace Space, this will be substituted by HF's "Secret" variable option.
3 |
#Feel free to use the tool as is (via my personal API key) for the time of my evaluation.
4 |
import os
5 |
import time
6 |
7 |
#Loading Streamlit for GUI
8 |
import streamlit as st
9 |
10 |
#Session variables - saved locally to not run the whole script every time a user input is entered
11 |
#Initial value of every session variable; a value is only written when the
#key is not yet present, so existing state survives Streamlit's script reruns.
_SESSION_DEFAULTS = {
    "standard_set": False,
    "topic_set": False,
    "content_set": False,
    "answer_set": False,
    "evaluated": False,
    "chosen_standard": "",
    "standard_definition": "",
    "question_or_task": "",
    "chosen_topic": "",
    "wikitext": "",
    "context": "",
    "frq": "",
    "chosen_answer": "",
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
38 |
39 |
40 |
41 |
#Importing variables, objects, templates from "common"
42 |
from common import llm, wikipedia #language models
43 |
from common import standard_definition_dict, question_or_task_dict, rubric_dict #dictionaries
44 |
from common import prompt_context, prompt_frq, prompt_evaluation #prompt templates
45 |
from common import context_chain, frq_chain, evaluation_chain #prompting chains
46 |
from common import trim_text, plagiate #custom functions
47 |
48 |
49 |
#Setting up streamlit UI, intro
50 |
st.set_page_config(page_title="FQR Generator", page_icon="🎓",
51 |
menu_items={"About":"Version 1.0 \n\n Not for commercial use.",
52 |
"Get help":""})
53 |
st.title("🎓Common Core FRQ Generator")
54 |
with st.sidebar:
55 |
56 |
st.link_button(label="Admin", url="")
57 |
st.link_button(label="Contact", url="")
58 |
st.write("This little tool automatically generates free-response questions (FQRs) \
59 |
to evaluate a 4th grade student's knowledge of a given Common Core Writing Standard \
60 |
by reading and writing on a topic of their own choice. \
61 |
After the FRQ is answered, an evaluation will be provided.")
62 |
st.write("(The language processing is done by an AI model, \
63 |
yet the facts are sourced from the topic's wikipedia page, to ensure hallucination-free and up-to-date content.)" )
64 |
65 |
66 |
#FRQ based on standard, student input and prompting engine
67 |
with st.form("standard_form"):
68 |
st.session_state.chosen_standard = st.selectbox(
69 |
"Choose 4th Grade Common Core Writing standard:",
70 |
71 |
72 |
73 |
74 |
st.session_state.standard_definition = standard_definition_dict[st.session_state.chosen_standard]
75 |
st.session_state.question_or_task = question_or_task_dict[st.session_state.chosen_standard]
76 |
subm_standard =st.form_submit_button("Set")
77 |
if subm_standard:
78 |
79 |
st.write("We will test your ability to:")
80 |
81 |
if st.session_state.standard_set:
82 |
with st.form("topic_form"):
83 |
st.session_state.chosen_topic = st.text_input("Type in a topic of your interest, then click 'Submit'.")
84 |
subm_topic = st.form_submit_button("Submit")
85 |
if st.session_state.standard_set and subm_topic:
86 |
87 |
with st.spinner('🤖Browsing wikipedia...'):
88 |
if st.session_state.wikitext=="":
89 |
wikitext = trim_text(
90 |
if wikitext=="No good Wikipedia Search Result was found":
91 |
st.write(f"🤖Sorry - I can't find anything in wikipedia on '{st.session_state.chosen_topic}'. \
92 |
I would love to make something up, but I can't do that in here. Please try something else.")
93 |
got_it = st.button("Got it")
94 |
95 |
96 |
97 |
st.session_state.wikitext = wikitext
98 |
99 |
st.success("Article found")
100 |
with st.spinner('🤖So interesting! Now please give me a few seconds to create the context and FRQ.'):
101 |
if st.session_state.context=="":
102 |
st.session_state.context =
103 |
104 |
105 |
106 |
if st.session_state.frq=="":
107 |
st.session_state.frq =
108 |
109 |
110 |
111 |
112 |
st.success("Content and FRQ created")
113 |
if st.session_state.topic_set:
114 |
with st.form("content_form"):
115 |
st.write("🤖Here we go - that was quick, wasn't it?")
116 |
st.subheader("Context required to answer the FRQ:")
117 |
118 |
st.subheader("Free Response Question:")
119 |
120 |
st.write("🤖Read all of the above? Great! Continue with the assignment at your own pace.")
121 |
next = st.form_submit_button("Continue")
122 |
if next:
123 |
124 |
if st.session_state.content_set:
125 |
with st.form("answer_form"):
126 |
st.session_state.chosen_answer = st.text_area("Type in your answer, then click 'Submit'. Please do not simply copy/paste from above.")
127 |
subm_answer = st.form_submit_button("Submit")
128 |
if st.session_state.content_set and subm_answer:
129 |
with st.spinner('🤖Logging...'):
130 |
131 |
if plagiate(context=st.session_state.context, answer=st.session_state.chosen_answer):
132 |
133 |
st.write("🤖Using Crtl+C/P defeats the purpose of this test, young friend.")
134 |
135 |
st.write("🤖Those are the rules. Please overwrite above answer in your own words - \
136 |
trust me, this is a great way to learn interesting new things.")
137 |
got_it = st.button("Got it")
138 |
139 |
140 |
st.subheader("Answer submitted")
141 |
142 |
if answer_logged:
143 |
144 |
if st.session_state.answer_set:
145 |
with st.form("evaluation_form"):
146 |
147 |
with st.spinner("🤖Let me see how you did today."):
148 |
evaluation =
149 |
150 |
151 |
152 |
153 |
154 |
def clear_form():
155 |
156 |
157 |
st.form_submit_button(label="Reformulate answer", on_click=clear_form)
158 |
159 |
with st.expander("Show Evaluation & Feedback Rubric"):
160 |
st.write("AI evaluated the student's ability to:")
161 |
162 |
rerun = st.button("Rerun")
163 |
if rerun:
164 |
for key in st.session_state.keys():
165 |
del st.session_state[key]
166 |
167 |
168 |
st.write("Admin area: clicking below will open a new app")
169 |
st.link_button(label="QC Test run - let GPT-4 take this test", url="")
170 |
171 |
172 |
173 |
174 |
@@ -0,0 +1,118 @@
1 |
import os
2 |
import random as rand
3 |
import pandas as pd
4 |
import time
5 |
import datetime
6 |
import base64
7 |
8 |
import streamlit as st
9 |
10 |
#Importing variables, objects, templates from "common"
11 |
from common import llm, precise, random, wikipedia #language models
12 |
from common import standard_definition_dict, question_or_task_dict, rubric_dict #dictionaries
13 |
from common import prompt_answer_good, prompt_answer_bad, prompt_qc_run, prompt_qc_grade #prompt templates
14 |
from common import context_chain, frq_chain, evaluation_chain, topic_rand, var #prompting chains
15 |
from common import qc_answer_good_chain, qc_answer_bad_chain, qc_run_chain, qc_grade_chain #prompting chains
16 |
from common import trim_text #custom function
17 |
18 |
19 |
st.set_page_config(page_title="QC Test run FQR Generator", page_icon="⚙️",
20 |
menu_items={"About":"Version 1.0 \n\n Not for commercial use.",
21 |
"Get help":""})
22 |
st.title("Automatized QC Testing Script for Common Core FRQ Generator")
23 |
st.write("The original Test will now be taken by GPT-4.")
24 |
mode ="Choose Mode", ["Single Launch (+live generating)", "Serial Launch (+CSV-Download)"])
25 |
launch_qc = st.button("Launch")
26 |
if mode=="Single Launch (+live generating)" and launch_qc:
27 |
topic_qc =
28 |
numb_qc = rand.randint(1, 10)
29 |
standard_qc = "CCSS.ELA-LITERACY.W.4."+str(numb_qc)
30 |
31 |
st.subheader("Random topic:")
32 |
33 |
st.subheader("Random CC standard:")
34 |
35 |
standard_definition_qc = standard_definition_dict[standard_qc]
36 |
question_or_task_qc = question_or_task_dict[standard_qc]
37 |
wikitext_qc = trim_text(
38 |
39 |
40 |
context_qc =, wikitext=wikitext_qc)
41 |
42 |
43 |
st.subheader("Free Response Question:")
44 |
frq_qc =, standard_definition=standard_definition_qc, question_or_task=question_or_task_qc)
45 |
46 |
47 |
st.subheader("Good Answer, according to GPT-4:")
48 |
answer_good_qc =, frq=frq_qc, standard=standard_definition_qc)
49 |
50 |
51 |
st.subheader("Evaluation on 'Good Answer':")
52 |
evaluation_good_qc =
53 |
context=context_qc, rubric=rubric_dict[standard_qc],
54 |
frq=frq_qc, chosen_answer=answer_good_qc
55 |
56 |
57 |
58 |
st.subheader("Bad Answer, according to GPT-4:")
59 |
answer_bad_qc =, frq=frq_qc, standard=standard_definition_qc)
60 |
61 |
62 |
st.subheader("Evaluation on 'Bad Answer':")
63 |
evaluation_bad_qc =
64 |
context=context_qc, rubric=rubric_dict[standard_qc],
65 |
frq=frq_qc, chosen_answer=answer_bad_qc
66 |
67 |
68 |
69 |
st.subheader("Quality Control Report:")
70 |
qc_report =
71 |
context=context_qc, frq=frq_qc, rubric=rubric_dict[standard_qc],
72 |
answer_good=answer_good_qc, evaluation_good=evaluation_good_qc,
73 |
answer_bad=answer_bad_qc, evaluation_bad=evaluation_bad_qc)
74 |
75 |
with st.form("Overall Accuracy"):
76 |
st.header("Overall grading of generated content:")
77 |
qc_grade =
78 |
79 |
st.write("Want to save this run?")
80 |
st.write("Menu in upper right corner > Print > PDF")
81 |
st.form_submit_button("Clear All & Rerun")
82 |
if mode=="Serial Launch (+CSV-Download)":
83 |
batch = st.number_input("Number of reruns", min_value=1, max_value=20, value=1, step=1)
84 |
comment = st.text_input("Comment - note your prompt fine tunings here, to track and analyse their effects")
85 |
if launch_qc:
86 |
df = pd.DataFrame(columns=["Round", "Comment", "Standard", "Topic", "Context", "FRQ", "Good Answer", "Good Evaluation", "Bad Answer", "Bad Evaluation", "Quality Control Report", "Overall Accurancy"])
87 |
progress = st.progress(0)
88 |
for i in range(batch):
89 |
progress.progress((i + 1) / batch)
90 |
topic_qc =
91 |
numb_qc = rand.randint(1, 10)
92 |
#The key needs the grade segment ("W.4.") to match the keys of the standard dictionaries
standard_qc = "CCSS.ELA-LITERACY.W.4." + str(numb_qc)
93 |
standard_definition_qc = standard_definition_dict[standard_qc]
94 |
question_or_task_qc = question_or_task_dict[standard_qc]
95 |
wikitext_qc = trim_text(
96 |
context_qc =, wikitext=wikitext_qc)
97 |
frq_qc =, standard_definition=standard_definition_qc, question_or_task=question_or_task_qc)
98 |
answer_good_qc =, frq=frq_qc, standard=standard_definition_qc)
99 |
evaluation_good_qc =, standard_definition=standard_definition_qc, frq=frq_qc, chosen_answer=answer_good_qc)
100 |
answer_bad_qc =, frq=frq_qc, standard=standard_definition_qc)
101 |
evaluation_bad_qc =, standard_definition=standard_definition_qc, frq=frq_qc, chosen_answer=answer_bad_qc)
102 |
qc_report =, frq=frq_qc, standard_definition=standard_definition_qc, answer_good=answer_good_qc, evaluation_good=evaluation_good_qc, answer_bad=answer_bad_qc, evaluation_bad=evaluation_bad_qc)
103 |
qc_grade =
104 |
df.loc[len(df.index)] = {"Round":i+1, "Comment":comment, "Standard":standard_qc,
105 |
"Topic":topic_qc, "Context":context_qc, "FRQ":frq_qc,
106 |
"Good Answer":answer_good_qc, "Good Evaluation":evaluation_good_qc,
107 |
"Bad Answer":answer_bad_qc, "Bad Evaluation":evaluation_bad_qc,
108 |
"Quality Control Report":qc_report, "Overall Accurancy":qc_grade}
109 |
110 |
111 |
csv = df.to_csv(index=False)
112 |
b64 = base64.b64encode(csv.encode()).decode()
113 |
now =
114 |
timestamp_str = now.strftime("%Y-%m-%d_%H-%M-%S")
115 |
filename = f"{timestamp_str}_testruns_{batch}_rows.csv"
116 |
href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">Download Results CSV</a>'
117 |
st.markdown(href, unsafe_allow_html=True)
118 |
Binary file (11.7 kB). View file