Alexander Casimir Fischer
committed
Commit 3e72a37 · Parent: 11d5492
new file: __pycache__/common.cpython-311.pyc
new file: __pycache__/main.cpython-311.pyc
new file: common.py
new file: main.py
new file: qc_run.py
new file: requirements.txt
- __pycache__/common.cpython-311.pyc +0 -0
- __pycache__/main.cpython-311.pyc +0 -0
- common.py +301 -0
- main.py +174 -0
- qc_run.py +118 -0
- requirements.txt +0 -0
__pycache__/common.cpython-311.pyc
ADDED
Binary file (16 kB)
__pycache__/main.cpython-311.pyc
ADDED
Binary file (17.3 kB)
common.py
ADDED
@@ -0,0 +1,301 @@
#Central storage for variables, objects and templates used by both apps
import os

#Importing dependencies
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.tools import WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper

#Prompt variables dictionary: definitions of the 4th grade Common Core Standards according to the source provided by Crossover
standard_definition_dict = {
    "CCSS.ELA-LITERACY.W.4.1": "Write opinion pieces on topics or texts, supporting a point of view with reasons and information",
    "CCSS.ELA-LITERACY.W.4.2": "Write informative/explanatory texts to examine a topic and convey ideas and information clearly",
    "CCSS.ELA-LITERACY.W.4.3": "Write narratives to develop real or imagined experiences or events using effective technique, descriptive details, and clear event sequences",
    "CCSS.ELA-LITERACY.W.4.4": "Produce clear and coherent writing in which the development and organization are appropriate to task, purpose, and audience",
    "CCSS.ELA-LITERACY.W.4.5": "Develop and strengthen writing as needed by planning, revising, and editing - with guidance and support from peers and adults",
    "CCSS.ELA-LITERACY.W.4.6": "Use technology, including the Internet, to produce and publish writing as well as to interact and collaborate with others; demonstrate sufficient command of keyboarding skills to type a minimum of one page in a single sitting - with some guidance and support from adults",
    "CCSS.ELA-LITERACY.W.4.7": "Conduct short research projects that build knowledge through investigation of different aspects of a topic",
    "CCSS.ELA-LITERACY.W.4.8": "Recall relevant information from experiences or gather relevant information from print and digital sources; take notes and categorize information, and provide a list of sources",
    "CCSS.ELA-LITERACY.W.4.9": "Draw evidence from literary or informational texts to support analysis, reflection, and research",
    "CCSS.ELA-LITERACY.W.4.10": "Write routinely over extended time frames (time for research, reflection, and revision) and shorter time frames (a single sitting or a day or two) for a range of discipline-specific tasks, purposes, and audiences"
}

#Prompt variables dictionary: defines how the AI's FRQ output must be formulated depending on the CCS selected,
#since an FRQ is not always a question in the grammatical sense
question_or_task_dict = {
    "CCSS.ELA-LITERACY.W.4.1": "writing task",
    "CCSS.ELA-LITERACY.W.4.2": "writing task",
    "CCSS.ELA-LITERACY.W.4.3": "creative writing task",
    "CCSS.ELA-LITERACY.W.4.4": "writing task. Also define the purpose and the supposed audience of the student's writing",
    "CCSS.ELA-LITERACY.W.4.5": "writing task",
    "CCSS.ELA-LITERACY.W.4.6": "research and writing task. The student may use the internet for research and ask adults for some guidance",
    "CCSS.ELA-LITERACY.W.4.7": "research and writing task with the goal of expanding the student's knowledge",
    "CCSS.ELA-LITERACY.W.4.8": "scientific writing task",
    "CCSS.ELA-LITERACY.W.4.9": "free response question",
    "CCSS.ELA-LITERACY.W.4.10": "homework writing task"
}

#Rubric dictionary: selects the rubric to be displayed in an expander after the evaluation
rubric_dict = {
    "CCSS.ELA-LITERACY.W.4.1": """
• Write opinion pieces on topics or texts,
supporting a point of view with reasons and information.
• Introduce a topic or text clearly, state an opinion, and
create an organizational structure in which related ideas
are grouped to support the writer's purpose.
• Provide reasons that are supported by facts and details.
• Link opinion and reasons using words and phrases (e.g.,
for instance, in order to, in addition).
• Provide a concluding statement or section related to the
opinion presented.
""",
    "CCSS.ELA-LITERACY.W.4.2": """
• Write informative/explanatory texts to examine a topic
and convey ideas and information clearly.
• Introduce a topic clearly and group related information
in paragraphs and sections; include formatting (e.g.,
headings), illustrations, and multimedia when useful to
aiding comprehension.
• Develop the topic with facts, definitions, concrete
details, quotations, or other information and examples
related to the topic.
• Link ideas within categories of information using words
and phrases (e.g., another, for example, also, because).
• Use precise language and domain-specific vocabulary to
inform about or explain the topic.
• Provide a concluding statement or section related to the
information or explanation presented.
""",
    "CCSS.ELA-LITERACY.W.4.3": """
• Write narratives to develop real or imagined experiences
or events using effective technique, descriptive details,
and clear event sequences.
• Orient the reader by establishing a situation and
introducing a narrator and/or characters; organize an
event sequence that unfolds naturally.
• Use dialogue and description to develop experiences and
events or show the responses of characters to situations.
• Use a variety of transitional words and phrases to manage
the sequence of events.
• Use concrete words and phrases and sensory details to
convey experiences and events precisely.
• Provide a conclusion that follows from the narrated
experiences or events.
""",
    "CCSS.ELA-LITERACY.W.4.4": """
Produce clear and coherent writing in which the development
and organization are appropriate to task, purpose, and
audience. (Grade-specific expectations for writing types
are defined in standards 1-3 above.)
""",
    "CCSS.ELA-LITERACY.W.4.5": """
With guidance and support from peers and adults, develop
and strengthen writing as needed by planning, revising,
and editing. (Editing for conventions should demonstrate
command of Language standards 1-3 up to and including
grade 4 here.)
""",
    "CCSS.ELA-LITERACY.W.4.6": """
With some guidance and support from adults, use technology,
including the Internet, to produce and publish writing as
well as to interact and collaborate with others;
demonstrate sufficient command of keyboarding skills to
type a minimum of one page in a single sitting.
""",
    "CCSS.ELA-LITERACY.W.4.7": """
Conduct short research projects that build knowledge
through investigation of different aspects of a topic.
""",
    "CCSS.ELA-LITERACY.W.4.8": """
Recall relevant information from experiences or gather
relevant information from print and digital sources; take
notes and categorize information, and provide a list of
sources.
""",
    "CCSS.ELA-LITERACY.W.4.9": """
• Draw evidence from literary or informational texts to
support analysis, reflection, and research.
• Apply grade 4 Reading standards to literature (e.g.,
"Describe in depth a character, setting, or event in a
story or drama, drawing on specific details in the text
[e.g., a character's thoughts, words, or actions].").
• Apply grade 4 Reading standards to informational texts
(e.g., "Explain how an author uses reasons and evidence
to support particular points in a text").
""",
    "CCSS.ELA-LITERACY.W.4.10": """
Write routinely over extended time frames (time for
research, reflection, and revision) and shorter time
frames (a single sitting or a day or two) for a range of
discipline-specific tasks, purposes, and audiences.
"""
}

#Prompt templates: where the actual prompt engineering happens. See the Google Doc for further reference
prompt_context = PromptTemplate(
    input_variables=["chosen_topic", "wikitext"],
    template="You are a writer at a school book publishing company. \
You will be given a certain topic; your task is to write a school book article about it. \
Choose your words in accordance with the Wikipedia material, specifically the explanations you find at the end of this prompt. \
Please include only full sentences in your answer. \
The article should have a clearly defined thread that is easy to follow. \
Please do not give the article a title; only include the body in your answer. \
Please write in a style that is fun to read and understandable by 12 year old kids. \
The article should be around 10-15 sentences long. \n\
\n\
Topic: {chosen_topic} \n\
\n\
Wikipedia: \n\
{wikitext}"
)
prompt_frq = PromptTemplate(
    input_variables=["context", "standard_definition", "question_or_task"],
    template="You are a 4th grade school teacher. \
You will be given an input text, which is a short article on a certain topic. \
Drawing on the content of the input text and addressing a 4th grade student, you will then generate a {question_or_task}. \
Do not include a title such as 'Question:' or 'Writing task:' in your output. \
Your output must be formulated in such a way that it can be used to test a person's ability to {standard_definition}. \n\
\n\
Input text: \n\
\n\
{context}"
)
prompt_evaluation = PromptTemplate(
    input_variables=["context", "rubric",
                     "frq", "chosen_answer"],
    template="You are a 4th grade school teacher. \
You will be given four inputs: An article on a certain topic. \
A task or question related to said article, meant to be solved by a 4th grade student. \
The answer to said task or question, written by one of your students. \
A standard for evaluating said answer. \
Your output should be an objective evaluation of the student's answer, taking all inputs into account. \
Here are your inputs: \n\
\n\
The article: \n\
\n\
{context} \n\
\n\
The task or question: \n\
\n\
{frq} \n\
\n\
The student's answer: \n\
\n\
{chosen_answer} \n\
\n\
The standard:\
\n\
The student should be able to {rubric}. \n\
\n\
Now please take some time to formulate your output, by thoroughly looking at each input. Also think about whether \
the student has showcased the required ability or not. In other words, whether they passed or failed the test, and why. \
Structure your output like this: \n\
\n\
'PASSED' or 'FAILED'\n\
Evaluation / Explanation"
)
prompt_topic_rand = PromptTemplate(input_variables=["var"], template="{var} give me a random \
writing topic in one or at most two words. Please nothing about sex, drugs, alcohol or violence.")
prompt_answer_good = PromptTemplate(input_variables=["context", "frq", "standard"],
    template="You are a Senior Test Manager at a successful software company and a very smart person. \
Your job is to test a new educational software. \
Please read the text presented to you below, then answer the task or question that follows. \
Please do not simply copy whole sentences from the text. Just behave like you were back in junior high, \
where you always had the best English grades of the entire school. \
Please formulate your answer in a way that shows off your ability to {standard}. \
The purpose of the software is to measure this ability in a student. \n\n\
Here is the text: \n\
{context} \n\n\
Here is the task or question: \n\
{frq} \n\n\
Please take some time to think, then give it your best shot.")
prompt_answer_bad = PromptTemplate(input_variables=["context", "frq"],
    template="You are a 10 year old student taking an English exam. You are not particularly intelligent. \
Unfortunately you also have trouble focusing on tasks, and you happen to get the worst English grades of the entire class. \
Quiet reading and homework just do not feel natural for you. You do not even mind, since you are good at a lot of other things. \
You are, for example, an incredibly creative person and like to tell stories. \
Please read the text presented to you below, then answer the task or question that follows. \
\n\n\
Here is the text: \n\
{context} \n\n\
Here is the task or question: \n\
{frq} \n\n\
Please remember: you will NOT perform well on this task. Create a poorly formulated answer, \
using a bit of bad grammar, and also make some logical mistakes, \
clearly indicating that you do not possess the skills being tested. \
You must try hard but still fail at this exam.")
prompt_qc_run = PromptTemplate(input_variables=["context", "frq", "rubric", \
    "answer_good", "evaluation_good", "answer_bad", "evaluation_bad"],
    template="You are a Senior Test Manager with 15 years of experience at a successful software company. \
Your daily business is to test educational AI software. You also have a degree in linguistics and love logic puzzles. \
Please have a look at 7 pieces of text, which will be given to you at the end of this prompt. \
Here are the 7 descriptions: \
1. an article on a certain topic, given by the software \n\
2. a free-response question on this article, given by the software \n\
3. a certain educational standard rubric, that is used to evaluate the answer to this free-response question \n\
4. the answer to the free-response question, given by a strong 4th grade student \n\
5. the evaluation of the strong answer, given by the software \n\
6. the answer to the free-response question, given by a weak 4th grade student \n\
7. the evaluation of the weak answer, given by the software \n\
Your task today is the following: please have a critical look at the output of the software. \
Take your time on each of the 7 texts, then give critical feedback on any shortcomings of the software's AI. \
Give recommendations on how to further improve the quality of texts number 1., 2., 5. and 7., \
by fine-tuning the AI instructions or prompts. \
Please be rather critical.\n\n\
{context}\n\n\
{frq}\n\n\
{rubric}\n\n\
{answer_good}\n\n\
{evaluation_good}\n\n\
{answer_bad}\n\n\
{evaluation_bad}")
prompt_qc_grade = PromptTemplate(input_variables=["qc_report"],
    template="You will be given a detailed report that was written to evaluate a new software's performance. \
Take a good look at the report and decide on an overall evaluation grade that aligns with the entire report's sentiment. \
The grade should be a percentage (an integer between 1 and 100 with a percent symbol) and correctly represent and support the report. \
Only give an integer and '%' as output, nothing else.\n\n\
Here is the report:\n\n\
{qc_report}")

#Defining LLMs, sources and chains
llm = ChatOpenAI(model="gpt-4", temperature=0.5)
precise = ChatOpenAI(model="gpt-4", temperature=0.0)
random = ChatOpenAI(model="gpt-4", temperature=0.9)
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
context_chain = LLMChain(llm=llm, prompt=prompt_context)
frq_chain = LLMChain(llm=llm, prompt=prompt_frq)
evaluation_chain = LLMChain(llm=llm, prompt=prompt_evaluation)
qc_answer_good_chain = LLMChain(llm=precise, prompt=prompt_answer_good)
qc_answer_bad_chain = LLMChain(llm=random, prompt=prompt_answer_bad)
topic_rand = LLMChain(llm=random, prompt=prompt_topic_rand)
var = "Please"  #filler input for topic_rand, whose template expects exactly one variable
qc_run_chain = LLMChain(llm=precise, prompt=prompt_qc_run)
qc_grade_chain = LLMChain(llm=precise, prompt=prompt_qc_grade)

#Tiny helper function that keeps the Wikipedia page metadata and/or
#unrelated follow-up pages from spilling into the LLM's output
def trim_text(input_string):
    keyword1 = "Summary:"
    keyword2 = "Page:"
    index1 = input_string.find(keyword1)
    if index1 == -1:
        new_string = input_string
    else:
        new_string = input_string[index1 + len(keyword1):].strip()
    index2 = new_string.find(keyword2)
    if index2 == -1:
        return new_string
    else:
        return new_string[:index2].strip()

#Function for detecting copy/pasted parts of the context used as the answer
def plagiate(context, answer):
    if answer in context:
        return True
    for i in range(len(answer) - 29):  #slide a 30-character window over the answer
        substring = answer[i:i + 30]
        if substring in context:
            return True
    return False
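Reviewer note on the two helpers above: a minimal smoke-test sketch (not part of the commit, hard-coded strings are made up for illustration). Bear in mind that importing common constructs the ChatOpenAI objects at module level, so OPENAI_API_KEY must be set in the environment even though no model call is made here.

#Hypothetical smoke test for the helpers in common.py
from common import trim_text, plagiate

raw = "Page: Honey bee\nSummary: Bees are flying insects. Page: Wasp\nSummary: Wasps sting."
print(trim_text(raw))  #-> "Bees are flying insects." (metadata and the next page are trimmed)

ctx = "Bees are flying insects closely related to wasps and ants."
print(plagiate(ctx, "flying insects closely related to wasps and ants."))  #True: copied from the context
print(plagiate(ctx, "I think bees matter a lot because they pollinate."))  #False: no copied 30-character run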
main.py
ADDED
@@ -0,0 +1,174 @@
#To access my personal API key from operating system environment variables.
#Inside the HuggingFace Space, this will be substituted by HF's "Secret" variable option.
#Feel free to use the tool as is (via my personal API key) for the time of my evaluation.
import os
import time

#Loading Streamlit for GUI
import streamlit as st

#Session variables - cached across reruns so the chains are not re-invoked every time a user input is entered
if "standard_set" not in st.session_state:
    st.session_state.standard_set = False
if "topic_set" not in st.session_state:
    st.session_state.topic_set = False
if "content_set" not in st.session_state:
    st.session_state.content_set = False
if "answer_set" not in st.session_state:
    st.session_state.answer_set = False
if "evaluated" not in st.session_state:
    st.session_state.evaluated = False
if "chosen_standard" not in st.session_state:
    st.session_state.chosen_standard = ""
if "standard_definition" not in st.session_state:
    st.session_state.standard_definition = ""
if "question_or_task" not in st.session_state:
    st.session_state.question_or_task = ""
if "chosen_topic" not in st.session_state:
    st.session_state.chosen_topic = ""
if "wikitext" not in st.session_state:
    st.session_state.wikitext = ""
if "context" not in st.session_state:
    st.session_state.context = ""
if "frq" not in st.session_state:
    st.session_state.frq = ""
if "chosen_answer" not in st.session_state:
    st.session_state.chosen_answer = ""
answer_logged = False

#Importing variables, objects, templates from "common"
from common import llm, wikipedia  #language model and Wikipedia source
from common import standard_definition_dict, question_or_task_dict, rubric_dict  #dictionaries
from common import prompt_context, prompt_frq, prompt_evaluation  #prompt templates
from common import context_chain, frq_chain, evaluation_chain  #prompting chains
from common import trim_text, plagiate  #custom functions

#Setting up streamlit UI, intro
st.set_page_config(page_title="FRQ Generator", page_icon="🎓",
                   menu_items={"About": "Version 1.0 \n\n Not for commercial use.",
                               "Get help": "https://www.linkedin.com/in/alex-c-fischer"})
st.title("🎓Common Core FRQ Generator")
with st.sidebar:
    st.title("Menu")
    st.link_button(label="Admin", url="https://www.google.com")
    st.link_button(label="Contact", url="https://www.linkedin.com/in/alex-c-fischer/")
st.write("This little tool automatically generates free-response questions (FRQs) \
to evaluate a 4th grade student's command of a given Common Core Writing Standard \
through reading and writing on a topic of their own choice. \
After the FRQ is answered, an evaluation is provided.")
st.write("(The language processing is done by an AI model, \
yet the facts are sourced from the topic's Wikipedia page to keep the content grounded and up to date.)")

#FRQ based on standard, student input and prompting engine
with st.form("standard_form"):
    st.session_state.chosen_standard = st.selectbox(
        "Choose 4th Grade Common Core Writing standard:",
        ("CCSS.ELA-LITERACY.W.4.1", "CCSS.ELA-LITERACY.W.4.2", "CCSS.ELA-LITERACY.W.4.3", "CCSS.ELA-LITERACY.W.4.4",
         "CCSS.ELA-LITERACY.W.4.5", "CCSS.ELA-LITERACY.W.4.6", "CCSS.ELA-LITERACY.W.4.7", "CCSS.ELA-LITERACY.W.4.8",
         "CCSS.ELA-LITERACY.W.4.9", "CCSS.ELA-LITERACY.W.4.10")
    )
    st.session_state.standard_definition = standard_definition_dict[st.session_state.chosen_standard]
    st.session_state.question_or_task = question_or_task_dict[st.session_state.chosen_standard]
    subm_standard = st.form_submit_button("Set")
    if subm_standard:
        st.session_state.standard_set = True
        st.write("We will test your ability to:")
        st.write(f"📜{st.session_state.standard_definition}.")
if st.session_state.standard_set:
    with st.form("topic_form"):
        st.session_state.chosen_topic = st.text_input("Type in a topic of your interest, then click 'Submit'.")
        subm_topic = st.form_submit_button("Submit")
if st.session_state.standard_set and subm_topic:
    st.empty()
    with st.spinner('🤖Browsing Wikipedia...'):
        if st.session_state.wikitext == "":
            wikitext = trim_text(wikipedia.run(st.session_state.chosen_topic))
            if wikitext == "No good Wikipedia Search Result was found":
                st.write(f"🤖Sorry - I can't find anything on Wikipedia about '{st.session_state.chosen_topic}'. \
I would love to make something up, but I can't do that in here. Please try something else.")
                got_it = st.button("Got it")
                st.session_state.topic_set = False
                st.stop()
            else:
                st.session_state.wikitext = wikitext
                st.session_state.topic_set = True
                st.success("Article found")
    with st.spinner('🤖So interesting! Now please give me a few seconds to create the context and FRQ.'):
        if st.session_state.context == "":
            st.session_state.context = context_chain.run(
                chosen_topic=st.session_state.chosen_topic,
                wikitext=st.session_state.wikitext
            )
        if st.session_state.frq == "":
            st.session_state.frq = frq_chain.run(
                context=st.session_state.context,
                standard_definition=st.session_state.standard_definition,
                question_or_task=st.session_state.question_or_task
            )
        st.success("Content and FRQ created")
if st.session_state.topic_set:
    with st.form("content_form"):
        st.write("🤖Here we go - that was quick, wasn't it?")
        st.subheader("Context required to answer the FRQ:")
        st.write(st.session_state.context)
        st.subheader("Free Response Question:")
        st.write(st.session_state.frq)
        st.write("🤖Read all of the above? Great! Continue with the assignment at your own pace.")
        next_step = st.form_submit_button("Continue")
        if next_step:
            st.session_state.content_set = True
if st.session_state.content_set:
    with st.form("answer_form"):
        st.session_state.chosen_answer = st.text_area("Type in your answer, then click 'Submit'. Please do not simply copy/paste from above.")
        subm_answer = st.form_submit_button("Submit")
if st.session_state.content_set and subm_answer:
    with st.spinner('🤖Logging...'):
        pass
    if plagiate(context=st.session_state.context, answer=st.session_state.chosen_answer):
        st.session_state.content_set = False
        st.write("🤖Using Ctrl+C/V defeats the purpose of this test, young friend.")
        time.sleep(0.1)
        st.write("🤖Those are the rules. Please overwrite the above answer in your own words - \
trust me, this is a great way to learn interesting new things.")
        got_it = st.button("Got it")
        st.stop()
    else:
        st.subheader("Answer submitted")
        answer_logged = True
if answer_logged:
    st.session_state.answer_set = True
if st.session_state.answer_set:
    with st.form("evaluation_form"):
        st.subheader("Evaluation")
        with st.spinner("🤖Let me see how you did today."):
            evaluation = evaluation_chain.run(
                context=st.session_state.context,
                rubric=rubric_dict[st.session_state.chosen_standard],
                frq=st.session_state.frq,
                chosen_answer=st.session_state.chosen_answer)
            st.write(evaluation)
        def clear_form():
            st.session_state.answer_set = False
        st.form_submit_button(label="Reformulate answer", on_click=clear_form)
        st.empty()
    with st.expander("Show Evaluation & Feedback Rubric"):
        st.write("AI evaluated the student's ability to:")
        st.text(rubric_dict[st.session_state.chosen_standard])
    rerun = st.button("Rerun")
    if rerun:
        for key in st.session_state.keys():
            del st.session_state[key]
        st.rerun()
st.divider()
st.write("Admin area: clicking below will open a new app")
st.link_button(label="QC Test run - let GPT-4 take this test", url="https://www.google.com")
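Reviewer note: main.py leans on one Streamlit pattern throughout - each stage writes a flag into st.session_state, so later top-to-bottom reruns of the script skip straight past completed stages. A minimal, self-contained sketch of that pattern (hypothetical names, not from the commit):

#Minimal sketch of the gating pattern used in main.py: a flag in
#st.session_state survives Streamlit's top-to-bottom script reruns.
import streamlit as st

if "stage_done" not in st.session_state:
    st.session_state.stage_done = False  #first run: stage still locked

with st.form("demo_form"):
    st.text_input("Say something")
    if st.form_submit_button("Submit"):
        st.session_state.stage_done = True  #unlock the next stage

if st.session_state.stage_done:
    st.write("Stage unlocked - this line renders on every rerun from now on.")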
qc_run.py
ADDED
@@ -0,0 +1,118 @@
import random as rand
import pandas as pd
import time
import datetime
import base64

import streamlit as st

#Importing variables, objects, templates from "common"
from common import wikipedia  #Wikipedia source
from common import standard_definition_dict, question_or_task_dict, rubric_dict  #dictionaries
from common import context_chain, frq_chain, evaluation_chain, topic_rand, var  #prompting chains
from common import qc_answer_good_chain, qc_answer_bad_chain, qc_run_chain, qc_grade_chain  #prompting chains
from common import trim_text  #custom function

#script:
st.set_page_config(page_title="QC Test Run FRQ Generator", page_icon="⚙️",
                   menu_items={"About": "Version 1.0 \n\n Not for commercial use.",
                               "Get help": "https://www.linkedin.com/in/alex-c-fischer"})
st.title("Automated QC Testing Script for Common Core FRQ Generator")
st.write("The original test will now be taken by GPT-4.")
mode = st.radio("Choose Mode", ["Single Launch (+live generating)", "Serial Launch (+CSV-Download)"])
launch_qc = st.button("Launch")
if mode == "Single Launch (+live generating)" and launch_qc:
    topic_qc = topic_rand.run(var=var)
    numb_qc = rand.randint(1, 10)  #pick one of the ten W.4 standards at random
    standard_qc = "CCSS.ELA-LITERACY.W.4." + str(numb_qc)
    st.divider()
    st.subheader("Random topic:")
    st.write(topic_qc)
    st.subheader("Random CC standard:")
    st.write(standard_qc)
    standard_definition_qc = standard_definition_dict[standard_qc]
    question_or_task_qc = question_or_task_dict[standard_qc]
    wikitext_qc = trim_text(wikipedia.run(topic_qc))
    st.divider()
    st.subheader("Context:")
    context_qc = context_chain.run(chosen_topic=topic_qc, wikitext=wikitext_qc)
    st.write(context_qc)
    st.divider()
    st.subheader("Free Response Question:")
    frq_qc = frq_chain.run(context=context_qc, standard_definition=standard_definition_qc, question_or_task=question_or_task_qc)
    st.write(frq_qc)
    st.divider()
    st.subheader("Good Answer, according to GPT-4:")
    answer_good_qc = qc_answer_good_chain.run(context=context_qc, frq=frq_qc, standard=standard_definition_qc)
    st.write(answer_good_qc)
    st.divider()
    st.subheader("Evaluation of 'Good Answer':")
    evaluation_good_qc = evaluation_chain.run(
        context=context_qc, rubric=rubric_dict[standard_qc],
        frq=frq_qc, chosen_answer=answer_good_qc
    )
    st.write(evaluation_good_qc)
    st.divider()
    st.subheader("Bad Answer, according to GPT-4:")
    answer_bad_qc = qc_answer_bad_chain.run(context=context_qc, frq=frq_qc)
    st.write(answer_bad_qc)
    st.divider()
    st.subheader("Evaluation of 'Bad Answer':")
    evaluation_bad_qc = evaluation_chain.run(
        context=context_qc, rubric=rubric_dict[standard_qc],
        frq=frq_qc, chosen_answer=answer_bad_qc
    )
    st.write(evaluation_bad_qc)
    st.divider()
    st.subheader("Quality Control Report:")
    qc_report = qc_run_chain.run(
        context=context_qc, frq=frq_qc, rubric=rubric_dict[standard_qc],
        answer_good=answer_good_qc, evaluation_good=evaluation_good_qc,
        answer_bad=answer_bad_qc, evaluation_bad=evaluation_bad_qc)
    st.write(qc_report)
    with st.form("Overall Accuracy"):
        st.header("Overall grading of generated content:")
        qc_grade = qc_grade_chain.run(qc_report=qc_report)
        st.header(qc_grade)
        st.write("Want to save this run?")
        st.write("Menu in upper right corner > Print > PDF")
        st.form_submit_button("Clear All & Rerun")
if mode == "Serial Launch (+CSV-Download)":
    batch = st.number_input("Number of reruns", min_value=1, max_value=20, value=1, step=1)
    comment = st.text_input("Comment - note your prompt fine-tunings here, to track and analyse their effects")
    if launch_qc:
        df = pd.DataFrame(columns=["Round", "Comment", "Standard", "Topic", "Context", "FRQ", "Good Answer", "Good Evaluation", "Bad Answer", "Bad Evaluation", "Quality Control Report", "Overall Accuracy"])
        progress = st.progress(0)
        for i in range(batch):
            progress.progress((i + 1) / batch)
            topic_qc = topic_rand.run(var=var)
            numb_qc = rand.randint(1, 10)
            standard_qc = "CCSS.ELA-LITERACY.W.4." + str(numb_qc)  #key must include the grade, "W.4.", to match the dictionaries
            standard_definition_qc = standard_definition_dict[standard_qc]
            question_or_task_qc = question_or_task_dict[standard_qc]
            wikitext_qc = trim_text(wikipedia.run(topic_qc))
            context_qc = context_chain.run(chosen_topic=topic_qc, wikitext=wikitext_qc)
            frq_qc = frq_chain.run(context=context_qc, standard_definition=standard_definition_qc, question_or_task=question_or_task_qc)
            answer_good_qc = qc_answer_good_chain.run(context=context_qc, frq=frq_qc, standard=standard_definition_qc)
            evaluation_good_qc = evaluation_chain.run(context=context_qc, rubric=rubric_dict[standard_qc], frq=frq_qc, chosen_answer=answer_good_qc)  #prompt_evaluation takes "rubric" as its input variable
            answer_bad_qc = qc_answer_bad_chain.run(context=context_qc, frq=frq_qc)
            evaluation_bad_qc = evaluation_chain.run(context=context_qc, rubric=rubric_dict[standard_qc], frq=frq_qc, chosen_answer=answer_bad_qc)
            qc_report = qc_run_chain.run(context=context_qc, frq=frq_qc, rubric=rubric_dict[standard_qc], answer_good=answer_good_qc, evaluation_good=evaluation_good_qc, answer_bad=answer_bad_qc, evaluation_bad=evaluation_bad_qc)  #prompt_qc_run likewise expects "rubric"
            qc_grade = qc_grade_chain.run(qc_report=qc_report)
            df.loc[len(df.index)] = {"Round": i + 1, "Comment": comment, "Standard": standard_qc,
                                     "Topic": topic_qc, "Context": context_qc, "FRQ": frq_qc,
                                     "Good Answer": answer_good_qc, "Good Evaluation": evaluation_good_qc,
                                     "Bad Answer": answer_bad_qc, "Bad Evaluation": evaluation_bad_qc,
                                     "Quality Control Report": qc_report, "Overall Accuracy": qc_grade}
            time.sleep(0.1)
        progress.empty()
        csv = df.to_csv(index=False)
        b64 = base64.b64encode(csv.encode()).decode()
        now = datetime.datetime.now()
        timestamp_str = now.strftime("%Y-%m-%d_%H-%M-%S")
        filename = f"{timestamp_str}_testruns_{batch}_rows.csv"
        href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">Download Results CSV</a>'
        st.markdown(href, unsafe_allow_html=True)
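Reviewer note on the CSV export: qc_run.py hand-rolls a base64 data-URI link and renders it with st.markdown(..., unsafe_allow_html=True). Recent Streamlit versions ship st.download_button, which achieves the same without raw HTML; a sketch of that alternative (assuming a Streamlit version that includes it, dummy data for illustration):

#Sketch: built-in alternative to the manual base64 <a href> download link
import pandas as pd
import streamlit as st

df = pd.DataFrame({"Round": [1], "Overall Accuracy": ["87%"]})  #dummy results
st.download_button(
    label="Download Results CSV",
    data=df.to_csv(index=False).encode(),
    file_name="testruns.csv",
    mime="text/csv",
)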
requirements.txt
ADDED
Binary file (11.7 kB)
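Reviewer note: requirements.txt being detected as a binary file is unusual for a plain-text pip file. One plausible cause (an assumption, not verifiable from this diff) is that it was produced by pip freeze on Windows, which writes UTF-16 with a byte-order mark; re-encoding it as UTF-8 would let diff viewers render it as text:

#Hypothetical fix, assuming the file is UTF-16 with a BOM (a common reason
#a .txt file gets flagged as binary); rewrites it as plain UTF-8.
with open("requirements.txt", "rb") as f:
    data = f.read()
if data.startswith((b"\xff\xfe", b"\xfe\xff")):
    text = data.decode("utf-16")
else:
    text = data.decode("utf-8")
with open("requirements.txt", "w", encoding="utf-8") as f:
    f.write(text)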