File size: 7,017 Bytes
f8f01d6
e52177c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8f01d6
 
e52177c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8f01d6
e52177c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8f01d6
e52177c
 
 
 
 
 
 
 
f8f01d6
e52177c
 
f8f01d6
e52177c
 
 
f8f01d6
e52177c
 
f8f01d6
 
e52177c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import re

import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

# App title shown in the browser tab and as the page heading.
TITLE = "Bloom’s Taxonomy Helper (Classify + Generate + Rewrite)"

# ------------------ Bloom labels ------------------
# Canonical Bloom's-taxonomy levels, ordered from lowest to highest cognitive
# demand. Used both as the zero-shot candidate labels and as UI dropdown choices.
LABELS = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]

# ------------------ Generation templates by Bloom ------------------
# One prompt template per Bloom level, keyed by the labels in LABELS.
# Each template carries two str.format placeholders that MUST be filled
# before the prompt is sent to the generator:
#   {n}     - how many questions to produce
#   {topic} - the subject matter the questions should cover
PROMPT_TEMPLATES = {
    "Remember": (
        "Write {n} distinct factual recall questions for college students.\n"
        "Topic: {topic}\n"
        "Cognitive focus: Remember (recognize/recall facts and terms).\n"
        "Style: Use 'What', 'When', 'Define', or 'List'.\n"
        "Do not include answers. Number each on its own line."
    ),
    "Understand": (
        "Write {n} distinct comprehension questions for college students.\n"
        "Topic: {topic}\n"
        "Cognitive focus: Understand (explain, summarize, interpret).\n"
        "Style: Use 'Explain', 'Summarize', 'Describe', or 'Give an example of'.\n"
        "Do not include answers. Number each on its own line."
    ),
    "Apply": (
        "Write {n} distinct application questions for college students.\n"
        "Topic: {topic}\n"
        "Cognitive focus: Apply (use procedures, compute, demonstrate use).\n"
        "Style: Ask students to calculate, implement, or solve with concrete data.\n"
        "Avoid 'define' or 'explain' prompts. Do not include answers. Number each on its own line."
    ),
    "Analyze": (
        "Write {n} distinct analysis questions for college students.\n"
        "Topic: {topic}\n"
        "Cognitive focus: Analyze (compare/contrast, break down relationships, cause–effect).\n"
        "Style: Use 'Compare', 'Differentiate', 'Explain why', 'Break down', or 'Trace'.\n"
        "Do not include answers. Number each on its own line."
    ),
    "Evaluate": (
        "Write {n} distinct evaluation questions for college students.\n"
        "Topic: {topic}\n"
        "Cognitive focus: Evaluate (judge, critique, justify with criteria and evidence).\n"
        "Style: Use 'Argue', 'Defend', 'Critique', 'Which is better and why', or 'Assess'.\n"
        "Do not include answers. Number each on its own line."
    ),
    "Create": (
        "Write {n} distinct creation/synthesis tasks for college students.\n"
        "Topic: {topic}\n"
        "Cognitive focus: Create (design, plan, invent, propose, produce a novel artifact).\n"
        "Style: Use 'Design', 'Propose', 'Develop', 'Compose', or 'Build'. Include realistic constraints.\n"
        "Do not include answers. Number each on its own line."
    ),
}

# ------------------ Pipelines ------------------
# Zero-shot classifier (no training): scores each question against LABELS
# via NLI entailment. Loaded once at import time; first call downloads weights.
clf = pipeline("zero-shot-classification", model="typeform/distilbert-base-uncased-mnli")

# Higher-quality generator (CPU-friendly but better than *small*)
# Tokenizer and model are loaded explicitly so both generation functions
# share a single instance instead of reloading per call.
GEN_MODEL = "google/flan-t5-base"
gen_tok = AutoTokenizer.from_pretrained(GEN_MODEL)
gen_mdl = AutoModelForSeq2SeqLM.from_pretrained(GEN_MODEL)
gen = pipeline("text2text-generation", model=gen_mdl, tokenizer=gen_tok)

# ------------------ Helpers ------------------
def clean_numbering(text: str, n: int) -> str:
    """Normalize raw generator output into clean "1. ..." numbering.

    Splits *text* into candidate items (one per line, or by sentence if the
    model returned a single unnumbered paragraph), drops non-textual lines,
    keeps at most *n* items, and renumbers them 1..n on separate lines.

    Args:
        text: Raw text emitted by the generation pipeline.
        n: Maximum number of items to keep (clamped to at least 1).

    Returns:
        The cleaned, renumbered items joined with newlines.
    """
    lines = [ln.strip(" -*\t") for ln in text.splitlines() if ln.strip()]
    # If the model returned one unnumbered paragraph, split on sentence ends.
    if len(lines) == 1 and "1." not in lines[0]:
        lines = [p.strip() for p in re.split(r"(?<=[.?!])\s+", lines[0]) if p.strip()]
    # Discard lines with no letters (stray numbers, separators, etc.).
    lines = [ln for ln in lines if any(c.isalpha() for c in ln)]
    lines = lines[:max(1, n)]
    # Strip only an existing leading numbering marker such as "12." or "3)".
    # (The previous lstrip('0123456789. ') stripped by character set and
    # mangled content that legitimately starts with digits, e.g. "3D printing".)
    items = (re.sub(r"^\d+\s*[.)]\s*", "", ln).strip() for ln in lines)
    return "\n".join(f"{i}. {ln}" for i, ln in enumerate(items, start=1))

# ------------------ Functions ------------------
def classify_bloom(question: str):
    """Classify a question into a Bloom level with the zero-shot pipeline.

    Args:
        question: The question text; falsy/blank input yields empty results.

    Returns:
        A pair ``(top_label, score_table)`` where ``top_label`` is the
        best-scoring Bloom level and ``score_table`` lists every label with
        its score (rounded to 3 decimals), one per line.
    """
    text = (question or "").strip()
    if not text:
        return "", ""
    result = clf(text, LABELS)
    ranked = [
        (label, round(float(score), 3))
        for label, score in zip(result["labels"], result["scores"])
    ]
    top_label = ranked[0][0] if ranked else ""
    table = "\n".join(f"{label}: {score}" for label, score in ranked)
    return top_label, table

def generate_questions(topic: str, level: str, n: int, creativity: float):
    """Generate *n* Bloom-aligned questions about *topic*.

    Args:
        topic: Subject matter for the questions; blank input returns a hint.
        level: Bloom level key into PROMPT_TEMPLATES (falls back to
            "Understand" for unknown values).
        n: Number of questions requested.
        creativity: Sampling temperature; values <= 0.01 switch to greedy
            decoding.

    Returns:
        A cleanly numbered list of questions, or an error hint string.
    """
    topic = (topic or "").strip()
    if not topic:
        return "Please enter a topic."
    template = PROMPT_TEMPLATES.get(level, PROMPT_TEMPLATES["Understand"])
    prompt = template.format(n=int(n), topic=topic)

    # Only pass sampling knobs when actually sampling: with do_sample=False
    # transformers ignores temperature/top_p and emits warnings about them.
    gen_kwargs = {"max_new_tokens": 180, "num_beams": 1}
    if creativity > 0.01:
        gen_kwargs.update(
            do_sample=True,
            temperature=max(0.01, min(1.2, creativity)),  # clamp to a sane range
            top_p=0.9,
        )
    else:
        gen_kwargs["do_sample"] = False  # greedy decoding for "no creativity"

    out = gen(prompt, **gen_kwargs)[0]["generated_text"]
    return clean_numbering(out, int(n))

def rewrite_level(question: str, target_level: str):
    """Rewrite a question so it targets a different Bloom level.

    Args:
        question: The original question; blank input returns a hint.
        target_level: Bloom level key into PROMPT_TEMPLATES (falls back to
            "Understand" for unknown values).

    Returns:
        A single rewritten, numbered question, or an error hint string.
    """
    question = (question or "").strip()
    if not question:
        return "Paste a question to rewrite."
    # Leverage the template for the target level to steer the rewriting.
    template = PROMPT_TEMPLATES.get(target_level, PROMPT_TEMPLATES["Understand"])
    # BUG FIX: the template contains {n}/{topic} str.format placeholders;
    # previously it was embedded raw, so the model literally saw "{n}" and
    # "{topic}". Fill them in before building the prompt.
    level_brief = template.format(
        n=1, topic="(the subject of the original question below)"
    )
    prompt = (
        f"{level_brief}\n\n"
        f"Transform the following single question to match the level above. Keep it concise and do not include the answer.\n"
        f"Original: {question}\n"
        f"Return exactly 1 numbered question."
    )
    out = gen(prompt, max_new_tokens=100, do_sample=False)[0]["generated_text"]
    return clean_numbering(out, 1)

# ------------------ UI ------------------
# Three-tab Gradio Blocks app: Classify / Generate / Rewrite. Each button
# wires straight to the corresponding module-level function above.
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(
        "Classify questions by Bloom level, generate new questions aligned to a level, "
        "and rewrite a question to a different level. Runs fully on open models."
    )

    # Tab 1: zero-shot classification of an existing question.
    with gr.Tab("Classify"):
        q = gr.Textbox(
            label="Enter a question",
            lines=4,
            placeholder="e.g., Explain why randomized controlled trials reduce bias."
        )
        # Outputs are read-only: top predicted label plus the full score table.
        top = gr.Textbox(label="Predicted Bloom level", interactive=False)
        scores = gr.Textbox(label="All scores", interactive=False)
        gr.Button("Classify").click(classify_bloom, [q], [top, scores])

    # Tab 2: generate n new questions for a topic at a chosen Bloom level.
    with gr.Tab("Generate"):
        with gr.Row():
            topic = gr.Textbox(label="Topic", value="binary numbers in computer science")
            level = gr.Dropdown(LABELS, value="Apply", label="Bloom level")
        with gr.Row():
            n = gr.Slider(1, 10, value=5, step=1, label="How many questions")
            # Maps to sampling temperature in generate_questions.
            creativity = gr.Slider(0.0, 1.2, value=0.6, step=0.1, label="Creativity (temperature)")
        out = gr.Textbox(label="Generated questions", lines=12)
        gr.Button("Generate").click(generate_questions, [topic, level, n, creativity], out)

    # Tab 3: rewrite one question to target a different Bloom level.
    with gr.Tab("Rewrite"):
        q2 = gr.Textbox(label="Original question", lines=4, value="Define binary number.")
        target = gr.Dropdown(LABELS, value="Analyze", label="Target Bloom level")
        out2 = gr.Textbox(label="Rewritten question", lines=4)
        gr.Button("Rewrite").click(rewrite_level, [q2, target], out2)

# Launch the app when the script runs (blocking call).
demo.launch()