In [None]:
import gradio as gr
import json
import re
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

In [None]:
# Hugging Face Hub id of the checkpoint used for the assistant.
model_name = "unsloth/DeepSeek-R1-Distill-Qwen-1.5B"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# device_map="auto" lets accelerate decide where the weights live.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,
)

# Text-generation pipeline used by the chat handler.
# - No `device=` argument: the model was already placed by accelerate through
#   device_map, and the pipeline refuses to move such a model to an explicit
#   device.
# - max_new_tokens (instead of max_length) bounds only the generated
#   continuation, so a long prompt cannot consume the whole generation budget.
chat = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7,
    do_sample=True,
)

In [None]:
# Instruction text that chat_fn places at the start of every prompt sent to the model.
# It lists the inputs the model is given (latest user message and the `info` JSON).
# NOTE(review): the prompt does not describe any output format, while
# extract_response_and_update parses the model output for separate sections —
# confirm whether the expected reply layout should be spelled out here.
system_prompt = """You are a helpful assistant guiding a user through the Boston Public Schools registration process.
You are given:
1. The user's most recent message
2. The current known registration info (`info`) — provided as a JSON object
"""

In [None]:
# Tag-delimited sections looked for in the model output.
# NOTE(review): the previous patterns were a bare "(.*?)", which always matches
# the empty string at position 0. The tag names below are an assumption
# (<think> is the reasoning wrapper used by DeepSeek-R1 style models;
# <response>/<update> mirror the local variable names) — confirm against the
# prompt that is actually sent to the model.
_THINK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)
_RESPONSE_RE = re.compile(r"<response>(.*?)</response>", re.DOTALL)
_UPDATE_RE = re.compile(r"<update>(.*?)</update>", re.DOTALL)


def extract_response_and_update(text):
    """Split raw model output into the user-facing reply and an info update.

    Args:
        text: Raw text produced by the model.

    Returns:
        A ``(reply, update)`` tuple. ``reply`` is the stripped content of the
        ``<response>`` section; when that section is missing it falls back to
        ``text`` with any ``<think>...</think>`` block removed. ``update`` is
        the JSON object found in the ``<update>`` section, or ``{}`` when the
        section is missing, is not valid JSON, or is not a JSON object.
    """
    resp = _RESPONSE_RE.search(text)
    upd = _UPDATE_RE.search(text)

    if resp:
        out_text = resp.group(1).strip()
    else:
        # No tagged reply: show the text itself, minus the reasoning block.
        out_text = _THINK_RE.sub("", text).strip()

    update = {}
    if upd:
        try:
            parsed = json.loads(upd.group(1))
        except json.JSONDecodeError:
            parsed = None
        # Callers merge the update key by key, so only a JSON object is usable.
        if isinstance(parsed, dict):
            update = parsed
    return out_text, update

# Registration details collected so far; every field starts out unknown.
# The per-child fields all default to None, so they are built from a key list.
_child_fields = ("name", "age", "grade", "special_needs", "transferring")

info = {
    "location": None,
    "school": None,
    "child": dict.fromkeys(_child_fields),
    "residency_docs": [],
}

In [None]:
def _merge_info(existing, upd):
    """Recursively copy the entries of ``upd`` into ``existing`` in place."""
    for key, value in upd.items():
        current = existing.get(key)
        # Recurse only when both sides are dicts; a nested update aimed at a
        # scalar/None slot simply replaces it instead of raising TypeError.
        if isinstance(value, dict) and isinstance(current, dict):
            _merge_info(current, value)
        else:
            existing[key] = value


def chat_fn(user_message, chat_history):
    """Answer one user message and fold any extracted update into ``info``.

    Args:
        user_message: The text the user just sent.
        chat_history: Conversation so far as supplied by the chat UI. It is
            accepted for signature compatibility and not modified; the
            ``gr.ChatInterface`` caller keeps the history itself.

    Returns:
        The assistant's reply text. ``gr.ChatInterface`` expects the handler to
        return the reply itself rather than a ``(history, history)`` pair.

    Side effects:
        Mutates the module-level ``info`` dict with the parsed update.
    """
    # The system prompt tells the model it receives the current `info` as JSON,
    # so include it alongside the user's message.
    # NOTE(review): the <|user|>/<|assistant|> markers are plain text and may
    # not match the model's own chat template — consider
    # tokenizer.apply_chat_template; confirm.
    prompt = (
        system_prompt
        + "\ninfo: " + json.dumps(info)
        + "\n<|user|>\n" + user_message
        + "\n<|assistant|>"
    )

    # return_full_text=False keeps the prompt out of the text that is parsed
    # and shown to the user.
    raw = chat(prompt, return_full_text=False)[0]["generated_text"].strip()
    resp_text, update = extract_response_and_update(raw)

    # Only a JSON object can be merged key by key.
    if isinstance(update, dict):
        _merge_info(info, update)

    return resp_text

In [None]:
# Wire the chat handler into Gradio's ready-made chat UI.
demo = gr.ChatInterface(
    chat_fn,
    description="Ask me anything about Boston Public Schools registration",
    title="Boston School Choice",
)

# inline=True renders the app inside the notebook output cell.
demo.launch(inline=True)