"""Gradio Space that rewrites a teacher's idea (pt-BR) into one structured prompt.

The app loads a 4-bit quantised base model plus a LoRA adapter, asks it to
produce a single "optimised prompt" block (Persona / Contexto / Tarefa /
Formato / Critérios / Idioma de saída) and returns only that block.
"""

import logging
import os
import re

import gradio as gr
import spaces
import torch
from huggingface_hub import login
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

logger = logging.getLogger(__name__)

# =========================
# Environment variables
# =========================
BASE_ID = os.getenv("BASE_ID", "mistralai/Mistral-7B-v0.1")
ADAPTER_ID = os.getenv("ADAPTER_ID", "roneymatusp/british-optimizer-mistral-final")
HF_TOKEN = os.getenv("HF_TOKEN")

if HF_TOKEN:
    try:
        login(HF_TOKEN)
    except Exception as exc:
        # Best effort: if the token turns out not to be needed (non-gated
        # model) the app can still work, so do not crash — but leave a trace
        # instead of swallowing the failure silently.
        logger.warning(
            "Hugging Face login failed (%s); continuing without it.",
            type(exc).__name__,
        )

# =========================
# Model cache
# =========================
_tok = None
_model = None


def load_model():
    """Load the base model in 4-bit, apply the LoRA adapter, and cache both.

    Returns:
        A ``(tokenizer, model)`` tuple. The first call performs the load;
        later calls return the module-level cached objects.
    """
    global _tok, _model
    if _tok is not None and _model is not None:
        return _tok, _model

    bnb = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    _tok = AutoTokenizer.from_pretrained(BASE_ID, use_fast=True)
    base = AutoModelForCausalLM.from_pretrained(
        BASE_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        quantization_config=bnb,
    )
    _model = PeftModel.from_pretrained(base, ADAPTER_ID)
    _model.eval()
    return _tok, _model


# =========================
# Blocking policy
# (simple list; adjust to the school's policy)
# =========================
BANNED = {
    # profanity / insults in PT (examples); entries may be stems such as "viad"
    "merda", "porra", "caralho", "buceta", "puta", "puto",
    "viad", "bixa", "bicha", "otario", "otário", "otaria", "otária",
    "idiota", "imbecil", "burro", "burra",
    # add terms specific to the school's policy
}

# Banned terms are matched only at the START of a word. Plain substring
# matching rejected legitimate classroom vocabulary ("computação", "disputa",
# "reputação" all contain "puta"), while word-start matching still covers
# stems ("viad" -> "viado") and plurals ("idiotas").
_BANNED_RE = re.compile(
    r"\b(?:"
    + "|".join(re.escape(term) for term in sorted(BANNED, key=len, reverse=True))
    + r")"
)

# Minimum number of non-blank characters for an idea to be worth optimising.
_MIN_INPUT_CHARS = 6


def violates_policy(text: str) -> bool:
    """Return True when *text* must be rejected by the input policy.

    Input is rejected when it is empty/None, shorter than six characters
    after stripping, or contains a word that starts with a ``BANNED`` term
    (case-insensitive).
    """
    if not text or len(text.strip()) < _MIN_INPUT_CHARS:
        return True
    return _BANNED_RE.search(text.lower()) is not None


# =========================
# Optimiser instructions
# =========================
SYSTEM = (
    "You are a PROMPT OPTIMISER for teachers in the UK. "
    "You NEVER answer the user's task or give examples/solutions. "
    "You ONLY return ONE structured prompt that another assistant will answer later. "
    "Use UK spelling and an academic yet concise tone."
)

OPT_TEMPLATE = """Rewrite the user's idea (Portuguese) into exactly ONE optimised prompt for a teaching assistant.

Constraints:
- Headings MUST be in Portuguese EXACTLY as below.
- Content MUST be in UK English (en-GB).
- Do NOT include explanations, solutions, examples, or chit-chat.
- If the idea is vague (e.g., just 'equations'), keep it curriculum-appropriate and generic.
- The assistant may ask up to 3 clarifying questions only if critical gaps remain.

Return ONLY the block below:

Persona: British educator and prompt engineer supporting teachers in UK schools.
Contexto:
Tarefa:
Formato:
Critérios:
Idioma de saída: English (United Kingdom)

User idea (pt-BR): {user_pt}
"""


def _generate(prompt: str, max_new_tokens: int = 280, temperature: float = 0.25) -> str:
    """Sample a completion for *prompt* and return only the newly generated text.

    Args:
        prompt: Full instruction text fed to the model.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature (nucleus sampling with top_p=0.95).

    Returns:
        The decoded continuation, with special tokens removed and WITHOUT the
        echoed prompt.
    """
    tok, model = load_model()
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=0.95,
            pad_token_id=tok.eos_token_id,
        )
    # generate() returns prompt + continuation for causal LMs. The prompt
    # itself contains a "Persona:" skeleton, so decoding the whole sequence
    # would make keep_only_block() latch onto the template instead of the
    # model's answer. Slice the prompt tokens off before decoding.
    prompt_len = inputs["input_ids"].shape[-1]
    return tok.decode(out[0][prompt_len:], skip_special_tokens=True)


_PERSONA_RE = re.compile(r"Persona\s*:", flags=re.IGNORECASE)
_TRAILER_RE = re.compile(r"\n\s*(Assistant:|User:|###|```)")

_FALLBACK_BLOCK = (
    "Persona: British educator and prompt engineer supporting teachers in UK schools.\n"
    "Contexto: UK classroom context (generic).\n"
    "Tarefa: Produce a concise lesson plan outline aligned to the user's intent.\n"
    "Formato: Numbered steps; brief timings; resources if any.\n"
    "Critérios: Clarity; UK spelling; curriculum alignment; inclusivity (SEN/EAL).\n"
    "Idioma de saída: English (United Kingdom)"
)


def keep_only_block(text: str) -> str:
    """Extract the prompt block that starts at the first 'Persona:' heading.

    Everything before 'Persona:' is dropped, and the text is cut at the first
    line that starts with a chat/markup marker (``Assistant:``, ``User:``,
    ``###`` or a code fence) so that only the prompt is returned. When no
    'Persona:' heading is present, a generic block in the expected format is
    returned instead.
    """
    match = _PERSONA_RE.search(text)
    if match is None:
        # Minimal fallback, always in the correct format.
        return _FALLBACK_BLOCK
    block = text[match.start():].strip()
    # Drop anything after a trailing marker (repetitions, footers, new turns).
    return _TRAILER_RE.split(block)[0].strip()


# =========================
# Public function of the Space
# (decorated for ZeroGPU/GPU)
# =========================
@spaces.GPU(duration=120)
def optimise_free_text(user_input: str) -> str:
    """Turn a free-text idea into one optimised prompt block.

    Returns the fixed policy message when the input violates the policy;
    otherwise builds the instruction, generates, and returns the cleaned block.
    """
    if violates_policy(user_input):
        return "fora da política de otimização de prompts"

    instruction = f"{SYSTEM}\n\n" + OPT_TEMPLATE.format(user_pt=user_input.strip())
    raw = _generate(instruction, max_new_tokens=320, temperature=0.22)
    return keep_only_block(raw)


# =========================
# UI — simple, no chat
# =========================
THEME = gr.themes.Base(
    primary_hue="indigo",
    secondary_hue="red",
)

with gr.Blocks(title="Paulean AI — Otimizador de Prompts (British)", theme=THEME) as demo:
    gr.Markdown(
        "## Paulean AI — Otimizador de Prompts (British)\n"
        "Digite sua ideia **em português** (ex.: *faça uma aula de matemática sobre equações para o IB*). "
        "O sistema **não responde aulas** nem dúvidas — ele **apenas** devolve um **prompt otimizado** "
        "no formato padronizado (**Persona, Contexto, Tarefa, Formato, Critérios, Idioma**).\n\n"
        "**Entradas inadequadas** retornam: `fora da política de otimização de prompts`."
    )
    with gr.Row():
        with gr.Column(scale=1):
            inp = gr.Textbox(
                label="Sua ideia (pt-BR)",
                placeholder="Ex.: Faça uma aula de matemática sobre equações do 2º grau (40-50 min), com exemplos e exercícios...",
                lines=8,
            )
            with gr.Row():
                btn = gr.Button("Gerar prompt", variant="primary")
                clr = gr.Button("Limpar")
        with gr.Column(scale=1):
            out = gr.Textbox(
                label="Prompt otimizado (copiar e usar)",
                lines=18,
                show_copy_button=True,
            )

    btn.click(optimise_free_text, inputs=inp, outputs=out)
    # Reset both the input and the output boxes.
    clr.click(lambda: ("", ""), inputs=None, outputs=[inp, out])

if __name__ == "__main__":
    demo.launch()