import gradio as gr from transformers import AutoTokenizer, AutoModelForCausalLM from peft import PeftModel, PeftConfig import torch import re class MoroccanStudentChatbot: def __init__(self, adapter_model_id="echarif/llama_alpaca_lora_adapter"): """Initialize the chatbot with the fine-tuned PEFT/LoRA model""" print("Loading PEFT adapter config...") self.config = PeftConfig.from_pretrained(adapter_model_id) print("Loading base model...") self.base_model = AutoModelForCausalLM.from_pretrained( self.config.base_model_name_or_path, return_dict=True, device_map="auto", torch_dtype=torch.float16 ) print("Loading LoRA adapter...") self.model = PeftModel.from_pretrained(self.base_model, adapter_model_id) print("Loading tokenizer...") self.tokenizer = AutoTokenizer.from_pretrained(self.config.base_model_name_or_path) # Add padding token if it doesn't exist if self.tokenizer.pad_token is None: self.tokenizer.pad_token = self.tokenizer.eos_token self.alpaca_prompt = """Ci-dessous se trouve une instruction décrivant une tâche, accompagnée éventuellement d'un contexte supplémentaire. Rédige une réponse qui complète correctement la demande. ### Instruction : {} ### input : {} ### output : {}""" print("Model loaded successfully!") def clean_output(self, raw_output): """Clean the model output to remove unwanted tokens and formatting""" # Decode the output if isinstance(raw_output, torch.Tensor): text = self.tokenizer.decode(raw_output[0], skip_special_tokens=True) else: text = raw_output # Remove the prompt part and keep only the actual response # Look for the "### output :" pattern and extract what comes after output_pattern = r"### output :\s*(.*?)(?:<\|end_of_text\|>|$)" match = re.search(output_pattern, text, re.DOTALL) if match: response = match.group(1).strip() else: # Fallback: try to extract text after "### output :" if "### output :" in text: response = text.split("### output :")[1].strip() else: response = text # Clean up any remaining special tokens response = re.sub(r'<\|.*?\|>', '', response) response = re.sub(r'<.*?>', '', response) # Remove extra whitespace and newlines response = re.sub(r'\n+', '\n', response) response = response.strip() return response if response else "Je suis désolé, je n'ai pas pu générer une réponse appropriée." def generate_response(self, user_input, history): """Generate response for the chatbot""" if not user_input.strip(): return history, "" # Format the prompt prompt = self.alpaca_prompt.format(user_input.strip(), "", "") # Tokenize inputs = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512) inputs = {k: v.to(self.model.device) for k, v in inputs.items()} # Generate response with torch.no_grad(): outputs = self.model.generate( **inputs, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.9, pad_token_id=self.tokenizer.eos_token_id, repetition_penalty=1.1, ) # Clean the output response = self.clean_output(outputs) # Update history history.append([user_input, response]) return history, "" def create_interface(): """Create the Gradio interface""" # Initialize the chatbot chatbot = MoroccanStudentChatbot() # Custom CSS for professional styling and responsiveness custom_css = """ .header-container { display: flex; justify-content: space-between; align-items: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px; color: white; } .logo-container { display: flex; align-items: center; gap: 15px; } .logo-placeholder { width: 60px; height: 60px; background: rgba(255, 255, 255, 0.2); border-radius: 10px; display: flex; align-items: center; justify-content: center; font-size: 24px; border: 2px solid rgba(255, 255, 255, 0.3); } .title-section h1 { margin: 0; font-size: 24px; font-weight: bold; } .title-section p { margin: 5px 0 0 0; font-size: 14px; opacity: 0.9; } .university-info { text-align: right; font-size: 12px; opacity: 0.8; } /* Responsive design */ @media (max-width: 768px) { .header-container { flex-direction: column; text-align: center; gap: 15px; } .university-info { text-align: center; } .title-section h1 { font-size: 20px; } .logo-placeholder { width: 50px; height: 50px; font-size: 20px; } } .chatbot-container { max-width: 800px; margin: 0 auto; } .footer-info { text-align: center; margin-top: 20px; padding: 15px; background: #f8f9fa; border-radius: 8px; font-size: 12px; color: #666; } /* Custom chatbot styling */ .gradio-container { max-width: 1200px !important; } /* Improve mobile responsiveness */ @media (max-width: 480px) { .header-container { padding: 15px; } .title-section h1 { font-size: 18px; } .logo-placeholder { width: 40px; height: 40px; font-size: 16px; } } """ # Create the interface with gr.Blocks(css=custom_css, title="Assistant Étudiant Marocain", theme=gr.themes.Soft()) as interface: # Header with logos and title gr.HTML("""
Votre guide pour l'enseignement supérieur au Maroc