import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, logging

# Silence verbose transformers warnings
logging.set_verbosity_error()

model_name = 'THUDM/chatglm3-6b'

#############################################
# bitsandbytes parameters
#############################################

# Activate 4-bit precision for base model loading
use_4bit = True

# Compute dtype of 4-bit base models
bnb_4bit_compute_dtype = 'float16'

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = 'nf4'

# Activate nested quantization for 4-bit base models
use_nested_quant = False

# Device mapping: bitsandbytes 4-bit loading requires a CUDA device, so map the
# whole model to GPU 0 when one is available and fall back to CPU otherwise
device_map = {"": 0} if torch.cuda.is_available() else {"": "cpu"}

compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)
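
# A rough sanity check of the memory math behind this config (approximate
# figures, not measured): 4-bit weights take ~0.5 bytes per parameter versus
# 2 bytes in float16, so a 6B-parameter model needs ~3 GB instead of ~12 GB
# for its weights. Nested (double) quantization would additionally compress
# the quantization constants, saving roughly another 0.4 bits per parameter.
n_params = 6e9  # parameter count assumed from the model name, not queried
print(f"approx. weight memory: fp16 ~{n_params * 2 / 1e9:.0f} GB, 4-bit ~{n_params * 0.5 / 1e9:.0f} GB")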

if compute_dtype == torch.float16 and use_4bit and torch.cuda.is_available():
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print('=' * 80)
        print('Your GPU supports bfloat16: set bnb_4bit_compute_dtype to "bfloat16" to accelerate')
        print('=' * 80)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map=device_map,
)
# Training-recipe settings carried over from fine-tuning scripts: use_cache=False
# disables the KV cache (required with gradient checkpointing, but it slows down
# pure inference), and pretraining_tp=1 turns off tensor-parallel weight slicing
# (a Llama-family setting; harmless for models that ignore it)
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = 'left'
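
# Why left padding: ChatGLM3 is decoder-only, so generation continues from the
# last token of each sequence. With right padding, shorter prompts in a batch
# would end in pad tokens and the model would generate after them. A minimal
# sketch (hypothetical prompts, illustration only):
#   batch = tokenizer(["Hi", "What is 4-bit quantization?"], padding=True, return_tensors="pt")
#   # the shorter prompt is padded on the left, keeping real tokens flush with the output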

# Set the title of the Streamlit app
st.title("Chatbot with a Quantized HuggingFace Model")

# Streamlit reruns this whole script on every interaction, so keep both the
# model-side chat history and the displayed transcript in st.session_state
if "history" not in st.session_state:
    st.session_state.history = []
if "conversation_history" not in st.session_state:
    st.session_state.conversation_history = []

# Placeholder that will hold the conversation transcript
conversation_text = st.empty()

# Get the user input
user_input = st.text_input("You: ")

# If the user has submitted input
if st.button("Send") and user_input:
    # ChatGLM3's custom chat() method returns the reply plus the updated history
    response, st.session_state.history = model.chat(
        tokenizer, user_input, history=st.session_state.history
    )
    # Record both sides of the exchange for display
    st.session_state.conversation_history.append(f"You: {user_input}")
    st.session_state.conversation_history.append(f"Bot: {response}")

# Render the transcript as a single markdown block: st.empty() holds one
# element, so repeated .markdown() calls would overwrite each other
transcript = "**Conversation:**\n" + "\n".join(
    f"- {message}" for message in st.session_state.conversation_history
)
conversation_text.markdown(transcript)
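
# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py
# The first run downloads the ChatGLM3-6B weights (roughly 12 GB) from the
# Hugging Face Hub, so expect a long delay before the UI becomes responsive.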