kietnt0603 committed on
Commit
22eb5b6
1 Parent(s): 3c317fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -26
app.py CHANGED
@@ -3,10 +3,10 @@ import os
3
  import torch
4
  from datasets import DatasetDict, Dataset
5
  from transformers import (
6
- AutoModelForCausalLM,
7
- AutoTokenizer,
8
- BitsAndBytesConfig,
9
- logging
10
  )
11
 
12
  logging.set_verbosity_error()
@@ -30,28 +30,29 @@ bnb_4bit_quant_type = 'nf4'
30
  use_nested_quant = False
31
 
32
  # device mapping
33
- device_map = {"": 0}
 
34
 
35
  compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
36
  bnb_config = BitsAndBytesConfig(
37
- load_in_4bit=use_4bit,
38
- bnb_4bit_quant_type=bnb_4bit_quant_type,
39
- bnb_4bit_compute_dtype=compute_dtype,
40
- bnb_4bit_use_double_quant=use_nested_quant,
41
  )
42
 
43
  if compute_dtype == torch.float16 and use_4bit:
44
- major, _ = torch.cuda.get_device_capability()
45
- if major >= 8:
46
- print('='*80)
47
- print('Your GPU supports bfloat16, you can accelerate using the argument --fp16')
48
- print('='*80)
49
 
50
  model = AutoModelForCausalLM.from_pretrained(
51
- model_name,
52
- trust_remote_code=True,
53
- quantization_config=bnb_config,
54
- device_map=device_map,
55
  )
56
  model.config.use_cache = False
57
  model.config.pretraining_tp = 1
@@ -72,13 +73,13 @@ history = []
72
  # If the user has submitted input
73
  if st.button("Send"):
74
 
75
- # Generate the chatbot's response
76
- response, history = model.chat(tokenizer, user_input, history=history)
77
 
78
- # Add the response to the conversation history
79
- conversation_history.append(f"Bot: {response}")
80
 
81
- # Update the conversation text
82
- conversation_text.markdown("**Conversation:**\n")
83
- for message in conversation_history:
84
- conversation_text.markdown(f"- {message}")
 
3
  import torch
4
  from datasets import DatasetDict, Dataset
5
  from transformers import (
6
+   AutoModelForCausalLM,
7
+   AutoTokenizer,
8
+   BitsAndBytesConfig,
9
+   logging
10
  )
11
 
12
  logging.set_verbosity_error()
 
30
  use_nested_quant = False
31
 
32
  # device mapping
33
+ device = torch.device("cpu") # Set device to CPU
34
+ device_map = {"": -1} # Use -1 for CPU in bnb_config
35
 
36
  compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
37
  bnb_config = BitsAndBytesConfig(
38
+   load_in_4bit=use_4bit,
39
+   bnb_4bit_quant_type=bnb_4bit_quant_type,
40
+   bnb_4bit_compute_dtype=compute_dtype,
41
+   bnb_4bit_use_double_quant=use_nested_quant,
42
  )
43
 
44
  if compute_dtype == torch.float16 and use_4bit:
45
+   major, _ = torch.cuda.get_device_capability()
46
+   if major >= 8:
47
+     print('='*80)
48
+     print('Your GPU supports bfloat16, you can accelerate using the argument --fp16')
49
+     print('='*80)
50
 
51
  model = AutoModelForCausalLM.from_pretrained(
52
+   model_name,
53
+   trust_remote_code=True,
54
+   quantization_config=bnb_config,
55
+   device_map=device_map,
56
  )
57
  model.config.use_cache = False
58
  model.config.pretraining_tp = 1
 
73
  # If the user has submitted input
74
  if st.button("Send"):
75
 
76
+   # Generate the chatbot's response
77
+   response, history = model.chat(tokenizer, user_input, history=history)
78
 
79
+   # Add the response to the conversation history
80
+   conversation_history.append(f"Bot: {response}")
81
 
82
+   # Update the conversation text
83
+   conversation_text.markdown("**Conversation:**\n")
84
+   for message in conversation_history:
85
+     conversation_text.markdown(f"- {message}")