BorderCollieWei committed
Commit 170c6af · verified · 1 Parent(s): da8f7f0

Update app.py
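
Switch model loading from the inline load_in_8bit=True flag to a BitsAndBytesConfig(load_in_4bit=True) quantization config, replace the deprecated use_auth_token argument with token, and update the Gradio interface title and description to match.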

Files changed (1)
  1. app.py +13 -12
app.py CHANGED
@@ -1,31 +1,32 @@
  # -*- coding: utf-8 -*-
- """Hugging Face Space App with INT8 Quantization"""
+ """Hugging Face Space App with CPU Quantization"""
  
  import os
  import gradio as gr
  from huggingface_hub import login
- from transformers import AutoTokenizer, AutoModelForCausalLM
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
  
  # Log in to Hugging Face, authenticating with an access token
- HF_TOKEN = os.getenv("HF_TOKEN")  # Read the access token from the environment variable
+ HF_TOKEN = os.getenv("HF_TOKEN")
  if not HF_TOKEN:
      raise ValueError(
          "Hugging Face access token not found! Please set the 'HF_TOKEN' environment variable or provide a valid access token directly."
      )
  
- login(HF_TOKEN)  # Authenticate with the access token
+ login(HF_TOKEN)
  
- # Load the quantized Llama-2-13b-chat-hf model
+ # Configure 4-bit quantization
  MODEL_NAME = "meta-llama/Llama-2-13b-chat-hf"
+ quantization_config = BitsAndBytesConfig(load_in_4bit=True)
  
- # Enable quantization options
+ # Load the quantized model
  model = AutoModelForCausalLM.from_pretrained(
      MODEL_NAME,
-     device_map="auto",  # Automatically place the model on available devices (CPU/GPU)
-     load_in_8bit=True,  # Enable INT8 quantization
-     use_auth_token=HF_TOKEN  # Use the Hugging Face access token
+     quantization_config=quantization_config,
+     device_map="auto",
+     token=HF_TOKEN
  )
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
  
  # Define the inference function
  def generate_text(prompt):
@@ -44,8 +45,8 @@ interface = gr.Interface(
      fn=generate_text,
      inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
      outputs="text",
-     title="Llama 2 Text Generator (INT8 Quantized)",
-     description="Generate text using the INT8-quantized Llama-2-13b-chat-hf model hosted on Hugging Face Spaces."
+     title="Llama 2 Text Generator (CPU Quantized)",
+     description="Generate text using the Llama-2-13b-chat-hf model with CPU quantization hosted on Hugging Face Spaces."
  )
  
  # Start the app
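
For reference, a minimal self-contained sketch of the loading pattern this commit adopts, assuming a recent transformers release with accelerate and bitsandbytes installed; the bnb_4bit_* options shown are common companions to load_in_4bit=True but are not part of this commit:

# Sketch of the 4-bit loading pattern adopted in this commit.
# Assumes `pip install transformers accelerate bitsandbytes`;
# the bnb_4bit_* settings below are illustrative extras, not from the commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "meta-llama/Llama-2-13b-chat-hf"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize weights to 4-bit at load time
    bnb_4bit_quant_type="nf4",              # NormalFloat4 usually outperforms plain fp4
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for matmuls at runtime
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    device_map="auto",  # let accelerate place shards across available devices
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

One caveat on the design choice: bitsandbytes 4-bit quantization generally expects a CUDA GPU at load time, so the "CPU Quantized" title holds only if the Space hardware actually supports it.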