openfree committed on
Commit
fc3d5aa
·
verified ·
1 Parent(s): 3631b74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -13
app.py CHANGED
@@ -130,21 +130,49 @@ def do_web_search(query: str) -> str:
130
  logger.error(f"Web search failed: {e}")
131
  return f"Web search failed: {str(e)}"
132
 
 
133
  ##############################################################################
134
- # ๋ชจ๋ธ/ํ† ํฌ๋‚˜์ด์ € ๋กœ๋”ฉ (ํ…์ŠคํŠธ ์ „์šฉ)
135
  ##############################################################################
136
- MAX_CONTENT_CHARS = 2000
137
- MAX_INPUT_LENGTH = 2096
138
- model_id = os.getenv("MODEL_ID", "openfree/Gemma-3-R1984-1B-0613")
139
-
140
- # 텍스트 전용 모델로 로드
141
- tokenizer = AutoTokenizer.from_pretrained(model_id)
142
- model = AutoModelForCausalLM.from_pretrained(
143
- model_id,
144
- device_map="auto",
145
- torch_dtype=torch.bfloat16,
146
- attn_implementation="eager"
147
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  ##############################################################################
150
  # CSV, TXT, PDF 분석 함수
 
130
  logger.error(f"Web search failed: {e}")
131
  return f"Web search failed: {str(e)}"
132
 
133
+
134
  ##############################################################################
135
+ # ๋ชจ๋ธ ๋ฐ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ (Space ํ™˜๊ฒฝ์—์„œ ์ตœ์ ํ™”)
136
  ##############################################################################
137
@spaces.GPU
def load_model(
    model_name: str = "VIDraft/Gemma-3-R1984-1B",
    adapter_name: str = "openfree/Gemma-3-R1984-1B-0613",
) -> tuple:
    """Load the 4-bit quantized base model, its tokenizer, and a PEFT adapter.

    Args:
        model_name: Hub id (or path) of the base causal-LM checkpoint. The
            tokenizer is loaded from the same id.
        adapter_name: Hub id (or path) of the PEFT adapter applied on top of
            the base model.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is in eval mode. It is the
        adapter-wrapped model when the adapter loads, otherwise the plain
        quantized base model.

    Note:
        Adapter loading is deliberately best-effort: a failure is logged with
        its traceback and the base model is returned instead of raising.
    """
    # NOTE(review): this function is wrapped by ``spaces.GPU`` and returns the
    # loaded model to its caller -- confirm that is the intended usage on the
    # target Space hardware.
    logger.info(f"모델 로딩 시작: {model_name} (어댑터: {adapter_name})")
    clear_cuda_cache()  # free cached GPU memory before loading new weights

    # 4-bit NF4 quantization with double quantization; compute in bfloat16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    # Load the base model.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=False,
    )

    # Load the tokenizer from the same checkpoint as the base model.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Only fall back to EOS when the tokenizer has no pad token of its own;
    # overwriting an existing pad token makes padding indistinguishable
    # from end-of-sequence.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Attach the PEFT adapter. ``PeftModel.from_pretrained`` wraps the base
    # model; the adapter weights are not merged into the base weights here.
    from peft import PeftModel

    try:
        model = PeftModel.from_pretrained(model, adapter_name)
        logger.info(f"PEFT 어댑터 로딩 및 병합 완료: {adapter_name}")
    except Exception as e:
        # Best-effort: keep serving with the base model, but keep the
        # traceback so the failure can be diagnosed.
        logger.exception(f"PEFT 어댑터 로딩 오류: {e}")
        logger.warning("어댑터 로딩에 실패했습니다. 베이스 모델로 진행합니다.")

    model.eval()  # inference mode

    logger.info("모델 및 토크나이저 로딩 완료")
    return model, tokenizer
176
 
177
  ##############################################################################
178
  # CSV, TXT, PDF 분석 함수