Upload folder using huggingface_hub
Test_RAG.py  CHANGED  (+25 -11)
@@ -303,25 +303,39 @@ if llm_model_id == "red-pajama-3b-chat" and "GPU" in core.available_devices and
 # pipeline_kwargs={"max_new_tokens": 2},
 # )
 from optimum.intel.openvino import OVModelForCausalLM
+from transformers import pipeline
 
-
-
+
+model_id = "meta-llama/Meta-Llama-3-8B"
+ov_config = {"PERFORMANCE_HINT": "LATENCY"}  # this is an example; check your actual ov_config
+
+# Export the model with OpenVINO
+model = OVModelForCausalLM.from_pretrained(
+    model_id,
     export=True,  # convert the model to OpenVINO format
     use_cache=False,
     ov_config=ov_config,
     trust_remote_code=True  # allow custom code from the model repo
 )
 
-#
-#
-# tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-3.2-3B-Instruct')
-# if tokenizer.pad_token_id is None:
-#     tokenizer.pad_token_id = tokenizer.eos_token_id
-# # # also make sure the tokenizer used by HuggingFacePipeline has pad_token_id set
-# llm.pipeline.tokenizer.pad_token_id = tokenizer.pad_token_id
-print("test:2+2:")
-print(llm.invoke("2 + 2 ="))
+# Save the OpenVINO model
+model.save_pretrained("./openvino_llama_model")
+
+# Step 2: load the saved OpenVINO model and set up the inference task
+llm_device = "CPU"  # make sure to set the correct device for your environment
+llm = pipeline(
+    task="text-generation",
+    model=OVModelForCausalLM.from_pretrained("./openvino_llama_model"),
+    device=llm_device,
+    max_new_tokens=2  # maximum number of new tokens to generate
+)
+
+# Step 3: run inference
+output = llm("2 + 2 =")
+print(output)
 
+# print("test:2+2:")
+# print(llm.invoke("2 + 2 ="))
 import re
 from typing import List
 from langchain.text_splitter import (