Upload folder using huggingface_hub
Browse files- Test_RAG.py +22 -11
Test_RAG.py
CHANGED
|
@@ -290,18 +290,29 @@ if "GPU" in llm_device and "qwen2-7b-instruct" in llm_model_id:
|
|
| 290 |
if llm_model_id == "red-pajama-3b-chat" and "GPU" in core.available_devices and llm_device in ["GPU", "AUTO"]:
|
| 291 |
ov_config["INFERENCE_PRECISION_HINT"] = "f32"
|
| 292 |
|
| 293 |
-
llm = HuggingFacePipeline.from_model_id(
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
)
|
|
|
|
|
|
|
| 305 |
# # 设置 pad_token_id 为 eos_token_id
|
| 306 |
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 307 |
# tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-3.2-3B-Instruct')
|
|
|
|
| 290 |
if llm_model_id == "red-pajama-3b-chat" and "GPU" in core.available_devices and llm_device in ["GPU", "AUTO"]:
|
| 291 |
ov_config["INFERENCE_PRECISION_HINT"] = "f32"
|
| 292 |
|
| 293 |
+
# llm = HuggingFacePipeline.from_model_id(
|
| 294 |
+
# model_id= "meta-llama/Meta-Llama-3-8B",
|
| 295 |
+
# #meta-llama/Meta-Llama-3-8B------------/meta-llama/Llama-3.2-3B-Instruct
|
| 296 |
+
# task="text-generation",
|
| 297 |
+
# backend="openvino",
|
| 298 |
+
# model_kwargs={
|
| 299 |
+
# "device": llm_device,
|
| 300 |
+
# "ov_config": ov_config,
|
| 301 |
+
# "trust_remote_code": True,
|
| 302 |
+
# },
|
| 303 |
+
# pipeline_kwargs={"max_new_tokens": 2},
|
| 304 |
+
# )
|
| 305 |
+
from optimum.intel.openvino import OVModelForCausalLM
|
| 306 |
+
|
| 307 |
+
llm = OVModelForCausalLM.from_pretrained(
|
| 308 |
+
model_id = "meta-llama/Meta-Llama-3-8B",
|
| 309 |
+
export=True, # convert the model to OpenVINO format
|
| 310 |
+
use_cache=False,
|
| 311 |
+
ov_config=ov_config,
|
| 312 |
+
trust_remote_code=True # trust and allow remote code shipped with the model repository
|
| 313 |
)
|
| 314 |
+
|
| 315 |
+
|
| 316 |
# # 设置 pad_token_id 为 eos_token_id
|
| 317 |
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 318 |
# tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-3.2-3B-Instruct')
|