JiakaiDu committed
Commit df0b1a0
Parent: f39ac79

Upload folder using huggingface_hub

Files changed (1)
  1. Test_RAG.py +25 -11
Test_RAG.py CHANGED
@@ -303,25 +303,39 @@ if llm_model_id == "red-pajama-3b-chat" and "GPU" in core.available_devices and
 # pipeline_kwargs={"max_new_tokens": 2},
 # )
 from optimum.intel.openvino import OVModelForCausalLM
+from transformers import pipeline
 
-llm = OVModelForCausalLM.from_pretrained(
-    model_id = "meta-llama/Meta-Llama-3-8B",
+
+model_id = "meta-llama/Meta-Llama-3-8B"
+ov_config = {"PERFORMANCE_HINT": "LATENCY"}  # this is an example; check your actual ov_config
+
+# Export the model with OpenVINO
+model = OVModelForCausalLM.from_pretrained(
+    model_id,
     export=True,  # convert the model to OpenVINO format
     use_cache=False,
     ov_config=ov_config,
     trust_remote_code=True  # trust remote code in the model repo
 )
 
+# Save the OpenVINO model
+model.save_pretrained("./openvino_llama_model")
+
+# Step 2: load the saved OpenVINO model and set up the inference task
+llm_device = "CPU"  # make sure the device matches your environment
+llm = pipeline(
+    task="text-generation",
+    model=OVModelForCausalLM.from_pretrained("./openvino_llama_model"),
+    device=llm_device,
+    max_new_tokens=2  # maximum number of new tokens to generate
+)
+
+# Step 3: run inference
+output = llm("2 + 2 =")
+print(output)
 
-# # set pad_token_id to eos_token_id
-# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-# tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-3.2-3B-Instruct')
-# if tokenizer.pad_token_id is None:
-#     tokenizer.pad_token_id = tokenizer.eos_token_id
-# # # also make sure the tokenizer used by HuggingFacePipeline has pad_token_id set
-# llm.pipeline.tokenizer.pad_token_id = tokenizer.pad_token_id
-print("test:2+2:")
-print(llm.invoke("2 + 2 ="))
+# print("test:2+2:")
+# print(llm.invoke("2 + 2 ="))
 import re
 from typing import List
 from langchain.text_splitter import (
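
Note: the added snippet hands transformers.pipeline a model instance rather than a model id; in that case pipeline cannot infer which tokenizer to use and normally needs one passed explicitly. Below is a minimal usage sketch under that assumption; the "./openvino_llama_model" directory comes from the commit above, while loading the tokenizer from the original checkpoint id and saving it next to the exported weights are illustrative choices, not part of the commit.

from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

save_dir = "./openvino_llama_model"  # directory written by model.save_pretrained above

# Reload the exported OpenVINO model from disk; no re-export needed.
model = OVModelForCausalLM.from_pretrained(save_dir)

# pipeline() cannot guess a tokenizer from a model instance, so load one
# explicitly (illustrative: fetched from the original checkpoint id).
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")
tokenizer.save_pretrained(save_dir)  # keep it next to the model for later runs

generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=2,  # matches the commit's setting
)

print(generator("2 + 2 ="))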