jonathanjordan21 committed on
Commit
f6d2306
·
verified ·
1 Parent(s): 115b068

Update custom_llm.py

Browse files
Files changed (1) hide show
  1. custom_llm.py +6 -11
custom_llm.py CHANGED
@@ -31,14 +31,14 @@ import pickle, asyncio
31
  async def create_vectorstore():
32
  API_TOKEN = os.getenv('HF_INFER_API')
33
 
34
- loader = os.getenv('knowledge_base')
35
  # web_loader = load_web("https://lintasmediadanawa.com")
36
 
37
- splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=20)
38
 
39
  # docs = splitter.create_documents([loader]+web_loader)
40
- docs = splitter.create_documents([loader])
41
- print(len(docs))
42
  emb_model = HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-multilingual-mpnet-base-v2', encode_kwargs={'normalize_embeddings': True})
43
 
44
  # emb_model = HuggingFaceInferenceAPIEmbeddings(
@@ -48,18 +48,13 @@ async def create_vectorstore():
48
  async def add_docs(d):
49
  db.aadd_documents(await splitter.atransform_documents([d]))
50
 
51
- db = await FAISS.afrom_documents(docs, emb_model)
52
 
53
  f = pickle.load(open("ebesha_ticket.pkl", "rb"))
54
 
55
  print("Docs len :", len(f))
56
 
57
- tasks = []
58
-
59
- for d in f:
60
- tasks.append(db.aadd_documents(await splitter.atransform_documents([d])))
61
-
62
- await asyncio.gather(*tasks)
63
 
64
 
65
 
 
31
  async def create_vectorstore():
32
  API_TOKEN = os.getenv('HF_INFER_API')
33
 
34
+ # loader = os.getenv('knowledge_base')
35
  # web_loader = load_web("https://lintasmediadanawa.com")
36
 
37
+ # splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=20)
38
 
39
  # docs = splitter.create_documents([loader]+web_loader)
40
+ # docs = splitter.create_documents([loader])
41
+ # print(len(docs))
42
  emb_model = HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-multilingual-mpnet-base-v2', encode_kwargs={'normalize_embeddings': True})
43
 
44
  # emb_model = HuggingFaceInferenceAPIEmbeddings(
 
48
  async def add_docs(d):
49
  db.aadd_documents(await splitter.atransform_documents([d]))
50
 
51
+ # db = FAISS.afrom_documents(docs, emb_model)
52
 
53
  f = pickle.load(open("ebesha_ticket.pkl", "rb"))
54
 
55
  print("Docs len :", len(f))
56
 
57
+ db = FAISS.from_documents(f, emb_model)
 
 
 
 
 
58
 
59
 
60