dh-mc committed

Commit 927c472 · 1 Parent(s): 71f3335

added telegram bot

Files changed (5)
  1. .env.example +2 -2
  2. app.py +6 -0
  3. app_modules/init.py +51 -43
  4. data/questions.txt +2 -4
  5. telegram_bot.py +8 -16
.env.example CHANGED
@@ -83,15 +83,15 @@ TOKENIZERS_PARALLELISM=true
 
 # env variables for ingesting source PDF files
 CHUNCK_SIZE=1024
-CHUNK_OVERLAP=512
+CHUNK_OVERLAP=64
 SOURCE_PATH="data/pdfs/smu_lib_html/"
 
 # Index for SMU LibBot PDF files - chunk_size=1024 chunk_overlap=512
 FAISS_INDEX_PATH="data/smu_lib_index/"
 
 # telegram bot
+RUN_TELEGRAM_BOT=true
 TELEGRAM_API_TOKEN=
-CHAT_API_URL=http://localhost:8080/chat_sync
 
 # template for env/tgi.conf
 export PORT=64300
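
The overlap between consecutive chunks drops from 512 to 64, while the comment above FAISS_INDEX_PATH still says chunk_overlap=512, so that comment now describes the previously built index rather than the new setting. The ingestion code itself is not part of this commit; as a minimal sketch (assuming a standard LangChain splitter, which the repo's other LangChain imports suggest), these variables would be consumed roughly like this:

# Sketch only: the ingestion script is not shown in this commit, and the
# splitter choice is an assumption. "CHUNCK_SIZE" is the repo's own typo.
import os

from langchain.text_splitter import RecursiveCharacterTextSplitter

chunk_size = int(os.environ.get("CHUNCK_SIZE") or "1024")
chunk_overlap = int(os.environ.get("CHUNK_OVERLAP") or "64")

splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size, chunk_overlap=chunk_overlap
)
chunks = splitter.split_text("long source document text ...")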
app.py CHANGED
@@ -11,6 +11,12 @@ from app_modules.init import app_init
 from app_modules.llm_chat_chain import ChatChain
 from app_modules.utils import print_llm_response, remove_extra_spaces
 
+if os.environ.get("RUN_TELEGRAM_BOT") == "true":
+    from telegram_bot import start_telegram_bot
+
+    start_telegram_bot()
+    exit(0)
+
 llm_loader, qa_chain = app_init()
 
 show_param_settings = os.environ.get("SHOW_PARAM_SETTINGS") == "true"
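
This gate makes app.py dual-purpose: with RUN_TELEGRAM_BOT=true it starts the Telegram bot and never reaches the Gradio UI code below. Importing telegram_bot also triggers app_init() at import time (see the telegram_bot.py hunk further down), and exit() is the site-module helper; sys.exit() is the more conventional form in scripts. An equivalent sketch:

# Equivalent sketch of the bot/web-UI switch, using the more conventional
# sys.exit() instead of the site-module exit() helper.
import os
import sys

if os.environ.get("RUN_TELEGRAM_BOT") == "true":
    from telegram_bot import start_telegram_bot  # import also runs app_init()

    start_telegram_bot()  # blocks in run_polling() until the process stops
    sys.exit(0)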
app_modules/init.py CHANGED
@@ -22,61 +22,69 @@ load_dotenv(found_dotenv, override=False)
 # Constants
 init_settings()
 
+llm_loader = None
+qa_chain = None
+
 
-def app_init(initQAChain: bool = True):
-    # https://github.com/huggingface/transformers/issues/17611
-    os.environ["CURL_CA_BUNDLE"] = ""
-
-    llm_model_type = os.environ.get("LLM_MODEL_TYPE")
-    n_threds = int(os.environ.get("NUMBER_OF_CPU_CORES") or "4")
-
-    hf_embeddings_device_type, hf_pipeline_device_type = get_device_types()
-    print(f"hf_embeddings_device_type: {hf_embeddings_device_type}")
-    print(f"hf_pipeline_device_type: {hf_pipeline_device_type}")
-
-    if initQAChain:
-        hf_embeddings_model_name = (
-            os.environ.get("HF_EMBEDDINGS_MODEL_NAME") or "hkunlp/instructor-xl"
-        )
-
-        index_path = os.environ.get("FAISS_INDEX_PATH") or os.environ.get(
-            "CHROMADB_INDEX_PATH"
-        )
-        using_faiss = os.environ.get("FAISS_INDEX_PATH") is not None
-
-        start = timer()
-        embeddings = HuggingFaceInstructEmbeddings(
-            model_name=hf_embeddings_model_name,
-            model_kwargs={"device": hf_embeddings_device_type},
-        )
-        end = timer()
-
-        print(f"Completed in {end - start:.3f}s")
-
-        start = timer()
-
-        print(
-            f"Load index from {index_path} with {'FAISS' if using_faiss else 'Chroma'}"
-        )
-
-        if not os.path.isdir(index_path):
-            raise ValueError(f"{index_path} does not exist!")
-        elif using_faiss:
-            vectorstore = FAISS.load_local(index_path, embeddings)
-        else:
-            vectorstore = Chroma(
-                embedding_function=embeddings, persist_directory=index_path
-            )
-
-        end = timer()
-
-        print(f"Completed in {end - start:.3f}s")
-
-    start = timer()
-    llm_loader = LLMLoader(llm_model_type)
-    llm_loader.init(n_threds=n_threds, hf_pipeline_device_type=hf_pipeline_device_type)
-    qa_chain = QAChain(vectorstore, llm_loader) if initQAChain else None
-    end = timer()
-    print(f"Completed in {end - start:.3f}s")
-
-    return llm_loader, qa_chain
+def app_init(initQAChain: bool = True):
+    global llm_loader
+    global qa_chain
+    if llm_loader == None:
+        # https://github.com/huggingface/transformers/issues/17611
+        os.environ["CURL_CA_BUNDLE"] = ""
+
+        llm_model_type = os.environ.get("LLM_MODEL_TYPE")
+        n_threds = int(os.environ.get("NUMBER_OF_CPU_CORES") or "4")
+
+        hf_embeddings_device_type, hf_pipeline_device_type = get_device_types()
+        print(f"hf_embeddings_device_type: {hf_embeddings_device_type}")
+        print(f"hf_pipeline_device_type: {hf_pipeline_device_type}")
+
+        if initQAChain:
+            hf_embeddings_model_name = (
+                os.environ.get("HF_EMBEDDINGS_MODEL_NAME") or "hkunlp/instructor-xl"
+            )
+
+            index_path = os.environ.get("FAISS_INDEX_PATH") or os.environ.get(
+                "CHROMADB_INDEX_PATH"
+            )
+            using_faiss = os.environ.get("FAISS_INDEX_PATH") is not None
+
+            start = timer()
+            embeddings = HuggingFaceInstructEmbeddings(
+                model_name=hf_embeddings_model_name,
+                model_kwargs={"device": hf_embeddings_device_type},
+            )
+            end = timer()
+
+            print(f"Completed in {end - start:.3f}s")
+
+            start = timer()
+
+            print(
+                f"Load index from {index_path} with {'FAISS' if using_faiss else 'Chroma'}"
+            )
+
+            if not os.path.isdir(index_path):
+                raise ValueError(f"{index_path} does not exist!")
+            elif using_faiss:
+                vectorstore = FAISS.load_local(index_path, embeddings)
+            else:
+                vectorstore = Chroma(
+                    embedding_function=embeddings, persist_directory=index_path
+                )
+
+            end = timer()
+
+            print(f"Completed in {end - start:.3f}s")
+
+        start = timer()
+        llm_loader = LLMLoader(llm_model_type)
+        llm_loader.init(
+            n_threds=n_threds, hf_pipeline_device_type=hf_pipeline_device_type
+        )
+        qa_chain = QAChain(vectorstore, llm_loader) if initQAChain else None
+        end = timer()
+        print(f"Completed in {end - start:.3f}s")
+
+    return llm_loader, qa_chain
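
The rewrite turns app_init() into a lazy singleton: the expensive embeddings, index, and LLM loading runs once, and repeated calls (here, from both app.py and telegram_bot.py) return the cached objects. A minimal sketch of the pattern in isolation (names are illustrative, not from the repo); note that idiomatic Python tests `is None` where the committed code uses `== None`:

# Lazy-singleton sketch: heavy setup runs on the first call only.
def expensive_init():
    return object()  # stand-in for loading models and vector indexes


_instance = None


def get_instance():
    global _instance
    if _instance is None:  # idiomatic `is None`; the commit uses `== None`
        _instance = expensive_init()
    return _instance


assert get_instance() is get_instance()  # later calls reuse the cached object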
data/questions.txt CHANGED
@@ -1,6 +1,4 @@
-What are the library opening hours?
-I'm an undergrad. How many books can I borrow from libraries?
+How many items can a doctoral student borrow?
 Can you list some of recommended resources on generative AI?
-Hi, is it necessary to book a terminal first before being able to use the bloomberg computer in the library? or can i just show up?
-Hi, I am an alumni of SMU (batch of 2018). I wanted to enquire for SMU Alumni rates for access to library resources (databases, investment studio) etc
 I've overdue fine of $4.00. Could you advise on how I can go about paying the fine?
+Hi, I am an alumni of SMU (batch of 2018). I wanted to enquire for SMU Alumni rates for access to library resources (databases, investment studio) etc
telegram_bot.py CHANGED
@@ -14,7 +14,9 @@ from telegram.ext import (
     filters,
 )
 
-from app_modules.init import *
+from app_modules.init import app_init
+
+llm_loader, qa_chain = app_init()
 
 ctx = ssl.create_default_context()
 ctx.set_ciphers("DEFAULT")
@@ -32,7 +34,6 @@ if __version_info__ < (20, 0, 0, "alpha", 1):
 )
 
 TOKEN = os.getenv("TELEGRAM_API_TOKEN")
-ENDPOINT = os.getenv("CHAT_API_URL")
 
 
 # Define a few command handlers. These usually take the two arguments update and
@@ -55,16 +56,11 @@ async def chat_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
     print(update)
     tic = time.perf_counter()
     try:
-        message = {
-            "question": update.message.text,
-            "chat_id": update.message.chat.id,
-        }
-        print(message)
-        x = requests.post(ENDPOINT, json=message).json()
-        temp = time.perf_counter()
-        print(f"Received response in {temp - tic:0.4f} seconds")
-        print(x)
-        result = x["result"]
+        result = qa_chain.call_chain(
+            {"question": update.message.text, "chat_history": []}, None
+        )
+
+        result = result["answer"]
         print(result)
         await update.message.reply_text(result[0:8192])
         toc = time.perf_counter()
@@ -89,7 +85,3 @@ def start_telegram_bot() -> None:
     )
 
     application.run_polling()
-
-
-if __name__ == "__main__":
-    start_telegram_bot()
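
With CHAT_API_URL gone, the bot no longer POSTs to a separate chat service; chat_command now calls the shared qa_chain in-process. The handler registration inside start_telegram_bot() is not shown in these hunks; with python-telegram-bot >= 20 (the version the __version_info__ check above targets) the wiring typically looks like this sketch, with a stub in place of the real handler:

# Sketch (assumption): typical python-telegram-bot v20 wiring for
# start_telegram_bot(); the repo's actual function body is not in this diff.
import os

from telegram import Update
from telegram.ext import Application, ContextTypes, MessageHandler, filters


async def chat_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    # stand-in for the qa_chain.call_chain(...) call shown in the hunk above
    await update.message.reply_text("stub answer")


def start_telegram_bot() -> None:
    application = Application.builder().token(os.environ["TELEGRAM_API_TOKEN"]).build()
    # route every non-command text message to chat_command
    application.add_handler(
        MessageHandler(filters.TEXT & ~filters.COMMAND, chat_command)
    )
    application.run_polling()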