Sean-Case committed
Commit: aa0ad5d
Parent(s): 0b0054b

Cleaned up code a bit, added user icons, thumbs up/down

Files changed:
- Link to images.txt +4 -0
- app.py +9 -19
- bot.png +0 -0
- chatfuncs/chatfuncs.py +50 -148
- requirements.txt +2 -2
- user.jfif +0 -0
Link to images.txt
ADDED
@@ -0,0 +1,4 @@
+Robot emoji: https://upload.wikimedia.org/wikipedia/commons/thumb/5/50/Fluent_Emoji_high_contrast_1f916.svg/32px-Fluent_Emoji_high_contrast_1f916.svg.png
+
+Bing smile emoji: https://www.bing.com/images/create/a-black-and-white-emoji-with-a-simple-smile2c-black/6523d2c320df409581e85bec80ef3ba8?id=KTdVbixG8oRqR9BzF6AblQ%3d%3d&view=detailv2&idpp=genimg&idpclose=1&FORM=SYDBIC
+
app.py
CHANGED
@@ -65,35 +65,23 @@ def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
    print(docs_out)

    vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)
-
-    '''
-    #with open("vectorstore.pkl", "wb") as f:
-        #pickle.dump(vectorstore, f)
-    '''
-
-    #if Path(save_to).exists():
-    #    vectorstore_func.save_local(folder_path=save_to)
-    #else:
-    #    os.mkdir(save_to)
-    #    vectorstore_func.save_local(folder_path=save_to)
-
-    #global vectorstore

-    #vectorstore = vectorstore_func

    chatf.vectorstore = vectorstore_func

    out_message = "Document processing complete"

-    #print(out_message)
-    #print(f"> Saved to: {save_to}")
-
    return out_message, vectorstore_func

# Gradio chat

import gradio as gr

+def vote(data: gr.LikeData):
+    if data.liked:
+        print("You upvoted this response: " + data.value)
+    else:
+        print("You downvoted this response: " + data.value)

block = gr.Blocks(theme = gr.themes.Base())#css=".gradio-container {background-color: black}")

@@ -117,8 +105,8 @@ with block:
    with gr.Tab("Chatbot"):

        with gr.Row():
-            chat_height =
-            chatbot = gr.Chatbot(height=chat_height)
+            chat_height = 550
+            chatbot = gr.Chatbot(height=chat_height, avatar_images=('user.jfif', 'bot.jpg'), bubble_full_width = False)
            sources = gr.HTML(value = "Source paragraphs where I looked for answers will appear here", height=chat_height)

        with gr.Row():

@@ -194,6 +182,8 @@ with block:
    clear.click(chatf.clear_chat, inputs=[chat_history_state, sources, message, current_topic], outputs=[chat_history_state, sources, message, current_topic])
    clear.click(lambda: None, None, chatbot, queue=False)

+    chatbot.like(vote, None, None)
+
    block.queue(concurrency_count=1).launch(debug=True)
# -
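The thumbs up/down feature added above is just a vote handler that reads gr.LikeData, registered on the Chatbot's like event. Below is a minimal standalone sketch of that wiring, assuming Gradio 3.47.x as pinned in requirements.txt; the echo bot, textbox, and printed messages are illustrative placeholders, not part of this commit.

import gradio as gr

def vote(data: gr.LikeData):
    # data.liked is True for a thumbs up and False for a thumbs down;
    # data.value holds the text of the message that was rated.
    action = "upvoted" if data.liked else "downvoted"
    print("You " + action + " this response: " + str(data.value))

def respond(message, history):
    # Placeholder echo bot so there is something to rate.
    history = history + [(message, "You said: " + message)]
    return "", history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=550)  # the commit also passes avatar_images and bubble_full_width=False
    msg = gr.Textbox()
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    chatbot.like(vote, None, None)    # register the like/dislike handler; no extra inputs or outputs

demo.launch()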
bot.png
ADDED
chatfuncs/chatfuncs.py
CHANGED
@@ -12,9 +12,7 @@ from threading import Thread
from transformers import AutoTokenizer, pipeline, TextIteratorStreamer

# Alternative model sources
-from gpt4all import GPT4All
from ctransformers import AutoModelForCausalLM#, AutoTokenizer
-
from dataclasses import asdict, dataclass

# Langchain functions

@@ -33,8 +31,6 @@ from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
import keybert

-#from transformers.pipelines import pipeline
-
# For Name Entity Recognition model
from span_marker import SpanMarkerModel

@@ -69,6 +65,7 @@ temperature: float = 0.1
top_k: int = 3
top_p: float = 1
repetition_penalty: float = 1.05
+flan_alpaca_repetition_penalty: float = 1.3
last_n_tokens: int = 64
max_new_tokens: int = 125
#seed: int = 42

@@ -77,7 +74,7 @@ stream: bool = True
threads: int = threads
batch_size:int = 512
context_length:int = 4096
-gpu_layers:int = 0#5#gpu_layers
+gpu_layers:int = 0#5#gpu_layers For serving on Huggingface set to 0 as using free CPU instance
sample = True

@dataclass

@@ -99,7 +96,7 @@ class GenerationConfig:

## Highlight text constants
-hlt_chunk_size =
+hlt_chunk_size = 15
hlt_strat = [" ", ".", "!", "?", ":", "\n\n", "\n", ","]
hlt_overlap = 0

@@ -110,51 +107,47 @@ ner_model = SpanMarkerModel.from_pretrained("tomaarsen/span-marker-mbert-base-mu
# Used to pull out keywords from chat history to add to user queries behind the scenes
kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")

+## Set model type ##
+model_type = "ctrans"

## Chat models ##
-ctrans_llm = [] # Not leaded by default
-ctrans_llm = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(GenerationConfig()))
-#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/vicuna-13B-v1.5-16K-GGUF', model_type='llama', model_file='vicuna-13b-v1.5-16k.Q4_K_M.gguf')
-#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeUp-Llama-2-13B-Chat-HF-GGUF', model_type='llama', model_file='codeup-llama-2-13b-chat-hf.Q4_K_M.gguf')
-#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeLlama-13B-Instruct-GGUF', model_type='llama', model_file='codellama-13b-instruct.Q4_K_M.gguf')
-#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-Instruct-v0.1-GGUF', model_type='mistral', model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf')
-#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **asdict(GenerationConfig()))
-#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q2_K.gguf', **asdict(GenerationConfig()))

+if model_type == "ctrans":
+    ctrans_llm = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(GenerationConfig()))
+    #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **asdict(GenerationConfig()))
+    #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q2_K.gguf', **asdict(GenerationConfig()))

-#
-hf_checkpoint = '
-# model_id = model_name
-else:
-model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+if model_type == "hf":
+    # Huggingface chat model
+    #hf_checkpoint = 'jphme/phi-1_5_Wizard_Vicuna_uncensored'
+    hf_checkpoint = 'declare-lab/flan-alpaca-large'
+
+    def create_hf_model(model_name):

+        from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM

+        # model_id = model_name
+
+        if torch_device == "cuda":
+            if "flan" in model_name:
+                model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
+            elif "mpt" in model_name:
+                model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto", trust_remote_code=True)
+            else:
+                model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
        else:
+            if "flan" in model_name:
+                model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+            elif "mpt" in model_name:
+                model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+            else:
+                model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

+        tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = 2048)

+        return model, tokenizer, torch_device

+    model, tokenizer, torch_device = create_hf_model(model_name = hf_checkpoint)

# Vectorstore funcs

@@ -439,7 +432,6 @@ def hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val, out_p

    return docs_keep_as_doc, doc_df, docs_keep_out

-
def get_expanded_passages(vectorstore, docs, width):

    """

@@ -524,86 +516,6 @@ def get_expanded_passages(vectorstore, docs, width):

    return expanded_docs, doc_df

-
-def get_expanded_passages_orig(vectorstore, docs, width):
-
-    """
-    Extracts expanded passages based on given documents and a width for context.
-
-    Parameters:
-    - vectorstore: The primary data source.
-    - docs: List of documents to be expanded.
-    - width: Number of documents to expand around a given document for context.
-
-    Returns:
-    - expanded_docs: List of expanded Document objects.
-    - doc_df: DataFrame representation of expanded_docs.
-    """
-
-    from collections import defaultdict
-
-    def get_docs_from_vstore(vectorstore):
-        vector = vectorstore.docstore._dict
-        return list(vector.items())
-
-    def extract_details(docs_list):
-        docs_list_out = [tup[1] for tup in docs_list]
-        content = [doc.page_content for doc in docs_list_out]
-        meta = [doc.metadata for doc in docs_list_out]
-        return ''.join(content), meta[0], meta[-1]
-
-    def get_parent_content_and_meta(vstore_docs, width, target):
-        target_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
-        parent_vstore_out = [vstore_docs[i] for i in target_range]
-
-        content_str_out, meta_first_out, meta_last_out = [], [], []
-        for _ in parent_vstore_out:
-            content_str, meta_first, meta_last = extract_details(parent_vstore_out)
-            content_str_out.append(content_str)
-            meta_first_out.append(meta_first)
-            meta_last_out.append(meta_last)
-        return content_str_out, meta_first_out, meta_last_out
-
-    def merge_dicts_except_source(d1, d2):
-        merged = {}
-        for key in d1:
-            if key != "source":
-                merged[key] = str(d1[key]) + " to " + str(d2[key])
-            else:
-                merged[key] = d1[key] # or d2[key], based on preference
-        return merged
-
-    def merge_two_lists_of_dicts(list1, list2):
-        return [merge_dicts_except_source(d1, d2) for d1, d2 in zip(list1, list2)]
-
-    vstore_docs = get_docs_from_vstore(vectorstore)
-
-    parent_vstore_meta_section = [doc.metadata['page_section'] for _, doc in vstore_docs]
-
-    #print(docs)
-
-    expanded_docs = []
-    for doc, score in docs:
-        search_section = doc.metadata['page_section']
-        search_index = parent_vstore_meta_section.index(search_section) if search_section in parent_vstore_meta_section else -1
-
-        content_str, meta_first, meta_last = get_parent_content_and_meta(vstore_docs, width, search_index)
-        #print("Meta first:")
-        #print(meta_first)
-        #print("Meta last:")
-        #print(meta_last)
-        #print("Meta last end.")
-        meta_full = merge_two_lists_of_dicts(meta_first, meta_last)
-
-        #print(meta_full)
-
-        expanded_doc = (Document(page_content=content_str[0], metadata=meta_full[0]), score)
-        expanded_docs.append(expanded_doc)
-
-    doc_df = create_doc_df(expanded_docs) # Assuming you've defined the 'create_doc_df' function elsewhere
-
-    return expanded_docs, doc_df
-
def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings): # ,

    question = inputs["question"]

@@ -838,18 +750,6 @@ def highlight_found_text(search_text: str, full_text: str, hlt_chunk_size:int=hl
    return "".join(pos_tokens)

# # Chat functions
-def produce_streaming_answer_chatbot_gpt4all(history, full_prompt):
-
-    print("The question is: ")
-    print(full_prompt)
-
-    # Pull the generated text from the streamer, and update the model output.
-    history[-1][1] = ""
-    for new_text in gpt4all_model.generate(full_prompt, max_tokens=2000, streaming=True):
-        if new_text == None: new_text = ""
-        history[-1][1] += new_text
-        yield history
-
def produce_streaming_answer_chatbot_hf(history, full_prompt):

    #print("The question is: ")

@@ -866,7 +766,7 @@ def produce_streaming_answer_chatbot_hf(history, full_prompt):
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=sample,
-        repetition_penalty=
+        repetition_penalty=flan_alpaca_repetition_penalty,
        top_p=top_p,
        temperature=temperature,
        top_k=top_k

@@ -902,26 +802,28 @@ def produce_streaming_answer_chatbot_ctrans(history, full_prompt):

    tokens = ctrans_llm.tokenize(full_prompt)

-    #
-    #from loguru import logger
-
-    #_ = [elm for elm in full_prompt.splitlines() if elm.strip()]
-    #stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
-    #print(stop_string)
-
-    #logger.debug(f"{stop_string=} not used")
-
-    #_ = psutil.cpu_count(logical=False) - 1
-    #cpu_count: int = int(_) if _ else 1
-    #logger.debug(f"{cpu_count=}")
+    #config = GenerationConfig(reset=True)

    # Pull the generated text from the streamer, and update the model output.
-
+    import time
+    start = time.time()
+    NUM_TOKENS=0
+    print('-'*4+'Start Generation'+'-'*4)
+
    history[-1][1] = ""
    for new_text in ctrans_llm.generate(tokens, top_k=top_k, temperature=temperature, repetition_penalty=repetition_penalty): #ctrans_generate(prompt=tokens, config=config):
        if new_text == None: new_text = ""
        history[-1][1] += ctrans_llm.detokenize(new_text) #new_text
+        NUM_TOKENS+=1
        yield history
+
+    time_generate = time.time() - start
+    print('\n')
+    print('-'*4+'End Generation'+'-'*4)
+    print(f'Num of generated tokens: {NUM_TOKENS}')
+    print(f'Time for complete generation: {time_generate}s')
+    print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
+    print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')


def ctrans_generate(
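The substance of this file change is the new model_type switch ("ctrans" vs "hf") and the token-rate logging added to produce_streaming_answer_chatbot_ctrans. The sketch below exercises that ctransformers path on its own; it assumes the same juanjgit/orca_mini_3B-GGUF checkpoint and the module-level defaults seen above (top_k=3, temperature=0.1, repetition_penalty=1.05), with an illustrative prompt in place of the app's full retrieval-augmented prompt.

import time
from ctransformers import AutoModelForCausalLM

# Load the same GGUF checkpoint the Space uses; gpu_layers=0 keeps it CPU-only.
llm = AutoModelForCausalLM.from_pretrained(
    'juanjgit/orca_mini_3B-GGUF',
    model_type='llama',
    model_file='orca-mini-3b.q4_0.gguf',
    gpu_layers=0)

prompt = "What is a vector store?"  # illustrative prompt, not the app's RAG prompt
tokens = llm.tokenize(prompt)

start = time.time()
num_tokens = 0
answer = ""
for token_id in llm.generate(tokens, top_k=3, temperature=0.1, repetition_penalty=1.05):
    answer += llm.detokenize(token_id)  # decode each streamed token id as it arrives
    num_tokens += 1
elapsed = time.time() - start

print(answer)
if num_tokens:
    print(f'Num of generated tokens: {num_tokens}')
    print(f'Time for complete generation: {elapsed:.2f}s')
    print(f'Tokens per second: {num_tokens / elapsed:.2f}')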
requirements.txt
CHANGED
@@ -13,8 +13,8 @@ bitsandbytes
accelerate
optimum
pypdf
-gradio
-gradio_client==0.
+gradio==3.47.1
+gradio_client==0.6.0
python-docx
gpt4all
ctransformers[cuda]
user.jfif
ADDED
Binary file (53.4 kB)