WizardLM-13B-V1.0-Uncensored-GGML

Runtime error

App Files Community

ffreemt committed on Jul 6, 2023

Commit

23fc95a

1 Parent(s): e178694

Update streaming...

Browse files

Files changed (2) hide show

.gitignore +1 -0
app.py +111 -40

.gitignore CHANGED Viewed

@@ -8,3 +8,4 @@ pyproject.toml
 models
 .ruff_cache
 run-nodemon.sh

 models
 .ruff_cache
 run-nodemon.sh
+app-.py

app.py CHANGED Viewed

@@ -1,10 +1,6 @@
 """Run codes"""
 # pylint: disable=line-too-long, broad-exception-caught, invalid-name, missing-function-docstring, too-many-instance-attributes, missing-class-docstring
-# r uff: noqa: E501
-# import gradio
-# gradio.load("models/WizardLM/WizardCoder-15B-V1.0").launch()
 import os
 import time
 from dataclasses import asdict, dataclass
@@ -37,11 +33,60 @@ user_prefix = "[user]: "
 assistant_prefix = "[assistant]: "
 def predict(prompt, bot):
     # logger.debug(f"{prompt=}, {bot=}, {timeout=}")
     logger.debug(f"{prompt=}, {bot=}")
     ns.response = ""
     with about_time() as atime:  # type: ignore
         try:
             # user_prompt = prompt
@@ -59,7 +104,12 @@ def predict(prompt, bot):
             response = ""
             buff.update(value="diggin...")
             for word in generator:
                 # print(word, end="", flush=True)
                 print(word, flush=True)  # vertical stream
                 response += word
@@ -268,6 +318,36 @@ css = """
     .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
     .xsmall {font-size: x-small;}
 """
 with gr.Blocks(
     # title="mpt-30b-chat-ggml",
@@ -281,9 +361,11 @@ with gr.Blocks(
         # )
         gr.Markdown(
             f"""<h4><center>{MODEL_FILENAME}</center></h4>
-            Most examples are meant for another model. You probably should try
-            some coder-related prompts.
             Try to refresh the browser and try again when occasionally errors occur.
@@ -296,13 +378,13 @@ with gr.Blocks(
     chatbot = gr.Chatbot(height=700)  # 500
     buff = gr.Textbox(show_label=False, visible=False)
     with gr.Row():
-        with gr.Column(scale=4):
             msg = gr.Textbox(
                 label="Chat Message Box",
                 placeholder="Ask me anything (press Enter or click Submit to send)",
                 show_label=False,
             ).style(container=False)
-        with gr.Column(scale=1, min_width=100):
             with gr.Row():
                 submit = gr.Button("Submit", elem_classes="xsmall")
                 stop = gr.Button("Stop", visible=False)
@@ -322,37 +404,8 @@ with gr.Blocks(
                         reset = gr.Button("Reset System Prompt")
     with gr.Accordion("Example Inputs", open=True):
-        etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
         examples = gr.Examples(
-            examples=[
-                ["判断一个数是不是质数的 javascript 码"],
-                ["实现python 里 range(10)的 javascript 码"],
-                ["实现python 里 [*(range(10)]的 javascript 码"],
-                ["Explain the plot of Cinderella in a sentence."],
-                [
-                    "How long does it take to become proficient in French, and what are the best methods for retaining information?"
-                ],
-                ["What are some common mistakes to avoid when writing code?"],
-                ["Build a prompt to generate a beautiful portrait of a horse"],
-                ["Suggest four metaphors to describe the benefits of AI"],
-                ["Write a pop song about leaving home for the sandy beaches."],
-                ["Write a summary demonstrating my ability to tame lions"],
-                ["鲁迅和周树人什么关系 说中文"],
-                ["鲁迅和周树人什么关系"],
-                ["鲁迅和周树人什么关系 用英文回答"],
-                ["从前有一头牛，这头牛后面有什么？"],
-                ["正无穷大加一大于正无穷大吗？"],
-                ["正无穷大加正无穷大大于正无穷大吗？"],
-                ["-2的平方根等于什么"],
-                ["树上有5只鸟，猎人开枪打死了一只。树上还有几只鸟？"],
-                ["树上有11只鸟，猎人开枪打死了一只。树上还有几只鸟？提示：需考虑鸟可能受惊吓飞走。"],
-                ["以红楼梦的行文风格写一张委婉的请假条。不少于320字。"],
-                [f"{etext} 翻成中文，列出3个版本"],
-                [f"{etext} \n 翻成中文，保留原意，但使用文学性的语言。不要写解释。列出3个版本"],
-                ["假定 1 + 2 = 4, 试求 7 + 8"],
-                ["Erkläre die Handlung von Cinderella in einem Satz."],
-                ["Erkläre die Handlung von Cinderella in einem Satz. Auf Deutsch"],
-            ],
             inputs=[msg],
             examples_per_page=40,
         )
@@ -367,7 +420,7 @@ with gr.Blocks(
             "biased, or otherwise offensive outputs.",
             elem_classes=["disclaimer"],
         )
     msg.submit(
         # fn=conversation.user_turn,
         fn=predict,
@@ -384,6 +437,24 @@ with gr.Blocks(
         queue=True,
         show_progress="full",
     )
     clear.click(lambda: None, None, chatbot, queue=False)
     # update buff Textbox, every: units in seconds)

 """Run codes"""
 # pylint: disable=line-too-long, broad-exception-caught, invalid-name, missing-function-docstring, too-many-instance-attributes, missing-class-docstring
+# ruff: noqa: E501
 import os
 import time
 from dataclasses import asdict, dataclass
 assistant_prefix = "[assistant]: "
+def predict_str(prompt, bot):  # bot is in fact bot_history
+    # logger.debug(f"{prompt=}, {bot=}, {timeout=}")
+    logger.debug(f"{prompt=}, {bot=}")
+    try:
+        # user_prompt = prompt
+        generator = generate(
+            LLM,
+            GENERATION_CONFIG,
+            system_prompt=default_system_prompt,
+            user_prompt=prompt.strip(),
+        )
+        ns.generator = generator  # for .then
+    except Exception as exc:
+        logger.error(exc)
+    # bot.append([prompt, f"{response} {_}"])
+    # return prompt, bot
+    return prompt, bot + [[prompt, None]]
+def bot_str(bot):
+    if bot:
+        bot[-1][1] = ""
+    else:
+        bot = [["Something is wrong", ""]]
+    print(assistant_prefix, end=" ", flush=True)
+    response = ""
+    flag = 1
+    then = time.time()
+    for word in ns.generator:
+        # record first response time
+        if flag:
+            logger.debug(f"\t {time.time() - then:.1f}s")
+            flag = 0
+        print(word, end="", flush=True)
+        # print(word, flush=True)  # vertical stream
+        response += word
+        bot[-1][1] = response
+        yield bot
 def predict(prompt, bot):
     # logger.debug(f"{prompt=}, {bot=}, {timeout=}")
     logger.debug(f"{prompt=}, {bot=}")
     ns.response = ""
+    then = time.time()
     with about_time() as atime:  # type: ignore
         try:
             # user_prompt = prompt
             response = ""
             buff.update(value="diggin...")
+            flag = 1
             for word in generator:
+                # record first response time
+                if flag:
+                    logger.debug(f"\t {time.time() - then:.1f}s")
+                    flag = 0
                 # print(word, end="", flush=True)
                 print(word, flush=True)  # vertical stream
                 response += word
     .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
     .xsmall {font-size: x-small;}
 """
+etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
+examples = [
+    ["Explain the plot of Cinderella in a sentence."],
+    [
+        "How long does it take to become proficient in French, and what are the best methods for retaining information?"
+    ],
+    ["What are some common mistakes to avoid when writing code?"],
+    ["Build a prompt to generate a beautiful portrait of a horse"],
+    ["Suggest four metaphors to describe the benefits of AI"],
+    ["Write a pop song about leaving home for the sandy beaches."],
+    ["Write a summary demonstrating my ability to tame lions"],
+    ["鲁迅和周树人什么关系 说中文"],
+    ["鲁迅和周树人什么关系"],
+    ["鲁迅和周树人什么关系 用英文回答"],
+    ["从前有一头牛，这头牛后面有什么？"],
+    ["正无穷大加一大于正无穷大吗？"],
+    ["正无穷大加正无穷大大于正无穷大吗？"],
+    ["-2的平方根等于什么"],
+    ["树上有5只鸟，猎人开枪打死了一只。树上还有几只鸟？"],
+    ["树上有11只鸟，猎人开枪打死了一只。树上还有几只鸟？提示：需考虑鸟可能受惊吓飞走。"],
+    ["以红楼梦的行文风格写一张委婉的请假条。不少于320字。"],
+    [f"{etext} 翻成中文，列出3个版本"],
+    [f"{etext} \n 翻成中文，保留原意，但使用文学性的语言。不要写解释。列出3个版本"],
+    ["假定 1 + 2 = 4, 试求 7 + 8"],
+    ["判断一个数是不是质数的 javascript 码"],
+    ["实现python 里 range(10)的 javascript 码"],
+    ["实现python 里 [*(range(10)]的 javascript 码"],
+    ["Erkläre die Handlung von Cinderella in einem Satz."],
+    ["Erkläre die Handlung von Cinderella in einem Satz. Auf Deutsch"],
+]
 with gr.Blocks(
     # title="mpt-30b-chat-ggml",
         # )
         gr.Markdown(
             f"""<h4><center>{MODEL_FILENAME}</center></h4>
+            It takes about 100 seconds for the initial reply
+            message to appear. Average streaming rate ~1 sec/chat. The bot only speaks English.
+            Most examples are meant for another model. You probably should try to test
+            some related prompts.
             Try to refresh the browser and try again when occasionally errors occur.
     chatbot = gr.Chatbot(height=700)  # 500
     buff = gr.Textbox(show_label=False, visible=False)
     with gr.Row():
+        with gr.Column(scale=5):
             msg = gr.Textbox(
                 label="Chat Message Box",
                 placeholder="Ask me anything (press Enter or click Submit to send)",
                 show_label=False,
             ).style(container=False)
+        with gr.Column(scale=1, min_width=80):
             with gr.Row():
                 submit = gr.Button("Submit", elem_classes="xsmall")
                 stop = gr.Button("Stop", visible=False)
                         reset = gr.Button("Reset System Prompt")
     with gr.Accordion("Example Inputs", open=True):
         examples = gr.Examples(
+            examples=examples,
             inputs=[msg],
             examples_per_page=40,
         )
             "biased, or otherwise offensive outputs.",
             elem_classes=["disclaimer"],
         )
+    _ = """
     msg.submit(
         # fn=conversation.user_turn,
         fn=predict,
         queue=True,
         show_progress="full",
     )
+    # """
+    msg.submit(
+        # fn=conversation.user_turn,
+        fn=predict_str,
+        inputs=[msg, chatbot],
+        outputs=[msg, chatbot],
+        # queue=True,
+        show_progress="full",
+        api_name="predict",
+    ).then(bot_str, chatbot, chatbot)
+    submit.click(
+        fn=lambda x, y: ("",) + predict_str(x, y)[1:],  # clear msg
+        inputs=[msg, chatbot],
+        outputs=[msg, chatbot],
+        queue=True,
+        show_progress="full",
+    ).then(bot_str, chatbot, chatbot)
     clear.click(lambda: None, None, chatbot, queue=False)
     # update buff Textbox, every: units in seconds)