Update app.py
app.py
CHANGED
@@ -1,19 +1,52 @@
import argparse
+from typing import List, Tuple, Generator
import gradio as gr
-
from openai import OpenAI

-
-
+# Global variables for configuration
+client: OpenAI = None
+args: argparse.Namespace = None

-def
-
-
-
+def parse_arguments() -> argparse.Namespace:
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Chatbot Interface with Customizable Parameters"
+    )
+    parser.add_argument(
+        "--model-url",
+        type=str,
+        default="http://35.232.14.156:8081/v1",
+        help="Model URL"
+    )
+    parser.add_argument(
+        "-m", "--model",
+        type=str,
+        default="pentagoniac/SEMIKONG-8b-GPTQ",
+        help="Model name for the chatbot"
+    )
+    parser.add_argument(
+        "--temp",
+        type=float,
+        default=0.8,
+        help="Temperature for text generation"
+    )
+    parser.add_argument(
+        "--stop-token-ids",
+        type=str,
+        default="128009,128001",
+        help="Comma-separated stop token IDs"
+    )
+    parser.add_argument("--host", type=str, default=None)
+    parser.add_argument("--port", type=int, default=8000)
+    return parser.parse_args()

-
+def create_openai_client(api_base: str) -> OpenAI:
+    """Create and return an OpenAI client."""
+    return OpenAI(api_key="EMPTY", base_url=api_base)

-
+def get_system_message() -> str:
+    """Return the system message for the chatbot."""
+    return """You are SEMIKONG, an AI Assistant developed by the AI Alliance.
Your role is to use your's in-depth knowledge in the field of Semiconductor Manufacturing Process to answer the instruction from the user in the most sophisticate, detail and technical ways.
You must use as much as technical term as possible to make the answer more professional and informative.
Your answer must align with these criteria:
@@ -23,72 +56,89 @@ Your answer must align with these criteria:
4. Causality and Correlation: Analyze the identification and explanation of causal relationships or correlations. This is crucial for effectively diagnosing problems, analyzing scenarios, and discussing theoretical principles.
5. Practicality and Applicability: Judge the practicality and applicability of the recommendations and solutions offered. Ensure they are tailored to the specific conditions of the question and are feasible in the given context.
6. Comprehensive Coverage: Verify that the answer addresses all relevant aspects of the question. The response should be thorough, covering multiple angles and potential outcomes suitable for the type of question asked.
-7. Relevance to the Question: Confirm that the content of the answer directly addresses the core of the question. All explanations and solutions should be relevant and specifically tailored to the details and constraints of the question.
-
-
-
+7. Relevance to the Question: Confirm that the content of the answer directly addresses the core of the question. All explanations and solutions should be relevant and specifically tailored to the details and constraints of the question."""
+
+def format_chat_history(history: List[Tuple[str, str]], system_msg: str, message: str) -> List[dict]:
+    """Format chat history for OpenAI API."""
+    formatted_history = [{"role": "system", "content": system_msg}]
+    for human, assistant in history:
+        formatted_history.append({"role": "user", "content": human})
+        formatted_history.append({"role": "assistant", "content": assistant})
+    formatted_history.append({"role": "user", "content": message})
+    return formatted_history

-
-
-
-
-
-
-
-
-
-
+def predict(message: str, history: List[Tuple[str, str]]) -> Generator[str, None, None]:
+    """Generate predictions based on the chat history and current message."""
+    global client, args
+    history_openai_format = format_chat_history(history, get_system_message(), message)
+
+    stream = client.chat.completions.create(
+        model=args.model,
+        messages=history_openai_format,
+        temperature=args.temp,
+        stream=True,
        extra_body={
-            "
-            "
-            "length_penalty": 1.0,
-            "top_k": 50,
-            "min_p": 0.8,
-            "best_of": 1,
-            "use_beam_search": False,
-            "early_stopping": False,
+            "repetition_penalty": 1,
+            "stop_token_ids": [int(id.strip()) for id in args.stop_token_ids.split(",") if id.strip()] if args.stop_token_ids else [],
        },
    )

-
-
+    partial_message = ""
+    for chunk in stream:
+        partial_message += chunk.choices[0].delta.content or ""
+        yield partial_message

-
-
+def get_css() -> str:
+    """Return CSS for the interface."""
+    return """
+    <style>
+    .centered-content {
+        text-align: center;
+        margin: auto;
+        max-width: 800px;
+    }
+    .centered-content h2 {
+        color: #333;
+        margin-bottom: 20px;
+    }
+    .centered-content p {
+        color: #666;
+        line-height: 1.6;
+    }
+    </style>
+    """

+def get_description() -> str:
+    """Return HTML description for the interface."""
+    return get_css() + """
+    <div class="centered-content">
+        <h2>The world's first open-source LLM designed specifically for the semiconductor industry.</h2>
+        <p>SemiKong is a collaborative open-source effort with support from Aitomatic, TEL, FPT AI Center, The AI Alliance</p>
+    </div>
+    """

-def
-
-
-
-
-
-
-
-        description="""SEMIKONG is the first Open-Source Foundation Model supporting the Semiconductor Manufacturing Industry.\n
-        This is the result of the collaboration between AItomatic, FPT AI Center and Tokyo Electron with the supported from the AI Alliance and IBM.\n
-        For technical report, please refer to: __TBA__ \n
-        For open-source repository, please refer to: __TBA__
-        """,
+def create_chat_interface() -> gr.ChatInterface:
+    """Create and return a Gradio chat interface."""
+    return gr.ChatInterface(
+        predict,
+        chatbot=gr.Chatbot(height=666),
+        textbox=gr.Textbox(placeholder="Ask SEMIKONG", container=False, scale=7),
+        title="SEMIKONG-8B-GPTQ",
+        description=get_description(),
        theme="soft",
-        examples=[
-            "Describe different type of etching in semiconductor manufacturing process",
-            "What is Photolithography ?",
-            "What elements are necessary for miniaturization of semiconductor etch manufacturing equipment and what are their priorities?",
-        ],
-        cache_examples=True,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )
-    return demo

+def main():
+    """Main function to set up and launch the chatbot interface."""
+    global client, args
+    args = parse_arguments()
+    client = create_openai_client(args.model_url)
+
+    interface = create_chat_interface()
+    interface.queue().launch(server_name=args.host, server_port=args.port, share=False)

if __name__ == "__main__":
-
-    parser.add_argument("--host", type=str, default=None)
-    parser.add_argument("--port", type=int, default=8001)
-    args = parser.parse_args()
-
-    demo = build_demo()
-    demo.queue().launch()
+    main()
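For context, the streaming call that the new predict() issues can be exercised outside Gradio. The sketch below is a minimal standalone version of that request, assuming an OpenAI-compatible server (for example vLLM) is reachable at the --model-url default from the diff and serving the --model default; the base URL, model name, and sample question are placeholders to adjust.

# Minimal sketch of the streaming request predict() issues, outside Gradio.
# Assumes an OpenAI-compatible server (e.g. vLLM) is reachable at BASE_URL;
# both values below are just the defaults from the diff and may differ in practice.
from openai import OpenAI

BASE_URL = "http://35.232.14.156:8081/v1"   # --model-url default
MODEL = "pentagoniac/SEMIKONG-8b-GPTQ"      # --model default

client = OpenAI(api_key="EMPTY", base_url=BASE_URL)

stream = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": "You are SEMIKONG, an AI Assistant developed by the AI Alliance."},
        {"role": "user", "content": "What is Photolithography?"},
    ],
    temperature=0.8,
    stream=True,
    extra_body={
        "repetition_penalty": 1,
        "stop_token_ids": [128009, 128001],
    },
)

# Accumulate the streamed deltas the same way predict() does.
partial = ""
for chunk in stream:
    partial += chunk.choices[0].delta.content or ""
print(partial)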
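The message-formatting helper added in this update is easy to sanity-check in isolation. A minimal sketch, assuming the updated file is importable as a module named app (the Space stores it as app.py); the sample strings are illustrative only.

# Quick check of format_chat_history(), assuming app.py is importable as `app`.
from app import format_chat_history, get_system_message

history = [("What is Photolithography ?", "Photolithography is ...")]
msgs = format_chat_history(history, get_system_message(), "Describe plasma etching.")

# Expected shape: system prompt first, then alternating user/assistant turns,
# with the new user message last.
assert msgs[0]["role"] == "system"
assert [m["role"] for m in msgs[1:]] == ["user", "assistant", "user"]
assert msgs[-1]["content"] == "Describe plasma etching."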
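The update also drops the extra vLLM-style sampling fields the old extra_body carried (length_penalty, top_k, min_p, best_of, use_beam_search, early_stopping). If those are still wanted, they could be reinstated alongside the new fields as sketched below; whether a given backend's OpenAI-compatible server accepts each field is an assumption to verify, not guaranteed behaviour.

# Hypothetical extra_body that restores the old sampling fields next to the new ones.
# Field support varies by backend: these are vLLM-style extensions, not OpenAI API fields.
extra_body = {
    "repetition_penalty": 1,
    "stop_token_ids": [128009, 128001],
    "length_penalty": 1.0,
    "top_k": 50,
    "min_p": 0.8,
    "best_of": 1,
    "use_beam_search": False,
    "early_stopping": False,
}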