feat: init
Files changed:
- README.md (+1, -1)
- app.py (+63, -24)
- requirements.txt (+2, -1)
README.md
CHANGED
```diff
@@ -4,7 +4,7 @@ emoji: 💬
 colorFrom: yellow
 colorTo: purple
 sdk: gradio
-sdk_version: 5.0
+sdk_version: 5.30.0
 app_file: app.py
 pinned: false
 license: mit
```
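For reference, the README front matter after this change reads as below. This is a reconstruction from the hunk shown (the `emoji` line comes from the hunk-header context; any `title` line above it is unchanged and not visible here):

```yaml
emoji: 💬
colorFrom: yellow
colorTo: purple
sdk: gradio
sdk_version: 5.30.0
app_file: app.py
pinned: false
license: mit
```

Pinning `sdk_version` to 5.30.0 selects the Gradio build the Space runs on.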
app.py
CHANGED
```diff
@@ -1,15 +1,31 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
+import requests
+import json
+import os
+from dotenv import load_dotenv
+
+# Load environment variables from the .env file
+load_dotenv()
+
+# Read configuration from environment variables
+API_URL = os.getenv("API_URL")
+API_TOKEN = os.getenv("API_TOKEN")
+
+# Validate the required environment variables
+if not API_URL or not API_TOKEN:
+    raise ValueError("make sure API_URL & API_TOKEN")
+
+print(f"[INFO] starting:")
+print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
+print(f"[INFO] API_TOKEN: {API_TOKEN[:10]}...{API_TOKEN[-10:]}")  # only print the first and last 10 characters of the token
 
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
 
 def respond(
     message,
-    history: list[tuple[str, str]],
+    history: list[dict],  # switched to the new messages format
     system_message,
     max_tokens,
     temperature,
@@ -17,27 +33,49 @@ def respond(
 ):
     messages = [{"role": "system", "content": system_message}]
 
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
+    # Append the chat history
+    messages.extend(history)
+
+    # Append the current user message
     messages.append({"role": "user", "content": message})
 
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {API_TOKEN}"
+    }
+
+    data = {
+        "model": "/data/DMind-1",
+        "stream": False,
+        "messages": messages,
+        "temperature": temperature,
+        "top_p": top_p,
+        "top_k": 20,
+        "min_p": 0.1,
+        "max_tokens": 32768
+    }
+
+    print(f"[INFO] process user msg...")
+    print(f"[INFO] sysMsg: {system_message}")
+    print(f"[INFO] userMsg: {message}")
+    print(f"[INFO] modelParam: temperature={temperature}, top_p={top_p}")
+    print(f"[INFO] reqData: {data}")
+
+    try:
+        with requests.post(API_URL, headers=headers, json=data) as r:
+            if r.status_code == 200:
+                json_response = r.json()
+                if 'choices' in json_response and len(json_response['choices']) > 0:
+                    content = json_response['choices'][0].get('message', {}).get('content', '')
+                    if content:
+                        # if '<think>' in content and '</think>' in content:
+                        #     content = content.split('</think>')[-1].strip()
+                        print(f"[INFO] response: {content}")
+                        return content
+        return "Service temporarily unavailable"
+    except Exception as e:
+        print(f"[ERROR] Request error: {e}")
+        return "Service error occurred"
 
 
 """
@@ -46,9 +84,9 @@ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Textbox(value="You are DMind AI Assistant, built by DMind.AI. Never present as Qwen, ChatGPT, Tongyi, OpenAI, etc.", label="System message"),
+        gr.Slider(minimum=1, maximum=32768, value=16384, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.6, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
@@ -57,6 +95,7 @@ demo = gr.ChatInterface(
             label="Top-p (nucleus sampling)",
         ),
     ],
+    type="messages"  # use the new messages format
 )
 
 
```
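Since `app.py` now reads its endpoint and credentials from the environment, running the Space locally requires a `.env` file next to the app. A minimal sketch with placeholder values (the real URL and token are secrets and are not part of this commit):

```
API_URL=https://example.com/v1/chat/completions
API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
```

Note that in this revision the "Max new tokens" slider is not forwarded to the backend: the request body pins `max_tokens` to 32768 regardless of the slider value.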
requirements.txt
CHANGED
```diff
@@ -1 +1,2 @@
-huggingface_hub==0.25.2
+huggingface_hub==0.25.2
+python-dotenv==1.0.1
```
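To exercise `respond()` outside the Gradio UI, a local smoke test might look like the sketch below. It is a hypothetical example: the endpoint and token are placeholders, and it assumes the backend speaks the OpenAI-style chat-completions schema the code expects:

```python
# Hypothetical smoke test for app.respond(); endpoint and token are placeholders.
import os

# Seed the environment before importing app.py (load_dotenv() does not
# override variables that are already set).
os.environ.setdefault("API_URL", "https://example.com/v1/chat/completions")
os.environ.setdefault("API_TOKEN", "hf_xxxxxxxxxxxxxxxxxxxx")

from app import respond

# History uses the OpenAI-style messages format implied by type="messages".
history = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help?"},
]

reply = respond(
    "Who built you?",
    history,
    system_message="You are DMind AI Assistant, built by DMind.AI.",
    max_tokens=1024,
    temperature=0.6,
    top_p=0.95,
)
print(reply)
```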