File size: 2,454 Bytes
94db756
beb58a2
f316a62
 
 
 
 
94db756
f316a62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94db756
f316a62
 
 
 
 
35c7bed
94db756
 
 
 
9cc946b
beb58a2
 
94db756
f316a62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86

from flask import Flask, request, Response, jsonify
import os
from rich import print
import json
import requests
from time import sleep
from gradio_client import Client

def parse_flag(raw, default=False):
    """Interpret an environment-variable value as a boolean switch.

    Environment variables are always strings, so a plain truthiness test
    treats ``"false"`` or ``"0"`` as enabled. This helper maps the usual
    "off" spellings to ``False`` and every other non-empty value to ``True``.

    Args:
        raw: The raw value (normally a ``str``), or ``None`` when unset.
        default: Value returned when ``raw`` is ``None``.

    Returns:
        ``bool`` -- the parsed flag.
    """
    if raw is None:
        return default
    if isinstance(raw, bool):
        return raw
    return str(raw).strip().lower() not in ('', '0', 'false', 'no', 'off')


# Runtime configuration, read once from environment variables at import time.
settings = {
    # Identity of this node, echoed back by the index route.
    'node': {
        'id': os.environ.get('nodeId'),
        'models': os.environ.get('nodeModel'),
    },
    # API password; read here but not referenced by the visible handlers.
    'security': {
        'passw': os.environ.get('apipassw'),
    },
    # Keyword arguments passed straight to ``app.run(**settings['web'])``,
    # so the key names must stay ``port`` / ``host`` / ``debug``.
    'web': {
        # Coerced so the value has the same type whether or not webport is set.
        'port': int(os.environ.get('webport', 7860)),
        'host': os.environ.get('webhost', '0.0.0.0'),
        # Parsed explicitly: a raw string such as "false" would otherwise be
        # truthy and silently enable Flask debug mode.
        'debug': parse_flag(os.environ.get('webdebug')),
    },
}

# Flask application object; the @app.route handlers below register their URL rules on it.
app = Flask(__name__)



@app.route("/")
def index():
    """Return a one-line plain-text greeting naming this node's id and models."""
    node = settings["node"]
    node_id, node_models = node["id"], node["models"]
    return f'Hi, its a node {node_id} with {node_models}'

@app.route('/send', methods=['POST'])
def send():
    """Forward a chat message to the Gradio Space at the hard-coded URL.

    Expects a JSON object body; its optional ``message`` key defaults to
    ``'hello!'``.

    Returns:
        ``{"response": <value returned by the Space's /chat endpoint>}`` on
        success; ``{"error": ...}`` with status 400 when the body is not a
        JSON object; ``{"error": ...}`` with status 502 when the upstream
        call raises.
    """
    # silent=True yields None instead of raising on a missing or invalid JSON
    # body, so every malformed request funnels into the single 400 below
    # (a non-object body previously crashed on ``.get`` with a 500).
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'request body must be a JSON object'}), 400
    message = payload.get('message', 'hello!')

    try:
        # A fresh client is created for each request, as in the original code.
        client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
        result = client.predict(message, api_name="/chat")
    except Exception as exc:  # request boundary: report any upstream failure
        print(exc)
        return jsonify({'error': str(exc)}), 502

    return jsonify({'response': result})

def _sse_event(payload):
    """Serialize *payload* as one ``data:`` server-sent-event frame of compact JSON."""
    # separators must be a real 2-tuple; the original ``(',' ':')`` was the
    # single string ',:' and only worked because a 2-char string unpacks.
    return 'data: %s\n\n' % json.dumps(payload, separators=(',', ':'))


@app.route("/chat/completions", methods=['POST'])
def chat_completions():
    """Run a chat completion and return it as JSON or as an event stream.

    Reads from the JSON object body:
        stream:   when truthy, reply with ``text/event-stream`` (default False).
        model:    model name passed to the backend (default ``'gpt-4'``).
        messages: passed to the backend unchanged (``None`` when absent).

    Returns:
        * ``{"model": ..., "result": ...}`` when not streaming.
        * A ``text/event-stream`` response with one ``data:`` frame per item
          yielded by the backend when streaming.
        * A ``text/event-stream`` response with a single frame
          ``{"status": "!=200", "error": ...}`` when the backend call raises
          (sent for both streaming and non-streaming requests).
        * ``{"error": ...}`` with status 400 when the body is not a JSON object.
    """
    # Parse the body once; silent=True returns None instead of raising, so a
    # missing, invalid or non-object body gets a clean 400 instead of a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'request body must be a JSON object'}), 400

    streaming = payload.get('stream', False)
    model = payload.get('model', 'gpt-4')
    messages = payload.get('messages')

    try:
        # NOTE(review): `closeai` is not imported anywhere in the visible file.
        # Unless it is injected elsewhere, this line raises NameError and every
        # request falls into the error path below -- confirm the intended
        # module and import it.
        response = closeai.ChatCompletion.create(model=model, stream=streaming, messages=messages)
    except Exception as er:
        print(er)
        if '429' in str(er):
            # NOTE(review): this blocks the request for 30 s before replying;
            # presumably a crude back-off when the backend reports 429.
            sleep(30)
        # Build the frame here: `er` is unbound once the except block exits,
        # so the generator must not refer to it lazily.
        error_frame = _sse_event({"status": "!=200", "error": str(er)})

        def error_stream():
            yield error_frame

        return app.response_class(error_stream(), mimetype='text/event-stream')

    if not streaming:
        return {
            'model': model,
            'result': response["choices"][0]["message"]["content"]
        }

    def stream():
        # Each item yielded by the backend becomes one frame; the item is
        # placed both at top level and inside an OpenAI-style delta.
        for token in response:
            yield _sse_event({
                'model': model,
                'token': token,
                'status': 200,
                'choices': [
                    {
                        'delta': {
                            'content': token
                        },
                        'index': 0,
                        'finish_reason': None
                    }
                ]
            })

    return app.response_class(stream(), mimetype='text/event-stream')

if __name__ == '__main__':
    # Script entry point: start Flask's built-in server, passing the 'web'
    # section of settings through as keyword arguments.
    web_options = settings['web']
    app.run(**web_options)