Spaces:
Paused
Paused
added sentence-transformers encode service
Browse files- app.py +12 -23
- app_gradio.py +0 -49
- requirements.txt +1 -0
- scripts/update_valdata.py +0 -35
app.py
CHANGED
|
@@ -11,9 +11,19 @@ from background_service import BackgroundTaskService
|
|
| 11 |
# anvil.server.connect('PLMOIU5VCGGUOJH2XORIBWV3-ZXZVFLWX7QFIIAF4')
|
| 12 |
anvil.server.connect("S3SLHUQ2BB33NVTP7FWRAOHS-NDSRD7CDALRPSPLL")
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
app=Flask(__name__)
|
| 15 |
-
MESSAGED={'title':'API Server',
|
| 16 |
-
'messageL':['published server functions:','
|
| 17 |
'call_gemini(text,key)','call_gpt(text,key,model)',
|
| 18 |
'task_id<=launch(func_name,*args)','poll(task_id)']}
|
| 19 |
|
|
@@ -89,27 +99,6 @@ def encode():
|
|
| 89 |
embedding=emb_array.tolist()
|
| 90 |
return jsonify({'embedding': embedding})
|
| 91 |
|
| 92 |
-
@app.route("/file/<string:filename>")
|
| 93 |
-
def return_file(filename):
|
| 94 |
-
return send_file('./data/'+filename)
|
| 95 |
-
|
| 96 |
-
@app.route('/run',methods=['GET','POST'])
|
| 97 |
-
def run_script():
|
| 98 |
-
script=''
|
| 99 |
-
# print(request.method)
|
| 100 |
-
print(request)
|
| 101 |
-
if request.method=='GET':
|
| 102 |
-
script=request.args.get('script')
|
| 103 |
-
print('I am in get')
|
| 104 |
-
elif request.method=='POST':
|
| 105 |
-
print('I am in post')
|
| 106 |
-
data=request.get_json()
|
| 107 |
-
if 'script' in data: script=data['script']
|
| 108 |
-
if script=='' or script is None: return 'INVALID'
|
| 109 |
-
os.system(script+' > ./out.txt')
|
| 110 |
-
with open('./out.txt','r') as f: output=f.read()
|
| 111 |
-
return output
|
| 112 |
-
|
| 113 |
@app.route('/',methods=['GET', 'POST'])
|
| 114 |
def home():
|
| 115 |
return render_template('home.html',messageD=MESSAGED)
|
|
|
|
| 11 |
# anvil.server.connect('PLMOIU5VCGGUOJH2XORIBWV3-ZXZVFLWX7QFIIAF4')
|
| 12 |
anvil.server.connect("S3SLHUQ2BB33NVTP7FWRAOHS-NDSRD7CDALRPSPLL")
|
| 13 |
|
| 14 |
+
from sentence_transformers import SentenceTransformer
|
| 15 |
+
from sentence_transformers.util import cos_sim
|
| 16 |
+
# model = SentenceTransformer('thenlper/gte-large')
|
| 17 |
+
model = SentenceTransformer('BAAI/bge-large-en')
|
| 18 |
+
|
| 19 |
+
@anvil.server.callable
|
| 20 |
+
def encode(sentence = None):
|
| 21 |
+
vec = model.encode(sentence)
|
| 22 |
+
return [float(val) if isinstance(val, (int, float, np.float32)) else 0.0 for val in vec]
|
| 23 |
+
|
| 24 |
app=Flask(__name__)
|
| 25 |
+
MESSAGED={'title':'API Server for ICAPP',
|
| 26 |
+
'messageL':['published server functions:','encode_anvil(text)', 'encode(sentence)',
|
| 27 |
'call_gemini(text,key)','call_gpt(text,key,model)',
|
| 28 |
'task_id<=launch(func_name,*args)','poll(task_id)']}
|
| 29 |
|
|
|
|
| 99 |
embedding=emb_array.tolist()
|
| 100 |
return jsonify({'embedding': embedding})
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
@app.route('/',methods=['GET', 'POST'])
|
| 103 |
def home():
|
| 104 |
return render_template('home.html',messageD=MESSAGED)
|
app_gradio.py
DELETED
|
@@ -1,49 +0,0 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
from threading import Thread
|
| 3 |
-
import time
|
| 4 |
-
import anvil.server
|
| 5 |
-
import os
|
| 6 |
-
anvil.server.connect('55MH4EBKM22EP4E6D5T6CVSL-VGO5X4SM6JEXGJVT')
|
| 7 |
-
import json
|
| 8 |
-
import ast
|
| 9 |
-
|
| 10 |
-
def run_script(scriptname):
|
| 11 |
-
# return scriptname
|
| 12 |
-
os.system(scriptname+' > ./out.txt')
|
| 13 |
-
with open('./out.txt','r') as f: output=f.read()
|
| 14 |
-
return output
|
| 15 |
-
|
| 16 |
-
@anvil.server.callable
|
| 17 |
-
def run_command(scriptname):
|
| 18 |
-
os.system(scriptname+' > ./out.txt')
|
| 19 |
-
with open('./out.txt','r') as f: output=f.read()
|
| 20 |
-
return output
|
| 21 |
-
|
| 22 |
-
@anvil.server.callable
|
| 23 |
-
def get_file(filename):
|
| 24 |
-
m = BlobMedia('text/plain', 'Hello, world!', name='hello.txt')
|
| 25 |
-
return m
|
| 26 |
-
|
| 27 |
-
gradio_interface = gr.Interface(
|
| 28 |
-
fn=run_script,
|
| 29 |
-
inputs="text",
|
| 30 |
-
outputs="text",
|
| 31 |
-
title="REST API with Gradio and Huggingface Spaces",
|
| 32 |
-
description='''Inputs should be json of test item e.g., as a dictionary;
|
| 33 |
-
output right now is just returning the input; later label will be returned.
|
| 34 |
-
|
| 35 |
-
This is how to call the API from Python:
|
| 36 |
-
|
| 37 |
-
import requests
|
| 38 |
-
|
| 39 |
-
response = requests.post("https://gmshroff-gmserver.hf.space/run/predict", json={
|
| 40 |
-
"data": [
|
| 41 |
-
"\<put some json string here\>",
|
| 42 |
-
]}).json()
|
| 43 |
-
|
| 44 |
-
data = response["data"])
|
| 45 |
-
|
| 46 |
-
''')
|
| 47 |
-
|
| 48 |
-
gradio_interface.launch()
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -7,3 +7,4 @@ numpy
|
|
| 7 |
transformers
|
| 8 |
google-generativeai
|
| 9 |
openai
|
|
|
|
|
|
| 7 |
transformers
|
| 8 |
google-generativeai
|
| 9 |
openai
|
| 10 |
+
sentence-transformers
|
scripts/update_valdata.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python
|
| 2 |
-
# coding: utf-8
|
| 3 |
-
|
| 4 |
-
# In[ ]:
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
from numerapi import NumerAPI
|
| 8 |
-
import os
|
| 9 |
-
import pandas as pd
|
| 10 |
-
import numpy as np
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
# In[ ]:
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
napi = NumerAPI()
|
| 17 |
-
data_path='./data/'
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
# In[ ]:
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
napi.download_dataset("v4.2/validation_int8.parquet", data_path+"validation_int8.parquet")
|
| 24 |
-
validation_data=pd.read_parquet(data_path+"validation_int8.parquet")
|
| 25 |
-
recent_eras=list(validation_data.loc[validation_data['data_type']=='validation']['era'].unique()[-2:])
|
| 26 |
-
validation_subset=validation_data[validation_data['era'].isin(recent_eras)]
|
| 27 |
-
validation_subset.to_parquet(data_path+"validation_subset_int8.parquet",index=False)
|
| 28 |
-
# napi.download_dataset('v4.2/live_int8.parquet',data_path+'live_int8.parquet')
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
# In[ ]:
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
# print("Now please copy the file to server via: scp ../../data/validation_subset_int8.parquet gms@gms1:/home/gms/numerai/data/.")
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|