Update app.py
app.py CHANGED
@@ -8,7 +8,7 @@ import requests
 import tempfile
 from datetime import datetime
 from openai import OpenAI
-from huggingface_hub import upload_file
+from huggingface_hub import upload_file, list_repo_files, create_repo
 
 MODEL = "gpt-4.1-mini"
 
@@ -39,7 +39,7 @@ OPENAI_API_KEYS = get_env_list("OPENAI_API_KEYS", "")
 NUM_THREADS = int(os.getenv("NUM_THREADS", "4"))
 IP_SALT = os.getenv("IP_SALT", "latamgpt-default-salt-2025")
 HF_TOKEN = os.getenv("HF_TOKEN")
-DATASET_REPO = os.getenv("DATASET_REPO", "
+DATASET_REPO = os.getenv("DATASET_REPO", "latamgpt/copuchat-conversations")
 
 def exception_handler(exception_type, exception, traceback):
     print(f"{exception_type.__name__}: {exception}")
@@ -75,13 +75,25 @@ def upload_to_dataset(log_data):
     if not HF_TOKEN:
         return
     try:
+        try:
+            create_repo(
+                repo_id=DATASET_REPO,
+                repo_type="dataset",
+                private=True,
+                exist_ok=True,
+                token=HF_TOKEN
+            )
+        except:
+            pass
+
+        # Create individual files with timestamp to avoid conflicts
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
+        filename = f"raw/{timestamp}.jsonl"
+
         with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) as f:
             f.write(json.dumps(log_data) + '\n')
             temp_path = f.name
 
-        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
-        filename = f"conversations/conv_{timestamp}.jsonl"
-
         upload_file(
             path_or_fileobj=temp_path,
             path_in_repo=filename,
@@ -95,7 +107,43 @@ def upload_to_dataset(log_data):
     except Exception as e:
         print(f"Upload failed: {e}")
 
+def get_global_chat_counter():
+    if not HF_TOKEN:
+        return 1
+    try:
+        files = list_repo_files(repo_id=DATASET_REPO, repo_type="dataset", token=HF_TOKEN)
+        conversation_files = [f for f in files if f.startswith("conversations/") and f.endswith(".jsonl")]
+
+        if not conversation_files:
+            return 1
+
+        max_counter = 0
+        for file in conversation_files[-10:]:  # Check last 10 files for efficiency
+            try:
+                from huggingface_hub import hf_hub_download
+                local_file = hf_hub_download(
+                    repo_id=DATASET_REPO,
+                    repo_type="dataset",
+                    filename=file,
+                    token=HF_TOKEN
+                )
+                with open(local_file, 'r') as f:
+                    data = json.load(f)
+                    counter = data.get('chat_counter', 0)
+                    max_counter = max(max_counter, counter)
+            except:
+                continue
+
+        return max_counter + 1
+    except:
+        return 1
+
+GLOBAL_CHAT_COUNTER = get_global_chat_counter()
+print(f"Starting global chat counter at: {GLOBAL_CHAT_COUNTER}")
+
 def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request: gr.Request):
+    global GLOBAL_CHAT_COUNTER
+
     if not OPENAI_API_KEYS or not OPENAI_API_KEYS[0]:
         yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "No API keys configured", gr.update(interactive=True), gr.update(interactive=True)
         return
@@ -116,6 +164,8 @@ def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request:
 
     messages.append({"role": "user", "content": inputs})
 
+    GLOBAL_CHAT_COUNTER += 1
+    global_counter = GLOBAL_CHAT_COUNTER
     chat_counter += 1
     history.append(inputs)
     token_counter = 0
@@ -143,11 +193,10 @@ def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request:
                 token_counter += 1
                 yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "200", gr.update(interactive=False), gr.update(interactive=False)
 
-        # Re-enable inputs after streaming completes
         yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "200", gr.update(interactive=True), gr.update(interactive=True)
 
     except Exception as e:
-        print(f'
+        print(f'Error API OpenAI: {e}')
         yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, str(e), gr.update(interactive=True), gr.update(interactive=True)
 
     log_data = {
@@ -158,6 +207,7 @@ def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request:
         "country_code": geo_info["country_code"],
         "region": geo_info["region"],
         "chat_counter": chat_counter,
+        "global_chat_counter": global_counter,
         "model": MODEL,
         "messages": messages,
         "response": partial_words,
@@ -173,9 +223,9 @@ def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request:
 def reset_textbox():
     return gr.update(value='', interactive=False), gr.update(interactive=False)
 
-title = """<h1 align="center">
+title = """<h1 align="center">Copuchat: Recolección de datos para LatamGPT</h1>"""
 if DISABLED:
-    title = """<h1 align="center" style="color:red">
+    title = """<h1 align="center" style="color:red">Esta app alcanzó su límite de uso. Porfavor intenta reingresar mañana.</h1>"""
 
 description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
 ```
@@ -194,18 +244,18 @@ with gr.Blocks(css="""#col_container { margin-left: auto; margin-right: auto;}
 
     with gr.Column(elem_id="col_container", visible=False) as main_block:
         chatbot = gr.Chatbot(elem_id='chatbot')
-        inputs = gr.Textbox(placeholder="
+        inputs = gr.Textbox(placeholder="", label="Escribe tu mensaje y presiona Enter")
         state = gr.State([])
 
         with gr.Row():
             with gr.Column(scale=7):
                 b1 = gr.Button(visible=not DISABLED)
            with gr.Column(scale=3):
-                server_status_code = gr.Textbox(label="
+                server_status_code = gr.Textbox(label="Código de estado del servidor")
 
-        with gr.Accordion("
-            top_p = gr.Slider(minimum=0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (
-            temperature = gr.Slider(minimum=0, maximum=2.0, value=0.7, step=0.1, interactive=True, label="
+        with gr.Accordion("Parámetros", open=False):
+            top_p = gr.Slider(minimum=0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (muestreo de núcleo)")
+            temperature = gr.Slider(minimum=0, maximum=2.0, value=0.7, step=0.1, interactive=True, label="Temperatura")
         chat_counter = gr.Number(value=0, visible=False, precision=0)
 
     with gr.Column(elem_id="user_consent_container") as user_consent_block:
@@ -217,8 +267,9 @@ with gr.Blocks(css="""#col_container { margin-left: auto; margin-right: auto;}
                 <div>
                     <p>Al usar nuestra aplicación, que funciona con la API de OpenAI, reconoces y aceptas los siguientes términos sobre los datos que proporcionas:</p>
                     <ol>
-                        <li><strong>Recolección:</strong> Podemos recopilar información, incluyendo las entradas que escribes en nuestra aplicación, las salidas generadas por la API de OpenAI, y ciertos detalles técnicos sobre tu dispositivo y conexión (como tipo de navegador, sistema operativo
-                        <li><strong>Uso:</strong> Podemos usar los datos recopilados para propósitos de investigación
+                        <li><strong>Recolección:</strong> Podemos recopilar información, incluyendo las entradas que escribes en nuestra aplicación, las salidas generadas por la API de OpenAI, y ciertos detalles técnicos sobre tu dispositivo y conexión (como tipo de navegador, sistema operativo y ubicación geográfica) proporcionados por los headers de solicitud de tu dispositivo.</li>
+                        <li><strong>Uso:</strong> Podemos usar los datos recopilados para propósitos de investigación y desarrollo de LatamGPT.</li>
+                        <li><strong>Uso:</strong> Tus datos pasaran por un proceso de anonimización para evitar la recolección de información privada.</li>
                         <li><strong>Compartición y Publicación:</strong> Tus datos, incluyendo los detalles técnicos recopilados de los headers de solicitud de tu dispositivo, pueden ser publicados, compartidos con terceros, o usados para análisis y propósitos de reportes.</li>
                         <li><strong>Retención de Datos:</strong> Podemos retener tus datos, incluyendo los detalles técnicos recopilados de los headers de solicitud de tu dispositivo, por el tiempo que sea necesario.</li>
                     </ol>
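For reference, a minimal standalone sketch of the upload pattern this commit adopts: create the dataset repo once with `exist_ok=True`, then push each conversation as its own timestamp-named JSONL file under `raw/`. The repo id and payload below are placeholders, and `HF_TOKEN` is assumed to be set in the environment:

```python
import json
import os
import tempfile
from datetime import datetime

from huggingface_hub import create_repo, upload_file

# Placeholder repo id; the app reads this from the DATASET_REPO env var.
DATASET_REPO = os.getenv("DATASET_REPO", "your-org/your-dataset")


def push_log(log_data: dict) -> None:
    token = os.getenv("HF_TOKEN")
    if not token:
        return

    # Idempotent: exist_ok=True makes this a no-op after the first call.
    create_repo(repo_id=DATASET_REPO, repo_type="dataset",
                private=True, exist_ok=True, token=token)

    # One file per record, named by a microsecond timestamp, so concurrent
    # Gradio workers never write to the same path.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
        f.write(json.dumps(log_data) + "\n")
        temp_path = f.name

    upload_file(
        path_or_fileobj=temp_path,
        path_in_repo=f"raw/{timestamp}.jsonl",
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=token,
    )


if __name__ == "__main__":
    push_log({"model": "gpt-4.1-mini",
              "messages": [{"role": "user", "content": "hola"}]})
```

Writing one small file per record trades repo tidiness for write safety: each upload commits independently, so no locking or merge step is needed between workers.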