# Copuchat / app.py
# (Hugging Face Space page header — "ouhenio's picture / Update app.py /
# 7764644 verified" — scraping residue, kept as comments so the file parses)
import gradio as gr
import os
import sys
import json
import random
import hashlib
import requests
import tempfile
from datetime import datetime
from openai import OpenAI
from huggingface_hub import upload_file, list_repo_files, create_repo, hf_hub_download
# OpenAI model used for every chat completion in this app.
MODEL = "gpt-4.1-mini"
def get_env_bool(key, default="False"):
    """Read an environment variable and interpret it as a boolean.

    Accepts the truthy spellings 'true', '1', 'yes', 'on'
    (case-insensitive); anything else is False. A bool passed as
    *default* is returned unchanged when the variable is unset.
    """
    raw = os.getenv(key, default)
    if isinstance(raw, bool):
        return raw
    return str(raw).lower() in {'true', '1', 'yes', 'on'}
def get_env_list(key, default=""):
    """Read an environment variable as a list of strings.

    Supports either a JSON array (e.g. '["a", "b"]') or a plain
    comma-separated string. Whitespace is trimmed and blank entries are
    dropped; an unset/empty variable yields [].
    """
    raw = os.getenv(key, default)
    if not raw:
        return []
    if raw.startswith('[') and raw.endswith(']'):
        # Looks like JSON: prefer that parse, but fall through to the
        # comma-split path when it is malformed.
        try:
            decoded = json.loads(raw)
        except json.JSONDecodeError:
            decoded = None
        if isinstance(decoded, list):
            return [str(entry).strip() for entry in decoded if str(entry).strip()]
    return [piece.strip() for piece in str(raw).split(',') if piece.strip()]
# Runtime configuration, all sourced from the environment.
DISABLED = get_env_bool("DISABLED", "False")  # app-wide kill switch (shows red banner)
OPENAI_API_KEYS = get_env_list("OPENAI_API_KEYS", "")  # pool of keys, one picked at random per request
NUM_THREADS = int(os.getenv("NUM_THREADS", "4"))  # NOTE(review): not referenced elsewhere in this file — confirm it is still needed
IP_SALT = os.getenv("IP_SALT", "latamgpt-default-salt-2025")  # salt for pseudonymous IP fingerprints
HF_TOKEN = os.getenv("HF_TOKEN")  # when unset, all dataset persistence becomes a no-op
DATASET_REPO = os.getenv("DATASET_REPO", "latam-gpt/copuchat-conversations")  # target HF dataset repo
def exception_handler(exception_type, exception, traceback):
    # Print a one-line summary instead of a full traceback — keeps the
    # public Space's logs terse and avoids exposing internals.
    print(f"{exception_type.__name__}: {exception}")
sys.excepthook = exception_handler
# Belt-and-braces: also truncate any traceback that does get formatted.
sys.tracebacklimit = 0
def get_user_fingerprint(request):
    """Resolve the client IP and derive a salted, truncated fingerprint.

    The IP is read from 'x-forwarded-for' (first hop in the chain) or
    'x-real-ip' when behind a proxy, falling back to the direct
    connection host. Returns (real_ip, fingerprint), where fingerprint
    is the first 16 hex chars of sha256(ip + ':' + IP_SALT) —
    pseudonymous and not reversible.
    """
    client = getattr(request, 'client', None)
    # Bug fix: the previous code did `getattr(request, 'client', {}).get(...)`,
    # which raised AttributeError whenever `request.client` existed as an
    # object (the usual case — it exposes a `.host` attribute, not a dict
    # interface). Support both shapes.
    if isinstance(client, dict):
        fallback_host = client.get('host', 'unknown')
    else:
        fallback_host = getattr(client, 'host', None) or 'unknown'
    real_ip = (
        request.headers.get('x-forwarded-for', '').split(',')[0].strip() or
        request.headers.get('x-real-ip', '') or
        fallback_host
    )
    fingerprint_data = f"{real_ip}:{IP_SALT}"
    user_fingerprint = hashlib.sha256(fingerprint_data.encode()).hexdigest()[:16]
    return real_ip, user_fingerprint
def get_country_from_ip(ip):
    """Geolocate an IP via the free ip-api.com endpoint (best effort).

    Returns a dict with 'country', 'country_code' and 'region'; falls
    back to 'Unknown'/'UN' on any network, timeout or parsing failure.
    NOTE: the free ip-api tier only serves plain HTTP, so the http://
    URL is intentional.
    """
    try:
        response = requests.get(f"http://ip-api.com/json/{ip}", timeout=2)
        if response.status_code == 200:
            data = response.json()
            return {
                "country": data.get('country', 'Unknown'),
                "country_code": data.get('countryCode', 'UN'),
                "region": data.get('regionName', 'Unknown')
            }
    except (requests.RequestException, ValueError):
        # Narrowed from a bare `except:` — geolocation is optional
        # metadata, but KeyboardInterrupt/SystemExit must propagate.
        # ValueError covers malformed JSON from response.json().
        pass
    return {"country": "Unknown", "country_code": "UN", "region": "Unknown"}
def generate_conversation_hash(session_id, user_fingerprint):
    """Return a stable 12-hex-char ID for a (session, user) pair."""
    combined = f"{session_id}:{user_fingerprint}".encode()
    digest = hashlib.sha256(combined).hexdigest()
    return digest[:12]
def generate_conversation_filename(session_id, user_fingerprint, timestamp):
    """Build the in-repo path for a conversation file.

    Layout: conversations/<YYYYmmdd_HHMMSS_microsec>_<hash>.jsonl — files
    sort chronologically while the trailing hash ties a file back to its
    (session, user) pair.
    """
    stamp = timestamp.strftime('%Y%m%d_%H%M%S_%f')
    convo_hash = generate_conversation_hash(session_id, user_fingerprint)
    return f"conversations/{stamp}_{convo_hash}.jsonl"
def get_conversation_files():
    """List stored conversation files in the dataset repo, sorted by name.

    Returns [] when no HF token is configured or the listing fails
    (missing repo, network error) — callers treat that as "no history".
    """
    if not HF_TOKEN:
        return []
    try:
        files = list_repo_files(repo_id=DATASET_REPO, repo_type="dataset", token=HF_TOKEN)
    except Exception as e:
        # Narrowed from a bare `except:`; log so repeated failures are
        # visible instead of silently resetting the chat counter.
        print(f"Listing conversation files failed: {e}")
        return []
    return sorted(f for f in files if f.startswith("conversations/") and f.endswith(".jsonl"))
def get_global_chat_counter():
    """Seed the global counter: one past the number of stored conversations."""
    return 1 + len(get_conversation_files())
def find_existing_conversation(session_id, user_fingerprint):
    """Load the most recent stored conversation for this session/user pair.

    Files are matched by the conversation hash embedded in their names.
    Returns the parsed JSON payload, or None when nothing (readable)
    exists — the caller then starts a fresh conversation.
    """
    conversation_hash = generate_conversation_hash(session_id, user_fingerprint)
    matching_files = [
        f for f in get_conversation_files()
        if f.endswith(f"_{conversation_hash}.jsonl")
    ]
    if not matching_files:
        return None
    try:
        # Filenames start with a timestamp, so the last match is the newest.
        local_file = hf_hub_download(
            repo_id=DATASET_REPO,
            repo_type="dataset",
            filename=matching_files[-1],
            token=HF_TOKEN
        )
        with open(local_file, 'r') as f:
            return json.load(f)
    except Exception as e:
        # Narrowed from a bare `except:`; resuming is best-effort, but
        # failures should at least be visible in the logs.
        print(f"Loading existing conversation failed: {e}")
        return None
def upload_conversation(conversation_data, session_id, user_fingerprint):
    """Persist a conversation snapshot to the HF dataset repo.

    Reuses the existing file for this conversation hash when one exists
    (each conversation maps to a single, repeatedly-overwritten file);
    otherwise creates a new timestamped file. No-op without an HF token;
    failures are logged, never raised.
    """
    if not HF_TOKEN:
        return
    try:
        try:
            create_repo(
                repo_id=DATASET_REPO,
                repo_type="dataset",
                private=True,
                exist_ok=True,
                token=HF_TOKEN
            )
        except Exception:
            # Best effort: the repo usually exists already, and any real
            # permission problem will surface from upload_file below.
            pass
        conversation_hash = generate_conversation_hash(session_id, user_fingerprint)
        conversation_files = get_conversation_files()
        matching_files = [f for f in conversation_files if f.endswith(f"_{conversation_hash}.jsonl")]
        if matching_files:
            filename = matching_files[-1]
        else:
            filename = generate_conversation_filename(session_id, user_fingerprint, datetime.now())
        with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) as f:
            json.dump(conversation_data, f)
            temp_path = f.name
        try:
            upload_file(
                path_or_fileobj=temp_path,
                path_in_repo=filename,
                repo_id=DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN
            )
        finally:
            # Bug fix: the temp file previously leaked whenever
            # upload_file raised, since unlink only ran on success.
            os.unlink(temp_path)
    except Exception as e:
        print(f"Upload failed: {e}")
# Seed the module-level counter from the number of conversations already in
# the dataset repo. Runs once at import time and may hit the network.
GLOBAL_CHAT_COUNTER = get_global_chat_counter()
print(f"Starting global chat counter at: {GLOBAL_CHAT_COUNTER}")
def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request: gr.Request):
    """Stream one chat completion and persist the conversation.

    Gradio generator callback. Each yield is the 6-tuple expected by the
    event wiring: (chatbot message pairs, flat history, chat_counter,
    status string, textbox update, button update). `history` is a flat
    list alternating user/assistant messages; pairs for the Chatbot
    widget are rebuilt from it on every yield.
    """
    global GLOBAL_CHAT_COUNTER
    if not OPENAI_API_KEYS or not OPENAI_API_KEYS[0]:
        # No keys configured: surface the error and re-enable the inputs.
        yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "No API keys configured", gr.update(interactive=True), gr.update(interactive=True)
        return
    # Spread load across the configured key pool.
    api_key = random.choice(OPENAI_API_KEYS)
    client = OpenAI(api_key=api_key)
    session_id = getattr(request, 'session_hash', 'unknown')
    real_ip, user_fingerprint = get_user_fingerprint(request)
    geo_info = get_country_from_ip(real_ip)
    # Raw headers arrive as bytes pairs; decode for JSON serialization.
    headers_dict = {key.decode('utf-8'): value.decode('utf-8') for key, value in request.headers.raw}
    # On a fresh UI session (chat_counter == 0), try to resume a prior
    # conversation stored for this same session/user fingerprint.
    existing_conversation = find_existing_conversation(session_id, user_fingerprint) if chat_counter == 0 else None
    if existing_conversation:
        history = existing_conversation['messages_history']
        chat_counter = existing_conversation['chat_counter']
        chatbot = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
    # Rebuild the OpenAI message list: even indices in `history` are user
    # turns, odd indices assistant turns.
    messages = []
    for i, data in enumerate(history):
        role = 'user' if i % 2 == 0 else 'assistant'
        messages.append({"role": role, "content": data})
    messages.append({"role": "user", "content": inputs})
    GLOBAL_CHAT_COUNTER += 1
    global_counter = GLOBAL_CHAT_COUNTER
    chat_counter += 1
    history.append(inputs)
    token_counter = 0
    partial_words = ""
    try:
        stream = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            stream=True,
            presence_penalty=0,
            frequency_penalty=0,
            max_tokens=4096
        )
        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                partial_words += chunk.choices[0].delta.content
                # First token appends a new assistant entry; subsequent
                # tokens overwrite it with the accumulated text.
                if token_counter == 0:
                    history.append(" " + partial_words)
                else:
                    history[-1] = partial_words
                token_counter += 1
                # Keep textbox and button disabled while streaming.
                yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "200", gr.update(interactive=False), gr.update(interactive=False)
        # Stream finished: final yield re-enables the inputs.
        yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "200", gr.update(interactive=True), gr.update(interactive=True)
    except Exception as e:
        print(f'Error API OpenAI: {e}')
        yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, str(e), gr.update(interactive=True), gr.update(interactive=True)
    # Persist the (possibly partial) conversation regardless of outcome.
    conversation_data = {
        "session_id": session_id,
        "user_fingerprint": user_fingerprint,
        "conversation_id": f"{session_id}_{datetime.now().strftime('%Y%m%d_%H')}",
        "conversation_hash": generate_conversation_hash(session_id, user_fingerprint),
        "country": geo_info["country"],
        "country_code": geo_info["country_code"],
        "region": geo_info["region"],
        "chat_counter": chat_counter,
        "global_chat_counter": global_counter,
        "model": MODEL,
        "messages": messages,
        "messages_history": history,
        "response": partial_words,
        "headers": headers_dict,
        "temperature": temperature,
        "top_p": top_p,
        "token_counter": token_counter,
        "timestamp": datetime.now().isoformat(),
        "last_updated": datetime.now().isoformat()
    }
    # Log everything except the full history (it can get large).
    print(json.dumps({k: v for k, v in conversation_data.items() if k != 'messages_history'}))
    upload_conversation(conversation_data, session_id, user_fingerprint)
def reset_textbox():
    """Clear the textbox and disable both inputs while a request runs."""
    cleared_box = gr.update(value='', interactive=False)
    disabled_button = gr.update(interactive=False)
    return cleared_box, disabled_button
# Static UI copy. Fix: the Spanish strings were mojibake (UTF-8 text that
# had been decoded as GBK, e.g. "Recolecci贸n"); restored proper accents
# and corrected the "Porfavor" typo.
title = """<h1 align="center">Copuchat: Recolección de datos para LatamGPT</h1>"""
# The kill-switch replaces the title with a red "usage limit reached" banner.
if DISABLED:
    title = """<h1 align="center" style="color:red">Esta app alcanzó su límite de uso. Por favor intenta reingresar mañana.</h1>"""
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
```
User: <utterance>
Assistant: <utterance>
User: <utterance>
Assistant: <utterance>
...
```
In this app, you can explore the outputs of GPT-4.1 mini while contributing to LatamGPT research.
"""
# --- UI layout and event wiring ---
# Fix applied throughout this block: user-facing Spanish strings were
# mojibake (UTF-8 decoded as GBK, e.g. "Par谩metros", "C贸digo"); restored
# the intended accented characters in labels, the JS confirm text and the
# consent HTML. Also corrected "pasaran" -> "pasarán".
with gr.Blocks(css="""
#col_container {
    margin-left: auto;
    margin-right: auto;
    max-width: 1200px;
    width: 95%;
}
#chatbot {
    height: 1200px;
    overflow: auto;
}
.gradio-container {
    max-width: unset !important;
}
#component-0 {
    max-width: unset;
}
""") as demo:
    gr.HTML(title)
    # Main chat column: hidden until the user accepts the consent form.
    with gr.Column(elem_id="col_container", visible=False) as main_block:
        chatbot = gr.Chatbot(elem_id='chatbot')
        inputs = gr.Textbox(
            placeholder="",
            label="Escribe tu mensaje y presiona Enter",
            lines=3,
            max_lines=8,
            scale=1
        )
        state = gr.State([])  # flat message history carried across turns
        with gr.Row():
            with gr.Column(scale=7):
                b1 = gr.Button(visible=not DISABLED)
            with gr.Column(scale=3):
                server_status_code = gr.Textbox(label="Código de estado del servidor")
        with gr.Accordion("Parámetros", open=False):
            top_p = gr.Slider(minimum=0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (muestreo de núcleo)")
            temperature = gr.Slider(minimum=0, maximum=2.0, value=0.2, step=0.1, interactive=True, label="Temperatura")
            chat_counter = gr.Number(value=0, visible=False, precision=0)
    # Consent screen, shown first; accepting swaps visibility with main_block.
    with gr.Column(elem_id="user_consent_container") as user_consent_block:
        accept_checkbox = gr.Checkbox(visible=False)
        js = "(x) => confirm('Al hacer clic en \"Acepto\", acepto que mis datos pueden ser publicados o compartidos para investigación.')"
        with gr.Accordion("Consentimiento de Usuario para Recolección, Uso y Compartición de Datos", open=True):
            gr.HTML("""
            <div>
                <p>Al usar nuestra aplicación, que funciona con la API de OpenAI, reconoces y aceptas los siguientes términos sobre los datos que proporcionas:</p>
                <ol>
                    <li><strong>Recolección:</strong> Podemos recopilar información, incluyendo las entradas que escribes en nuestra aplicación, las salidas generadas por la API de OpenAI, y ciertos detalles técnicos sobre tu dispositivo y conexión (como tipo de navegador, sistema operativo y ubicación geográfica) proporcionados por los headers de solicitud de tu dispositivo. Estos datos pasarán por un proceso de anonimización para evitar la recolección de información privada.</li>
                    <li><strong>Uso:</strong> Podemos usar los datos recopilados para propósitos de investigación y desarrollo de LatamGPT.</li>
                    <li><strong>Compartición y Publicación:</strong> Los datos recolectados, incluyendo los detalles técnicos recopilados de los headers de solicitud de tu dispositivo, pueden ser publicados, compartidos con terceros, o usados para análisis y propósitos de reportes.</li>
                    <li><strong>Retención de Datos:</strong> Podemos retener tus datos anonimizados, incluyendo los detalles técnicos recopilados de los headers de solicitud de tu dispositivo, por el tiempo que sea necesario.</li>
                </ol>
                <p>Al continuar usando nuestra aplicación, proporcionas tu consentimiento explícito para la recolección, uso y potencial compartición de tus datos como se describe arriba. Si no estás de acuerdo con nuestras prácticas de recolección, uso y compartición de datos, por favor no uses nuestra aplicación.</p>
                <p><strong>Este proyecto contribuye al desarrollo de LatamGPT, un modelo de lenguaje para América Latina.</strong></p>
            </div>
            """)
        accept_button = gr.Button("Acepto / I Agree")

    def enable_inputs():
        # Hide the consent column, reveal the chat column.
        return gr.update(visible=False), gr.update(visible=True)

    # The JS confirm() result lands in the hidden checkbox, whose change
    # event flips the two columns' visibility.
    accept_button.click(None, None, accept_checkbox, js=js, queue=False)
    accept_checkbox.change(fn=enable_inputs, inputs=[], outputs=[user_consent_block, main_block], queue=False)
    # Submit via Enter or via the button: first disable the inputs
    # (queue=False so it runs immediately), then stream predict.
    inputs.submit(reset_textbox, [], [inputs, b1], queue=False)
    inputs.submit(predict, [inputs, top_p, temperature, chat_counter, chatbot, state], [chatbot, state, chat_counter, server_status_code, inputs, b1])
    b1.click(reset_textbox, [], [inputs, b1], queue=False)
    b1.click(predict, [inputs, top_p, temperature, chat_counter, chatbot, state], [chatbot, state, chat_counter, server_status_code, inputs, b1])
if __name__ == "__main__":
    # Launch the Gradio server when run as a script (HF Spaces entry point).
    demo.launch()