import gradio as gr
import os
import sys
import json
import random
import hashlib
import requests
import tempfile
from datetime import datetime
from openai import OpenAI
from huggingface_hub import upload_file, list_repo_files, create_repo, hf_hub_download

MODEL = "gpt-4.1-mini"


def get_env_bool(key, default="False"):
    """Read an environment variable and interpret it as a boolean."""
    value = os.getenv(key, default)
    if isinstance(value, bool):
        return value
    return str(value).lower() in ('true', '1', 'yes', 'on')


def get_env_list(key, default=""):
    """Read an environment variable as a list, accepting a JSON array or comma-separated values."""
    value = os.getenv(key, default)
    if not value:
        return []
    if value.startswith('[') and value.endswith(']'):
        try:
            parsed = json.loads(value)
            if isinstance(parsed, list):
                return [str(item).strip() for item in parsed if str(item).strip()]
        except json.JSONDecodeError:
            pass
    return [item.strip() for item in str(value).split(',') if item.strip()]


DISABLED = get_env_bool("DISABLED", "False")
OPENAI_API_KEYS = get_env_list("OPENAI_API_KEYS", "")
NUM_THREADS = int(os.getenv("NUM_THREADS", "4"))
IP_SALT = os.getenv("IP_SALT", "latamgpt-default-salt-2025")
HF_TOKEN = os.getenv("HF_TOKEN")
DATASET_REPO = os.getenv("DATASET_REPO", "latam-gpt/copuchat-conversations")


def exception_handler(exception_type, exception, traceback):
    # Print a one-line summary instead of a full traceback.
    print(f"{exception_type.__name__}: {exception}")


sys.excepthook = exception_handler
sys.tracebacklimit = 0


def get_user_fingerprint(request):
    """Return the client IP and a salted, truncated hash of it for pseudonymous grouping."""
    real_ip = (
        request.headers.get('x-forwarded-for', '').split(',')[0].strip()
        or request.headers.get('x-real-ip', '')
        # request.client is an address object, not a dict, so read .host defensively.
        or getattr(getattr(request, 'client', None), 'host', None)
        or 'unknown'
    )
    fingerprint_data = f"{real_ip}:{IP_SALT}"
    user_fingerprint = hashlib.sha256(fingerprint_data.encode()).hexdigest()[:16]
    return real_ip, user_fingerprint


def get_country_from_ip(ip):
    """Best-effort geolocation via ip-api.com; falls back to Unknown on any failure."""
    try:
        response = requests.get(f"http://ip-api.com/json/{ip}", timeout=2)
        if response.status_code == 200:
            data = response.json()
            return {
                "country": data.get('country', 'Unknown'),
                "country_code": data.get('countryCode', 'UN'),
                "region": data.get('regionName', 'Unknown')
            }
    except Exception:
        pass
    return {"country": "Unknown", "country_code": "UN", "region": "Unknown"}


def generate_conversation_hash(session_id, user_fingerprint):
    return hashlib.sha256(f"{session_id}:{user_fingerprint}".encode()).hexdigest()[:12]


def generate_conversation_filename(session_id, user_fingerprint, timestamp):
    conversation_hash = generate_conversation_hash(session_id, user_fingerprint)
    timestamp_str = timestamp.strftime('%Y%m%d_%H%M%S_%f')
    return f"conversations/{timestamp_str}_{conversation_hash}.jsonl"


def get_conversation_files():
    """List conversation files already stored in the dataset repo."""
    if not HF_TOKEN:
        return []
    try:
        files = list_repo_files(repo_id=DATASET_REPO, repo_type="dataset", token=HF_TOKEN)
        return sorted([f for f in files if f.startswith("conversations/") and f.endswith(".jsonl")])
    except Exception:
        return []


def get_global_chat_counter():
    conversation_files = get_conversation_files()
    return len(conversation_files) + 1


def find_existing_conversation(session_id, user_fingerprint):
    """Download the most recent stored conversation for this session/user, if any."""
    conversation_hash = generate_conversation_hash(session_id, user_fingerprint)
    conversation_files = get_conversation_files()
    matching_files = [f for f in conversation_files if f.endswith(f"_{conversation_hash}.jsonl")]
    if matching_files:
        try:
            latest_file = matching_files[-1]
            local_file = hf_hub_download(
                repo_id=DATASET_REPO,
                repo_type="dataset",
                filename=latest_file,
                token=HF_TOKEN
            )
            with open(local_file, 'r') as f:
                return json.load(f)
        except Exception:
            pass
    return None


def upload_conversation(conversation_data, session_id, user_fingerprint):
    """Persist the conversation to the dataset repo, reusing an existing file for the same conversation hash."""
    if not HF_TOKEN:
        return
    try:
        try:
            create_repo(
                repo_id=DATASET_REPO,
                repo_type="dataset",
                private=True,
                exist_ok=True,
                token=HF_TOKEN
            )
        except Exception:
            pass
        conversation_hash = generate_conversation_hash(session_id, user_fingerprint)
        conversation_files = get_conversation_files()
        matching_files = [f for f in conversation_files if f.endswith(f"_{conversation_hash}.jsonl")]
        if matching_files:
            filename = matching_files[-1]
        else:
            filename = generate_conversation_filename(session_id, user_fingerprint, datetime.now())
        with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) as f:
            json.dump(conversation_data, f)
            temp_path = f.name
        upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=filename,
            repo_id=DATASET_REPO,
            repo_type="dataset",
            token=HF_TOKEN
        )
        os.unlink(temp_path)
    except Exception as e:
        print(f"Upload failed: {e}")


GLOBAL_CHAT_COUNTER = get_global_chat_counter()
print(f"Starting global chat counter at: {GLOBAL_CHAT_COUNTER}")


def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request: gr.Request):
    global GLOBAL_CHAT_COUNTER

    if not OPENAI_API_KEYS or not OPENAI_API_KEYS[0]:
        yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "No API keys configured", gr.update(interactive=True), gr.update(interactive=True)
        return

    api_key = random.choice(OPENAI_API_KEYS)
    client = OpenAI(api_key=api_key)

    session_id = getattr(request, 'session_hash', 'unknown')
    real_ip, user_fingerprint = get_user_fingerprint(request)
    geo_info = get_country_from_ip(real_ip)
    headers_dict = {key.decode('utf-8'): value.decode('utf-8') for key, value in request.headers.raw}

    # On the first turn of a session, resume a previously stored conversation if one exists.
    existing_conversation = find_existing_conversation(session_id, user_fingerprint) if chat_counter == 0 else None
    if existing_conversation:
        history = existing_conversation['messages_history']
        chat_counter = existing_conversation['chat_counter']
        chatbot = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]

    # Rebuild the OpenAI message list from the alternating user/assistant history.
    messages = []
    for i, data in enumerate(history):
        role = 'user' if i % 2 == 0 else 'assistant'
        messages.append({"role": role, "content": data})
    messages.append({"role": "user", "content": inputs})

    GLOBAL_CHAT_COUNTER += 1
    global_counter = GLOBAL_CHAT_COUNTER
    chat_counter += 1
    history.append(inputs)

    token_counter = 0
    partial_words = ""
    try:
        stream = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            stream=True,
            presence_penalty=0,
            frequency_penalty=0,
            max_tokens=4096
        )
        # Stream partial responses to the UI, keeping the inputs disabled until completion.
        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                partial_words += chunk.choices[0].delta.content
                if token_counter == 0:
                    history.append(" " + partial_words)
                else:
                    history[-1] = partial_words
                token_counter += 1
                yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "200", gr.update(interactive=False), gr.update(interactive=False)
        yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "200", gr.update(interactive=True), gr.update(interactive=True)
    except Exception as e:
        print(f"OpenAI API error: {e}")
        yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, str(e), gr.update(interactive=True), gr.update(interactive=True)

    conversation_data = {
        "session_id": session_id,
        "user_fingerprint": user_fingerprint,
        "conversation_id": f"{session_id}_{datetime.now().strftime('%Y%m%d_%H')}",
        "conversation_hash": generate_conversation_hash(session_id, user_fingerprint),
        "country": geo_info["country"],
        "country_code": geo_info["country_code"],
        "region": geo_info["region"],
        "chat_counter": chat_counter,
        "global_chat_counter": global_counter,
        "model": MODEL,
        "messages": messages,
        "messages_history": history,
        "response": partial_words,
        "headers": headers_dict,
        "temperature": temperature,
        "top_p": top_p,
        "token_counter": token_counter,
        "timestamp": datetime.now().isoformat(),
        "last_updated": datetime.now().isoformat()
    }
    print(json.dumps({k: v for k, v in conversation_data.items() if k != 'messages_history'}))
    upload_conversation(conversation_data, session_id, user_fingerprint)


def reset_textbox():
    return gr.update(value='', interactive=False), gr.update(interactive=False)


title = """
By using our application, which is powered by the OpenAI API, you acknowledge and accept the following terms regarding the data you provide:
By continuing to use our application, you give your explicit consent to the collection, use, and potential sharing of your data as described above. If you do not agree with our data collection, use, and sharing practices, please do not use our application.
This project contributes to the development of LatamGPT, a language model for Latin America.