Copuchat

Running

App Files Files Community

ouhenio committed 23 days ago

Commit

1836e90

verified ·

1 Parent(s): 43ce33c

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -16

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import requests
 import tempfile
 from datetime import datetime
 from openai import OpenAI
-from huggingface_hub import upload_file
 MODEL = "gpt-4.1-mini"
@@ -39,7 +39,7 @@ OPENAI_API_KEYS = get_env_list("OPENAI_API_KEYS", "")
 NUM_THREADS = int(os.getenv("NUM_THREADS", "4"))
 IP_SALT = os.getenv("IP_SALT", "latamgpt-default-salt-2025")
 HF_TOKEN = os.getenv("HF_TOKEN")
-DATASET_REPO = os.getenv("DATASET_REPO", "ouhenio/latamgpt-conversations")
 def exception_handler(exception_type, exception, traceback):
     print(f"{exception_type.__name__}: {exception}")
@@ -75,13 +75,25 @@ def upload_to_dataset(log_data):
     if not HF_TOKEN:
         return
     try:
         with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) as f:
             f.write(json.dumps(log_data) + '\n')
             temp_path = f.name
-        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
-        filename = f"conversations/conv_{timestamp}.jsonl"
         upload_file(
             path_or_fileobj=temp_path,
             path_in_repo=filename,
@@ -95,7 +107,43 @@ def upload_to_dataset(log_data):
     except Exception as e:
         print(f"Upload failed: {e}")
 def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request: gr.Request):
     if not OPENAI_API_KEYS or not OPENAI_API_KEYS[0]:
         yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "No API keys configured", gr.update(interactive=True), gr.update(interactive=True)
         return
@@ -116,6 +164,8 @@ def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request:
     messages.append({"role": "user", "content": inputs})
     chat_counter += 1
     history.append(inputs)
     token_counter = 0
@@ -143,11 +193,10 @@ def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request:
                 token_counter += 1
                 yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "200", gr.update(interactive=False), gr.update(interactive=False)
-        # Re-enable inputs after streaming completes
         yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "200", gr.update(interactive=True), gr.update(interactive=True)
     except Exception as e:
-        print(f'OpenAI API error: {e}')
         yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, str(e), gr.update(interactive=True), gr.update(interactive=True)
     log_data = {
@@ -158,6 +207,7 @@ def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request:
         "country_code": geo_info["country_code"],
         "region": geo_info["region"],
         "chat_counter": chat_counter,
         "model": MODEL,
         "messages": messages,
         "response": partial_words,
@@ -173,9 +223,9 @@ def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request:
 def reset_textbox():
     return gr.update(value='', interactive=False), gr.update(interactive=False)
-title = """<h1 align="center">LatamGPT Data Collection: Research Preview</h1>"""
 if DISABLED:
-    title = """<h1 align="center" style="color:red">This app has reached usage limit. Please check back tomorrow.</h1>"""
 description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
 ```
@@ -194,18 +244,18 @@ with gr.Blocks(css="""#col_container { margin-left: auto; margin-right: auto;}
     with gr.Column(elem_id="col_container", visible=False) as main_block:
         chatbot = gr.Chatbot(elem_id='chatbot')
-        inputs = gr.Textbox(placeholder="¡Hola! ¿En qué puedo ayudarte?", label="Escribe tu mensaje y presiona Enter")
         state = gr.State([])
         with gr.Row():
             with gr.Column(scale=7):
                 b1 = gr.Button(visible=not DISABLED)
             with gr.Column(scale=3):
-                server_status_code = gr.Textbox(label="Status code from server")
-        with gr.Accordion("Parameters", open=False):
-            top_p = gr.Slider(minimum=0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)")
-            temperature = gr.Slider(minimum=0, maximum=2.0, value=0.7, step=0.1, interactive=True, label="Temperature")
             chat_counter = gr.Number(value=0, visible=False, precision=0)
     with gr.Column(elem_id="user_consent_container") as user_consent_block:
@@ -217,8 +267,9 @@ with gr.Blocks(css="""#col_container { margin-left: auto; margin-right: auto;}
             <div>
                 <p>Al usar nuestra aplicación, que funciona con la API de OpenAI, reconoces y aceptas los siguientes términos sobre los datos que proporcionas:</p>
                 <ol>
-                    <li><strong>Recolección:</strong> Podemos recopilar información, incluyendo las entradas que escribes en nuestra aplicación, las salidas generadas por la API de OpenAI, y ciertos detalles técnicos sobre tu dispositivo y conexión (como tipo de navegador, sistema operativo e dirección IP) proporcionados por los headers de solicitud de tu dispositivo.</li>
-                    <li><strong>Uso:</strong> Podemos usar los datos recopilados para propósitos de investigación, para mejorar nuestros servicios, y para desarrollar nuevos productos o servicios, incluyendo aplicaciones comerciales, y para propósitos de seguridad, como proteger contra acceso no autorizado y ataques.</li>
                     <li><strong>Compartición y Publicación:</strong> Tus datos, incluyendo los detalles técnicos recopilados de los headers de solicitud de tu dispositivo, pueden ser publicados, compartidos con terceros, o usados para análisis y propósitos de reportes.</li>
                     <li><strong>Retención de Datos:</strong> Podemos retener tus datos, incluyendo los detalles técnicos recopilados de los headers de solicitud de tu dispositivo, por el tiempo que sea necesario.</li>
                 </ol>

 import tempfile
 from datetime import datetime
 from openai import OpenAI
+from huggingface_hub import upload_file, list_repo_files, create_repo
 MODEL = "gpt-4.1-mini"
 NUM_THREADS = int(os.getenv("NUM_THREADS", "4"))
 IP_SALT = os.getenv("IP_SALT", "latamgpt-default-salt-2025")
 HF_TOKEN = os.getenv("HF_TOKEN")
+DATASET_REPO = os.getenv("DATASET_REPO", "latamgpt/copuchat-conversations")
 def exception_handler(exception_type, exception, traceback):
     print(f"{exception_type.__name__}: {exception}")
     if not HF_TOKEN:
         return
     try:
+        try:
+            create_repo(
+                repo_id=DATASET_REPO,
+                repo_type="dataset",
+                private=True,
+                exist_ok=True,
+                token=HF_TOKEN
+            )
+        except:
+            pass
+        # Create individual files with timestamp to avoid conflicts
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
+        filename = f"raw/{timestamp}.jsonl"
         with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) as f:
             f.write(json.dumps(log_data) + '\n')
             temp_path = f.name
         upload_file(
             path_or_fileobj=temp_path,
             path_in_repo=filename,
     except Exception as e:
         print(f"Upload failed: {e}")
+def get_global_chat_counter():
+    if not HF_TOKEN:
+        return 1
+    try:
+        files = list_repo_files(repo_id=DATASET_REPO, repo_type="dataset", token=HF_TOKEN)
+        conversation_files = [f for f in files if f.startswith("conversations/") and f.endswith(".jsonl")]
+        if not conversation_files:
+            return 1
+        max_counter = 0
+        for file in conversation_files[-10:]:  # Check last 10 files for efficiency
+            try:
+                from huggingface_hub import hf_hub_download
+                local_file = hf_hub_download(
+                    repo_id=DATASET_REPO,
+                    repo_type="dataset",
+                    filename=file,
+                    token=HF_TOKEN
+                )
+                with open(local_file, 'r') as f:
+                    data = json.load(f)
+                    counter = data.get('chat_counter', 0)
+                    max_counter = max(max_counter, counter)
+            except:
+                continue
+        return max_counter + 1
+    except:
+        return 1
+GLOBAL_CHAT_COUNTER = get_global_chat_counter()
+print(f"Starting global chat counter at: {GLOBAL_CHAT_COUNTER}")
 def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request: gr.Request):
+    global GLOBAL_CHAT_COUNTER
     if not OPENAI_API_KEYS or not OPENAI_API_KEYS[0]:
         yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "No API keys configured", gr.update(interactive=True), gr.update(interactive=True)
         return
     messages.append({"role": "user", "content": inputs})
+    GLOBAL_CHAT_COUNTER += 1
+    global_counter = GLOBAL_CHAT_COUNTER
     chat_counter += 1
     history.append(inputs)
     token_counter = 0
                 token_counter += 1
                 yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "200", gr.update(interactive=False), gr.update(interactive=False)
         yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "200", gr.update(interactive=True), gr.update(interactive=True)
     except Exception as e:
+        print(f'Error API OpenAI: {e}')
         yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, str(e), gr.update(interactive=True), gr.update(interactive=True)
     log_data = {
         "country_code": geo_info["country_code"],
         "region": geo_info["region"],
         "chat_counter": chat_counter,
+        "global_chat_counter": global_counter,
         "model": MODEL,
         "messages": messages,
         "response": partial_words,
 def reset_textbox():
     return gr.update(value='', interactive=False), gr.update(interactive=False)
+title = """<h1 align="center">Copuchat: Recolección de datos para LatamGPT</h1>"""
 if DISABLED:
+    title = """<h1 align="center" style="color:red">Esta app alcanzó su límite de uso. Porfavor intenta reingresar mañana.</h1>"""
 description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
 ```
     with gr.Column(elem_id="col_container", visible=False) as main_block:
         chatbot = gr.Chatbot(elem_id='chatbot')
+        inputs = gr.Textbox(placeholder="", label="Escribe tu mensaje y presiona Enter")
         state = gr.State([])
         with gr.Row():
             with gr.Column(scale=7):
                 b1 = gr.Button(visible=not DISABLED)
             with gr.Column(scale=3):
+                server_status_code = gr.Textbox(label="Código de estado del servidor")
+        with gr.Accordion("Parámetros", open=False):
+            top_p = gr.Slider(minimum=0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (muestreo de núcleo)")
+            temperature = gr.Slider(minimum=0, maximum=2.0, value=0.7, step=0.1, interactive=True, label="Temperatura")
             chat_counter = gr.Number(value=0, visible=False, precision=0)
     with gr.Column(elem_id="user_consent_container") as user_consent_block:
             <div>
                 <p>Al usar nuestra aplicación, que funciona con la API de OpenAI, reconoces y aceptas los siguientes términos sobre los datos que proporcionas:</p>
                 <ol>
+                    <li><strong>Recolección:</strong> Podemos recopilar información, incluyendo las entradas que escribes en nuestra aplicación, las salidas generadas por la API de OpenAI, y ciertos detalles técnicos sobre tu dispositivo y conexión (como tipo de navegador, sistema operativo y ubicación geográfica) proporcionados por los headers de solicitud de tu dispositivo.</li>
+                    <li><strong>Uso:</strong> Podemos usar los datos recopilados para propósitos de investigación y desarrollo de LatamGPT.</li>
+                    <li><strong>Uso:</strong> Tus datos pasaran por un proceso de anonimización para evitar la recolección de información privada.</li>
                     <li><strong>Compartición y Publicación:</strong> Tus datos, incluyendo los detalles técnicos recopilados de los headers de solicitud de tu dispositivo, pueden ser publicados, compartidos con terceros, o usados para análisis y propósitos de reportes.</li>
                     <li><strong>Retención de Datos:</strong> Podemos retener tus datos, incluyendo los detalles técnicos recopilados de los headers de solicitud de tu dispositivo, por el tiempo que sea necesario.</li>
                 </ol>