Spaces:

jzou19950715
/

Lossdog_Data_Science_Expert

Sleeping

App Files Files Community

jzou19950715 committed on Jan 17

Commit

6a8cba0

verified ·

1 Parent(s): 1b95733

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -329

app.py DELETED Viewed

@@ -1,329 +0,0 @@
-import os
-import sys
-import subprocess
-import requests
-import gradio as gr
-import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-import numpy as np
-from sklearn.model_selection import train_test_split
-from sklearn.linear_model import LogisticRegression
-from sklearn.preprocessing import LabelEncoder
-# --------------------------------------------------------------------------------
-# OPTIONAL: dynamic installation for rarely used packages not in requirements.txt
-# --------------------------------------------------------------------------------
-def install_library(library):
-    """
-    Run ``pip install <library>`` using the current interpreter.
-
-    Meant for rarely used packages that are NOT listed in requirements.txt.
-    Returns a status message; a failure is reported in the message instead
-    of being raised.
-    """
-    pip_command = [sys.executable, "-m", "pip", "install", library]
-    try:
-        subprocess.check_call(pip_command)
-    except Exception as e:
-        return f"Error installing {library}: {str(e)}"
-    return f"Successfully installed {library}."
-def dynamic_import(library, alias=None):
-    """
-    Import ``library`` and bind it in this module's globals.
-
-    The module is stored under ``alias`` when one is given, otherwise under
-    ``library``. If the first import raises ImportError, the library is
-    installed through install_library() and the import is retried once; an
-    ImportError from the retry propagates to the caller.
-    """
-    import importlib
-
-    bound_name = alias if alias else library
-    try:
-        module = __import__(library)
-    except ImportError:
-        print(install_library(library))
-        # Let the import system notice the package that was just installed.
-        importlib.invalidate_caches()
-        module = __import__(library)
-    # Bind on both paths: the retry path previously ignored ``alias``.
-    globals()[bound_name] = module
-# --------------------------------------------------------------------------------
-# LLM CALLS: GPT-4o-mini, OpenAI, DeepSeek, Gemini
-# --------------------------------------------------------------------------------
-import openai
-from huggingface_hub import InferenceClient
-def call_gpt4o_mini(api_key, user_prompt):
-    """
-    Calls a GPT-4o-mini model hosted on Hugging Face.
-    Replace 'someUser/gpt-4o-mini' with your actual model repo.
-
-    Returns the generated text, or a human-readable error string when the
-    key is missing or the request fails (nothing is raised).
-    """
-    if not api_key:
-        return "No Hugging Face API key provided. Cannot call GPT-4o-mini."
-    try:
-        # InferenceClient takes the repo through ``model``; it has no
-        # ``repo_id`` keyword, so the old call always raised TypeError.
-        client = InferenceClient(
-            model="someUser/gpt-4o-mini",  # <--- Replace with your real GPT-4o-mini repo
-            token=api_key
-        )
-        # We use text_generation endpoint; adapt if your model differs
-        response = client.text_generation(user_prompt, max_new_tokens=128)
-        # 'response' can be a string or dict depending on the endpoint. Assume it's a string:
-        return response
-    except Exception as e:
-        return f"Error calling GPT-4o-mini: {str(e)}"
-def call_openai(api_key, user_prompt):
-    """
-    Send ``user_prompt`` to OpenAI's completion API and return the reply text.
-
-    The key is stored on the ``openai`` module before the request. Any
-    failure is returned as an "OpenAI Error: ..." string rather than raised.
-    """
-    # NOTE(review): ``openai.Completion`` looks like the legacy SDK
-    # interface -- confirm the installed openai version still provides it.
-    openai.api_key = api_key
-    try:
-        completion = openai.Completion.create(
-            model="text-davinci-003",
-            prompt=user_prompt,
-            max_tokens=128
-        )
-        first_choice = completion["choices"][0]
-        return first_choice["text"].strip()
-    except Exception as e:
-        return f"OpenAI Error: {str(e)}"
-def call_deepseek(api_key, user_prompt):
-    """
-    Hypothetical function to call a DeepSeek API endpoint.
-    Replace with real DeepSeek logic as needed.
-
-    POSTs the prompt as JSON with a bearer token and returns the text of
-    the first choice. Any failure (HTTP error status, timeout, unexpected
-    response shape) is returned as a "DeepSeek Error: ..." string.
-    """
-    try:
-        headers = {
-            "Content-Type": "application/json",
-            "Authorization": f"Bearer {api_key}"
-        }
-        payload = {
-            "prompt": user_prompt,
-            "max_tokens": 128
-        }
-        # Example POST; adapt to the real DeepSeek endpoint
-        response = requests.post(
-            "https://api.deepseek.ai/v1/chat",
-            json=payload,
-            headers=headers,
-            # requests waits forever by default; bound the wait so a stalled
-            # server cannot hang the chat handler.
-            timeout=30
-        )
-        response.raise_for_status()
-        data = response.json()
-        return data["choices"][0]["text"].strip()
-    except Exception as e:
-        return f"DeepSeek Error: {str(e)}"
-def call_gemini(api_key, user_prompt):
-    """
-    Placeholder for a future Gemini integration.
-
-    Both arguments are currently ignored; a fixed notice is returned.
-    """
-    placeholder_reply = "(Gemini usage not yet implemented; placeholder)"
-    return placeholder_reply
-def call_llm(api_provider, api_key, user_prompt):
-    """
-    Forward ``user_prompt`` to the backend named by ``api_provider``.
-
-    The provider name is matched case-insensitively. Without an API key no
-    backend is called and an explanatory message is returned; an
-    unrecognised provider also yields a message instead of an exception.
-    """
-    provider_key = api_provider.lower()
-    if not api_key:
-        if provider_key == "gpt-4o-mini":
-            return "No API key provided. Using GPT-4o-mini default is not possible without HF key."
-        return "No API key provided."
-    # Provider name -> function taking (api_key, user_prompt).
-    backends = {
-        "gpt-4o-mini": call_gpt4o_mini,
-        "openai": call_openai,
-        "deepseek": call_deepseek,
-        "gemini": call_gemini,
-    }
-    backend = backends.get(provider_key)
-    if backend is None:
-        return f"Unknown provider: {api_provider}. Please choose GPT-4o-mini, OpenAI, DeepSeek, or Gemini."
-    return backend(api_key, user_prompt)
-# --------------------------------------------------------------------------------
-# ADVANCED DATA ANALYSIS (extended_analysis)
-# --------------------------------------------------------------------------------
-def extended_analysis(df):
-    """
-    Sample advanced analysis:
-      1. Correlation heatmap for numeric columns
-      2. Bar plot of 'Career' (if present)
-      3. Simple logistic regression classification if 'Career' is suitable
-
-    Plots are written to fixed file names in the current working directory
-    ("heatmap.png", "barplot_career.png"). ``df`` is not modified.
-
-    Returns:
-        (output_paths, accuracy_info): the list of image paths that were
-        written, and a one-line description of the classification outcome.
-    """
-    output_paths = []
-    numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()
-    # 1) Correlation Heatmap (needs at least two numeric columns)
-    if len(numeric_cols) > 1:
-        corr = df[numeric_cols].corr()
-        plt.figure(figsize=(8, 6))
-        sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f")
-        plt.title("Correlation Heatmap")
-        heatmap_path = "heatmap.png"
-        plt.savefig(heatmap_path)
-        plt.close()
-        output_paths.append(heatmap_path)
-    # 2) Bar Plot of 'Career' if present
-    if "Career" in df.columns:
-        plt.figure(figsize=(8, 5))
-        df["Career"].value_counts().plot(kind="bar")
-        plt.title("Count of Each Career")
-        plt.xlabel("Career")
-        plt.ylabel("Count")
-        barplot_path = "barplot_career.png"
-        plt.savefig(barplot_path)
-        plt.close()
-        output_paths.append(barplot_path)
-    # 3) Simple Logistic Regression if 'Career' exists with multiple categories
-    if "Career" in df.columns and len(numeric_cols) > 0:
-        # Keep the encoded labels in a local array instead of writing a
-        # "Career_encoded" column back into the caller's DataFrame.
-        y = LabelEncoder().fit_transform(df["Career"])
-        X = df[numeric_cols].fillna(0)
-        if len(np.unique(y)) > 1:
-            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-            model = LogisticRegression(max_iter=1000)
-            model.fit(X_train, y_train)
-            score = model.score(X_test, y_test)
-            accuracy_info = f"Logistic Regression accuracy on test set: {score:.2f}"
-        else:
-            accuracy_info = "Career column has only one class; no classification performed."
-    else:
-        accuracy_info = "No 'Career' column or insufficient numeric data for classification."
-    return output_paths, accuracy_info
-# --------------------------------------------------------------------------------
-# MAIN ANALYSIS AND VISUALIZATION FUNCTION
-# --------------------------------------------------------------------------------
-def analyze_and_visualize(
-    file,
-    message,
-    history,
-    api_provider,
-    api_key
-):
-    """
-    Loads CSV, gives a summary, calls LLM for suggestions if an API key is provided,
-    does extended analysis if user requests ("sample analysis", "extended analysis", etc.),
-    and returns results/plots in the chatbot.
-
-    Args:
-        file: uploaded file object; only its ``name`` attribute (a path) is read.
-        message: the user's chat text; also scanned for trigger phrases.
-        history: previous chat entries; ``None`` is treated as empty.
-        api_provider: provider name handed to call_llm().
-        api_key: key for the provider; when falsy the LLM step is skipped.
-
-    Returns:
-        ``history`` extended with ``(message, text)`` and one
-        ``(None, (path,))`` entry per generated image. Failures are reported
-        as a ``(message, "Error: ...")`` entry instead of being raised.
-    """
-    history = history or []
-    # Without this guard a missing upload surfaced as the cryptic
-    # "'NoneType' object has no attribute 'name'".
-    if file is None:
-        return history + [(message, "Error: No CSV file uploaded. Please upload a CSV file first.")]
-    try:
-        # Load CSV
-        df = pd.read_csv(file.name)
-        numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()
-        categorical_cols = df.select_dtypes(exclude=["number"]).columns.tolist()
-        # Basic info
-        summary = (
-            f"**File**: {file.name}\n"
-            f"**Shape**: {df.shape[0]} rows, {df.shape[1]} columns\n"
-            f"**Numerical Columns**: {', '.join(numeric_cols) if numeric_cols else 'None'}\n"
-            f"**Categorical Columns**: {', '.join(categorical_cols) if categorical_cols else 'None'}\n"
-        )
-        # LLM suggestions
-        if api_key:
-            user_prompt = (
-                f"Data Summary:\n{summary}\n\n"
-                f"User question or request: {message}\n"
-                f"Suggest advanced data analysis or steps if relevant."
-            )
-            llm_response = call_llm(api_provider, api_key, user_prompt)
-            llm_suggestions = f"\n**LLM Suggestions**:\n{llm_response}\n"
-        else:
-            llm_suggestions = "\n(No LLM suggestions because no API key provided.)\n"
-        # Always produce example histogram if there's at least one numeric column
-        hist_path = None
-        if numeric_cols:
-            plt.figure(figsize=(6, 4))
-            sns.histplot(df[numeric_cols[0]], kde=True)
-            plt.title(f"Distribution of '{numeric_cols[0]}'")
-            plt.tight_layout()
-            hist_path = "temp_plot.png"
-            plt.savefig(hist_path)
-            plt.close()
-        # Check if the user wants extended analysis
-        trigger_phrases = ["sample analysis", "extended analysis", "advanced analysis", "run analysis"]
-        analysis_paths = []
-        accuracy_info = ""
-        # Tolerate a missing message when scanning for trigger phrases.
-        message_text = (message or "").lower()
-        if any(phrase in message_text for phrase in trigger_phrases):
-            analysis_paths, accuracy_info = extended_analysis(df)
-        # Build final response text
-        response_text = summary + llm_suggestions
-        if accuracy_info:
-            response_text += f"\n**ML Model Info**: {accuracy_info}\n"
-        # Construct the final chatbot content: text first, then one entry per image
-        chat_content = [(message, response_text)]
-        if hist_path:
-            chat_content.append((None, (hist_path,)))
-        chat_content.extend((None, (path,)) for path in analysis_paths)
-        return history + chat_content
-    except Exception as e:
-        return history + [(message, f"Error: {str(e)}")]
-# --------------------------------------------------------------------------------
-# CREATING THE GRADIO APP
-# --------------------------------------------------------------------------------
-def create_demo():
-    """
-    Assemble the Gradio Blocks UI and return it (not yet launched).
-
-    Layout: provider dropdown and API-key box in one row, then the CSV
-    upload, the chatbot, the message box and the Send / Reset buttons.
-    Pressing Enter in the message box and clicking Send both run
-    analyze_and_visualize() and then clear the message box.
-    """
-    with gr.Blocks() as demo:
-        gr.Markdown("# 🤖 GPT-4o-mini (Default) + Multi-Provider AI Data Analysis Assistant")
-        gr.Markdown(
-            """
-            **Features**:
-            - Default LLM: GPT-4o-mini on Hugging Face (requires HF API key).
-            - Other providers: **OpenAI**, **DeepSeek**, **Gemini** (enter their respective API keys).
-            - Upload CSV for data summary & histograms.
-            - Type "sample analysis" or "extended analysis" to trigger correlation heatmaps, bar plots, and a simple logistic regression.
-            """
-        )
-        with gr.Row():
-            api_provider = gr.Dropdown(
-                choices=["GPT-4o-mini", "OpenAI", "DeepSeek", "Gemini"],
-                value="GPT-4o-mini",  # default
-                label="LLM Provider",
-            )
-            api_key = gr.Textbox(
-                label="LLM API Key",
-                placeholder="Enter your Hugging Face/DeepSeek/OpenAI/Gemini API key here..."
-            )
-        file_input = gr.File(label="Upload CSV File", file_types=[".csv"])
-        chatbot = gr.Chatbot(label="Analysis Output")
-        msg = gr.Textbox(
-            label="Message",
-            placeholder="Ask the AI or type 'sample analysis' for extended analysis..."
-        )
-        send_btn = gr.Button("Send")
-        reset_btn = gr.Button("Reset Chat")
-        def reset_chat():
-            return []
-        # Enter in the textbox and the Send button share the same wiring:
-        # run the analysis, then blank the message box.
-        for register_event in (msg.submit, send_btn.click):
-            register_event(
-                fn=lambda f, m, h, p, k: analyze_and_visualize(f, m, h or [], p, k),
-                inputs=[file_input, msg, chatbot, api_provider, api_key],
-                outputs=[chatbot]
-            ).then(lambda: "", None, [msg])
-        reset_btn.click(fn=reset_chat, inputs=[], outputs=[chatbot])
-        demo.queue()
-    return demo
-# Build the UI at import time so `demo` exists as a module-level name
-# (presumably so the hosting runtime can find it -- confirm).
-demo = create_demo()
-# Launch the server only when this file is executed as a script.
-if __name__ == "__main__":
-    demo.launch(share=True)