import os
import subprocess
import signal

os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"

import gradio as gr
import tempfile
import torch
import requests
from huggingface_hub import HfApi, ModelCard, whoami
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from pathlib import Path
from textwrap import dedent

###########
import threading
from queue import Queue, Empty


def stream_output(pipe, queue):
    """Read output from pipe and put it in the queue."""
    for line in iter(pipe.readline, b''):
        queue.put(line.decode('utf-8').rstrip())
    pipe.close()


def run_command(command, env_vars):
    # Create process with pipes for stdout and stderr
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=False,
        env=env_vars,
    )

    # Create queues to store output
    stdout_queue = Queue()
    stderr_queue = Queue()

    # Create and start threads to read output
    stdout_thread = threading.Thread(target=stream_output, args=(process.stdout, stdout_queue))
    stderr_thread = threading.Thread(target=stream_output, args=(process.stderr, stderr_queue))
    stdout_thread.daemon = True
    stderr_thread.daemon = True
    stdout_thread.start()
    stderr_thread.start()

    output_stdout = ""
    output_stderr = ""

    # Monitor output in real-time while the process is running
    while process.poll() is None:
        # Check stdout
        try:
            stdout_line = stdout_queue.get_nowait()
            print(f"STDOUT: {stdout_line}")
            output_stdout += stdout_line + "\n"
        except Empty:
            pass
        # Check stderr
        try:
            stderr_line = stderr_queue.get_nowait()
            print(f"STDERR: {stderr_line}")
            output_stderr += stderr_line + "\n"
        except Empty:
            pass

    # Wait for the reader threads to finish, then drain any remaining lines
    stdout_thread.join()
    stderr_thread.join()
    while not stdout_queue.empty():
        line = stdout_queue.get_nowait()
        print(f"STDOUT: {line}")
        output_stdout += line + "\n"
    while not stderr_queue.empty():
        line = stderr_queue.get_nowait()
        print(f"STDERR: {line}")
        output_stderr += line + "\n"

    return (process.returncode, output_stdout, output_stderr)
###########


def guess_base_model(ft_model_id):
    res = requests.get(f"https://huggingface.co/api/models/{ft_model_id}")
    res = res.json()
    for tag in res["tags"]:
        if tag.startswith("base_model:"):
            return tag.split(":")[-1]
    raise Exception("Cannot guess the base model, please enter it manually")


def process_model(ft_model_id: str, base_model_id: str, rank: str, private_repo, oauth_token: gr.OAuthToken | None):
    # Validate the OAuth token
    try:
        whoami(oauth_token.token)
    except Exception:
        raise gr.Error("You must be logged in")

    model_name = ft_model_id.split('/')[-1]

    if not os.path.exists("outputs"):
        os.makedirs("outputs")

    try:
        api = HfApi(token=oauth_token.token)
        if not base_model_id:
            base_model_id = guess_base_model(ft_model_id)
            print("guess_base_model", base_model_id)

        with tempfile.TemporaryDirectory(dir="outputs") as outputdir:
            device = "cuda" if torch.cuda.is_available() else "cpu"
            cmd = [
                "mergekit-extract-lora",
                ft_model_id,
                base_model_id,
                outputdir,
                f"--rank={rank}",
                f"--device={device}",
            ]
            print("cmd", cmd)
            env_vars = dict(os.environ, HF_TOKEN=oauth_token.token)
            returncode, output_stdout, output_stderr = run_command(cmd, env_vars)
            print("returncode", returncode)
            print("output_stdout", output_stdout)
            print("output_stderr", output_stderr)
            if returncode != 0:
                raise Exception(f"Error converting to LoRA PEFT {output_stderr}")
            print("Model converted to LoRA PEFT successfully!")
            print(f"Converted model path: {outputdir}")

            # Check output dir
            if not os.listdir(outputdir):
                raise Exception("Output directory is empty!")

            # Create repo
            username = whoami(oauth_token.token)["name"]
            new_repo_url = api.create_repo(repo_id=f"{username}/LoRA-{model_name}", exist_ok=True, private=private_repo)
            new_repo_id = new_repo_url.repo_id
            print("Repo created successfully!", new_repo_url)

            # Upload files
            api.upload_folder(
                folder_path=outputdir,
                path_in_repo="",
                repo_id=new_repo_id,
            )
            print("Uploaded", outputdir)

            return (
                f"# ✅ DONE\n\n"
                f"Find your repo here: [{new_repo_id}]({new_repo_url})"
            )
    except Exception as e:
        return (f"# ❌ ERROR\n\n{e}")


css = """/* Custom CSS to allow scrolling */
.gradio-container {overflow-y: auto;}
"""

# Create Gradio interface
with gr.Blocks(css=css) as demo:
    gr.Markdown("You must be logged in.")
    gr.LoginButton(min_width=250)

    ft_model_id = HuggingfaceHubSearch(
        label="Fine-tuned model repository",
        placeholder="Fine-tuned model",
        search_type="model",
    )
    base_model_id = HuggingfaceHubSearch(
        label="Base model repository (optional)",
        placeholder="If empty, it will be guessed from repo tags",
        search_type="model",
    )
    rank = gr.Dropdown(
        ["16", "32", "64", "128"],
        label="LoRA rank",
        info="The higher the rank, the better the result, but the heavier the adapter",
        value="32",
        filterable=False,
        visible=True,
    )
    private_repo = gr.Checkbox(
        value=False,
        label="Private Repo",
        info="Create a private repo under your username.",
    )

    iface = gr.Interface(
        fn=process_model,
        inputs=[
            ft_model_id,
            base_model_id,
            rank,
            private_repo,
        ],
        outputs=[
            gr.Markdown(label="output"),
        ],
        title="Convert a fine-tuned model into LoRA with mergekit-extract-lora",
        description="The space takes a fine-tuned model and a base model, then makes a PEFT-compatible LoRA adapter based on the difference between the two models.\n\nNOTE: Each conversion takes about 5 to 20 minutes, depending on how big the model is.",
        api_name=False,
    )

# Launch the interface
demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)