import numpy as np
import pickle
import pandas as pd
import requests
from selenium import webdriver
import matplotlib.pyplot as plt
#Simple assignment
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
import requests 
import os
import seaborn as sns
from collections import Counter
import plotly.express as px
import streamlit as st


### Scrape the COSMIC ID information
# ### FRAMEWORKS NEEDED

def scrap():
            """Launch a headless Firefox session and define the COSMIC scraping helpers.

            As written, this function only builds the driver options, starts the
            browser and defines two nested helpers. Neither helper is called here
            and nothing is returned (the function returns ``None``).

            NOTE(review): the loop that looks like it should drive the scraping is
            placed after the ``return`` inside ``getinfocosmic`` and is therefore
            never executed -- see the note at that spot.
            NOTE(review): the browser started here is never closed or quit (the
            only ``browser.close()`` is commented out).
            """
            #### Driver options: headless Firefox
            options = webdriver.FirefoxOptions()
            options.add_argument('--headless')
            options.add_argument('--no-sandbox')
            options.add_argument('--disable-dev-shm-usage')
            # Bare attribute access: the value is discarded, so this line has no
            # visible effect (looks like a leftover from debugging).
            options.capabilities
            ### Start the webdriver
            # The driver binary is geckodriver (Firefox), loaded from a hard-coded,
            # machine-specific absolute path.
            # NOTE(review): `executable_path` and the `find_element(s)_by_*` calls
            # below look like Selenium 3-era APIs -- confirm the pinned Selenium version.
            browser = Firefox(options=options,executable_path=r"C:\Users\Pablo\OneDrive\Documents\Documentos\Escuela Politécnica Superior Leganés\4 AÑO\ASIGNATURAS\1 CUATRI\WEB ANALYTICS\PART 2\Milestone3\geckodriver.exe")
            ### Helper functions used to run the scraping


            def getinfofromtable(oddrows:list,score:float,headertable)->list:
                    """Extract tissue, histology and zygosity cells from table rows.

                    For every row element in ``oddrows`` (despite the name it is
                    also called with the even rows), keep the text of each ``td``
                    whose position equals the position of 'Primary Tissue',
                    'Primary Histology' or 'Zygosity' in ``headertable``, then
                    append ``score`` as the last item of that row.

                    Returns a list with one list per row. ``headertable.index``
                    raises ``ValueError`` if one of the three headers is missing.
                    """
                    rows = []
                    for row in oddrows:
                        cols = []
                        for (i,col) in enumerate(row.find_elements_by_css_selector("td")):
                            # Keep only the cells that sit under one of the three wanted headers.
                            if  i==headertable.index( 'Primary Tissue') or  i==headertable.index('Primary Histology') or i==headertable.index('Zygosity'):
                                cols.append(col.text)
                        # The same score is attached to every row of the table.
                        cols.append(score)
                        rows.append(cols)
                    return rows            
            def getinfocosmic(mutationid):
                    """Search ``mutationid`` on the currently loaded page and scrape its table.

                    Types the id into the element with class ``text_def`` inside
                    ``#search-field``, presses RETURN, sleeps 5 seconds and then
                    reads the result page.

                    Returns ``[]`` when the ``section-list`` element is not found;
                    otherwise the rows produced by ``getinfofromtable`` for the
                    ``odd`` rows followed by the ``even`` rows of
                    ``#DataTables_Table_0`` (so the on-page row order is not kept).
                    """
                    import time
                    search = browser.find_element_by_id('search-field')
                    search = search.find_element_by_class_name("text_def")
                    search.send_keys(mutationid)
                    search.send_keys(Keys.RETURN)
                    # Fixed wait for the result page instead of an explicit wait condition.
                    time.sleep(5)
                    try:
                        container = browser.find_element_by_id("section-list")
                    
                    except NoSuchElementException:
                        # No result section on the page: report no data for this id.
                        return []
                    
                    try:

                        # Parse the text between the first "score" and the next ")" as a float.
                        # If "score" is absent, find() returns -1 and the slice starts at index 4.
                        subq1 = container.text[container.text.find("score")+len("score"):]
                        score = float(subq1[:subq1.find(")")].strip())
                    except ValueError:
                        # The extracted text is not a number: fall back to a score of 0.
                        score = 0 
                    

                    section = browser.find_element_by_id("DataTables_Table_0")


                    # Header captions, used to locate the wanted columns by position.
                    headertable = [header.text for header in section.find_element_by_tag_name("thead").find_elements_by_tag_name("th")]

                    oddrows = section.find_elements_by_class_name("odd")
                    evenrows = section.find_elements_by_class_name("even")

                    l1 = getinfofromtable(oddrows,score,headertable)
                    l1.extend(getinfofromtable(evenrows,score,headertable))
                    
                    # browser.close()
                    return l1
                    # NOTE(review): everything below is unreachable -- it follows the
                    # `return` above at the same indentation level. It also reads
                    # `cosmicinfo`, which is not defined in this function, and the
                    # collected `cosl` list is never returned or saved. Presumably this
                    # loop was meant to be the body of scrap(); confirm before moving it.
                    ## Look up the info of every COSMIC id (skipping the first 20 ids)
                    cosl = []
                    browser.get("https://cancer.sanger.ac.uk/cosmic")
                    for cos in cosmicinfo.reset_index()["COSMIC_ID"].iloc[20:]:
                            # When several comma-separated ids are given, only the first is searched.
                            if cos.find(",")!=-1:
                                    cos = cos.split(",")[0]

                            cosl.append(getinfocosmic(cos))
                            # Go back to the landing page so the search field is available again.
                            browser.get("https://cancer.sanger.ac.uk/cosmic")
### Pieplots
def pieplot(merging,id=0):
    """Draw a pie chart of how many samples carry a mutation in each gene.

    Rows of ``merging`` are first collapsed to one row per
    (gene_name, UV_exposure_tissue, sampleID) combination; the chart then
    shows, per gene, the number of such combinations.

    Parameters
    ----------
    merging : pandas.DataFrame
        Must contain the columns ``gene_name``, ``UV_exposure_tissue`` and
        ``sampleID``.
    id : int, default 0
        0 -> only "Intermittently-photoexposed" tissue,
        1 -> only "Chronically-photoexposed" tissue,
        any other value -> all tissues together.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    Notes
    -----
    The original code used ``if id==0`` / ``if id==1`` / ``else``, so the
    ``else`` branch also ran for ``id == 0`` and silently replaced the
    intermittently-exposed selection with the full table. The branches are
    now mutually exclusive.
    """
    genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
    if id == 0:
        gtype = genecount[genecount.UV_exposure_tissue == "Intermittently-photoexposed"]
    elif id == 1:
        gtype = genecount[genecount.UV_exposure_tissue == "Chronically-photoexposed"]
    else:
        gtype = genecount

    # Number of (tissue, sample) combinations per gene, largest slice first.
    gtype = gtype.groupby("gene_name").count()["sampleID"].reset_index()
    gtype = gtype.sort_values(by="sampleID", ascending=False)
    # Seaborn pastel palette, truncated to the number of slices.
    colors = sns.color_palette('pastel')[0:len(gtype)]
    plt.pie(gtype.sampleID, labels=gtype.gene_name, colors=colors, autopct='%.0f%%', radius=2, textprops={"fontsize": 9})
    plt.show()

### Return one bar chart or another depending on the requested result (id)
def filterp4(dfgenes,id=0):
    """Build a plotly bar chart of mean mutations per gene.

    ``id`` selects the chart:
      * 0 -> only "Intermittently-photoexposed" rows, sorted by ``mean_mut`` descending
      * 1 -> only "Chronically-photoexposed" rows, sorted by ``mean_mut`` descending
      * 2 -> all rows, grouped bars coloured by ``UV_exposure_tissue``

    ``dfgenes`` needs the columns ``gene_name``, ``mean_mut``, ``std`` and
    ``UV_exposure_tissue``. Error bars come from the ``std`` column.
    Any other ``id`` yields ``None``.
    """
    if id == 0:
        exposure = "Intermittently-photoexposed"
    elif id == 1:
        exposure = "Chronically-photoexposed"
    else:
        exposure = None

    if exposure is not None:
        # Single-tissue chart: keep that tissue only and rank genes by mean mutations.
        subset = dfgenes[dfgenes.UV_exposure_tissue == exposure]
        ranked = subset.sort_values(by=["mean_mut"], ascending=False)
        return px.bar(ranked, x="gene_name", y="mean_mut", error_y="std")

    if id == 2:
        # Side-by-side comparison of both tissue types.
        return px.bar(dfgenes, x="gene_name", y="mean_mut", color="UV_exposure_tissue", barmode='group', error_y="std")

    return None

### Read the scraping results previously saved for the COSMIC ids
def read_scrap()->list:
    """Load and return the object stored in ``my_pickle_file.pickle``.

    The file is looked up relative to the current working directory.
    NOTE(review): unpickling executes arbitrary code from the file, so this
    must only ever be pointed at a locally produced, trusted pickle.
    """
    with open('my_pickle_file.pickle', 'rb') as pickled:
        return pickle.load(pickled)
### GendfClean
def gendfclean(cosbase,cid)->pd.DataFrame:
    """Turn the scraped per-mutation tables into a tidy DataFrame.

    Parameters
    ----------
    cosbase : list
        One entry per mutation. Each entry is either ``[]`` (nothing
        scraped) or a list of rows whose columns 0-3 are read as tissue,
        histology, zygosity and score. Presumably this is the object
        returned by ``read_scrap()`` -- verify against the caller.
    cid : array-like with ``.tolist()``
        The identifiers matching ``cosbase`` position by position.

    Returns
    -------
    pandas.DataFrame
        Columns ``tissue``, ``histology``, ``zygosity`` (lists with one
        item per scraped row), ``score`` (a single float taken from the
        first row of the entry) and ``cosmic_id``. Mutations without any
        scraped row are dropped.

    Notes
    -----
    Two changes with respect to the previous version: each entry is
    converted to an array once instead of once per column, and the
    filtered frame is copied explicitly before the ``score`` column is
    rewritten, so the assignment no longer happens on a slice of another
    DataFrame (chained assignment / ``SettingWithCopyWarning``).
    """
    columns = ("tissue", "histology", "zygosity", "score")

    # Convert every non-empty entry to a 2-D array a single time.
    tables = [np.array(entry) if entry != [] else None for entry in cosbase]

    dfd = {
        name: [[] if table is None else table[:, position].tolist() for table in tables]
        for position, name in enumerate(columns)
    }
    dfd["cosmic_id"] = cid.tolist()
    cosmicdb = pd.DataFrame(dfd)

    # Keep only the mutations for which all four columns hold at least one value.
    has_data = pd.Series(True, index=cosmicdb.index)
    for name in columns:
        has_data &= cosmicdb[name].map(len) > 0
    cosmicdb = cosmicdb[has_data].copy()

    # The score is identical on every row of an entry, so the first one is enough.
    cosmicdb["score"] = cosmicdb["score"].map(lambda values: float(values[0]))

    return cosmicdb

### Look for stats of a gene
def inputgene(lookforgene,merging,id =0)->dict:
    """Summarise the per-sample mutation counts of one gene for one tissue type.

    ``merging`` is collapsed to one row per
    (gene_name, UV_exposure_tissue, sampleID); after that step the ``chr``
    column holds the number of non-null ``chr`` values in each group. The
    rows of ``lookforgene`` for the selected tissue are then described with
    ``Series.describe()``.

    ``id == 0`` selects 'Intermittently-photoexposed'; every other value
    selects "Chronically-photoexposed".

    Returns a dict with the ``describe()`` statistics plus the keys
    ``gene_name`` and ``UV_exposure_tissue``.
    """
    exposure = 'Intermittently-photoexposed' if id == 0 else 'Chronically-photoexposed'

    per_sample = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
    wanted = (per_sample.gene_name == lookforgene) & (per_sample.UV_exposure_tissue == exposure)

    # Statistics of the gene across samples
    summary = per_sample[wanted].chr.describe()

    return {**dict(summary), "gene_name": lookforgene, "UV_exposure_tissue": exposure}
### Look for stats of all genes
def gene_exposed(merging,id=0):
    """Return one row of ``inputgene`` statistics per distinct gene in ``merging``.

    Genes are processed in the order given by ``merging.gene_name.unique()``;
    ``id`` is forwarded unchanged to ``inputgene``.
    """
    records = [inputgene(gene, merging, id) for gene in merging.gene_name.unique()]
    return pd.DataFrame(records)
### Merge stats for continuous and intermittently exposed
def mergecontintinfo(merging):
    """Stack the per-gene statistics of both tissue types into one DataFrame.

    The chronically exposed statistics (id 1) come first, followed by the
    intermittently exposed ones (id 0); the original row indices of each
    part are kept.
    """
    frames = [gene_exposed(merging, exposure_id) for exposure_id in (1, 0)]
    return pd.concat(frames, axis=0)

#### Common tissues, zygosities and histologies
def explodecommon(bd,N,col):
    """Return the ``N`` keys that appear in the most rows of ``bd[col]``.

    Every cell of ``bd[col]`` must offer ``.keys()`` (e.g. a Counter). Each
    key counts once per row, whatever its value inside the cell. The result
    is a list of ``(key, number_of_rows)`` tuples as given by
    ``Counter.most_common``.
    """
    keys_per_row = bd[col].apply(lambda cell: list(cell.keys()))
    occurrences = Counter(keys_per_row.explode())
    return occurrences.most_common(N)
def pdcommon(db,col,uv:str)->pd.DataFrame:
        """Wrap a list of ``(value, count)`` pairs into a labelled DataFrame.

        The first element of each pair goes to a column named ``col``, the
        second to ``Times_<col>``, and every row gets ``uv`` in the
        ``UV_exposure_tissue`` column.
        """
        new_names = {0: col, 1: "Times_{}".format(col)}
        table = pd.DataFrame(db)
        table = table.rename(columns=new_names)
        return table.assign(UV_exposure_tissue=uv)
def get_N_common(df,col,N=10)->pd.DataFrame:
        """Return the ``N`` most common values of ``col`` overall and per tissue type.

        Works on a deep copy of ``df`` in which every cell of ``col`` is
        replaced by a ``Counter`` of its items. The ranking is computed three
        times -- for all rows ("Total"), for the
        "Intermittently-photoexposed" rows and for the
        "Chronically-photoexposed" rows -- and the three tables are
        concatenated in that order.
        """
        counted = df.copy(True)
        counted[col] = counted[col].apply(lambda items: Counter(items))

        # Label -> rows that take part in that ranking.
        subsets = (
            ("Total", counted),
            ("Intermittently-photoexposed", counted[counted.UV_exposure_tissue=="Intermittently-photoexposed"]),
            ("Chronically-photoexposed", counted[counted.UV_exposure_tissue=="Chronically-photoexposed"]),
        )

        ranked = [pdcommon(explodecommon(rows, N, col), col, label) for label, rows in subsets]
        return pd.concat(ranked, axis=0)

### Detailed information of mutation type
def mut_type(x):
    """Return a finer-grained mutation label for one record.

    ``x`` must expose ``mut_type``; for "Indel" records it must also expose
    ``ref`` and ``mut``. Non-indel records keep their ``mut_type``. Indels
    become "Del" when ``ref`` is longer than ``mut``, "In" when ``mut`` is
    longer than ``ref``, and ``"<ref>><mut>"`` when both have the same length.
    """
    if x.mut_type != "Indel":
        return x.mut_type

    ref_len = len(x.ref)
    alt_len = len(x.mut)
    if ref_len > alt_len:
        return "Del"
    if alt_len > ref_len:
        return "In"
    # Same length on both sides: spell the change out.
    return x.ref + ">" + x.mut


def distribution_gene(df,hue):
    """Build a grouped plotly bar chart of counts per custom mutation type.

    ``df`` is grouped by ``hue`` and ``mut_type_cus`` and counted; only the
    two key columns and the first counted column are kept. That column is
    renamed to ``n_mut`` when it is called ``sampleID`` -- this assumes
    ``sampleID`` is the first non-key column of ``df``; verify against the
    caller. Bars are ordered by ``mut_type_cus`` and coloured by ``hue``.
    """
    counted = df.groupby([hue,"mut_type_cus"]).count().reset_index()
    trimmed = counted.iloc[:,:3]
    renamed = trimmed.rename(columns={"sampleID":"n_mut"})
    ordered = renamed.sort_values(by="mut_type_cus",ascending=True)
    return px.bar(ordered,x="mut_type_cus",y="n_mut",color=hue,barmode="group")