Spaces:
Runtime error
Runtime error
LC
Browse files
app.py
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
import numpy as np
|
2 |
import pickle
|
3 |
import pandas as pd
|
4 |
-
import requests
|
5 |
-
from selenium import webdriver
|
6 |
import matplotlib.pyplot as plt
|
7 |
#Simple assignment
|
8 |
-
from selenium.webdriver import Firefox
|
9 |
-
from selenium.webdriver.common.keys import Keys
|
10 |
from selenium.common.exceptions import NoSuchElementException
|
11 |
import requests
|
12 |
import os
|
@@ -20,73 +20,73 @@ import streamlit as st
|
|
20 |
### Scrape the cosmic id information
|
21 |
# ### FRAMEWORKS NEEDED
|
22 |
|
23 |
-
def scrap():
|
24 |
-
#### Setting options to the driver
|
25 |
-
options = webdriver.FirefoxOptions()
|
26 |
-
options.add_argument('--headless')
|
27 |
-
options.add_argument('--no-sandbox')
|
28 |
-
options.add_argument('--disable-dev-shm-usage')
|
29 |
-
options.capabilities
|
30 |
-
### Setting options of webdriver
|
31 |
-
# a) Setting the geckodriver (Firefox driver)
|
32 |
-
browser = Firefox(options=options,executable_path=r"C:\Users\Pablo\OneDrive\Documents\Documentos\Escuela Politécnica Superior Leganés\4 AÑO\ASIGNATURAS\1 CUATRI\WEB ANALYTICS\PART 2\Milestone3\geckodriver.exe")
|
33 |
-
### Functions and execution to run the scraping
|
34 |
-
|
35 |
-
|
36 |
-
def getinfofromtable(oddrows:list,score:float,headertable)->list:
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
def getinfocosmic(mutationid):
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
|
56 |
-
|
57 |
-
|
58 |
|
59 |
-
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
|
66 |
|
67 |
|
68 |
-
|
69 |
|
70 |
|
71 |
-
|
72 |
|
73 |
-
|
74 |
-
|
75 |
|
76 |
-
|
77 |
-
|
78 |
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
### Pieplots
|
91 |
def pieplot(merging,id=0):
|
92 |
genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
|
|
|
1 |
import numpy as np
|
2 |
import pickle
|
3 |
import pandas as pd
|
4 |
+
# import requests
|
5 |
+
# from selenium import webdriver
|
6 |
import matplotlib.pyplot as plt
|
7 |
#Simple assignment
|
8 |
+
# from selenium.webdriver import Firefox
|
9 |
+
# from selenium.webdriver.common.keys import Keys
|
10 |
from selenium.common.exceptions import NoSuchElementException
|
11 |
import requests
|
12 |
import os
|
|
|
20 |
### Scrape the cosmic id information
|
21 |
# ### FRAMEWORKS NEEDED
|
22 |
|
23 |
+
# def scrap():
|
24 |
+
# #### Setting options to the driver
|
25 |
+
# options = webdriver.FirefoxOptions()
|
26 |
+
# options.add_argument('--headless')
|
27 |
+
# options.add_argument('--no-sandbox')
|
28 |
+
# options.add_argument('--disable-dev-shm-usage')
|
29 |
+
# options.capabilities
|
30 |
+
# ### Setting options of webdriver
|
31 |
+
# # a) Setting the geckodriver (Firefox driver)
|
32 |
+
# browser = Firefox(options=options,executable_path=r"C:\Users\Pablo\OneDrive\Documents\Documentos\Escuela Politécnica Superior Leganés\4 AÑO\ASIGNATURAS\1 CUATRI\WEB ANALYTICS\PART 2\Milestone3\geckodriver.exe")
|
33 |
+
# ### Functions and execution to run the scraping
|
34 |
+
|
35 |
+
|
36 |
+
# def getinfofromtable(oddrows:list,score:float,headertable)->list:
|
37 |
+
# rows = []
|
38 |
+
# for row in oddrows:
|
39 |
+
# cols = []
|
40 |
+
# for (i,col) in enumerate(row.find_elements_by_css_selector("td")):
|
41 |
+
# if i==headertable.index( 'Primary Tissue') or i==headertable.index('Primary Histology') or i==headertable.index('Zygosity'):
|
42 |
+
# cols.append(col.text)
|
43 |
+
# cols.append(score)
|
44 |
+
# rows.append(cols)
|
45 |
+
# return rows
|
46 |
+
# def getinfocosmic(mutationid):
|
47 |
+
# import time
|
48 |
+
# search = browser.find_element_by_id('search-field')
|
49 |
+
# search = search.find_element_by_class_name("text_def")
|
50 |
+
# search.send_keys(mutationid)
|
51 |
+
# search.send_keys(Keys.RETURN)
|
52 |
+
# time.sleep(5)
|
53 |
+
# try:
|
54 |
+
# container = browser.find_element_by_id("section-list")
|
55 |
|
56 |
+
# except NoSuchElementException:
|
57 |
+
# return []
|
58 |
|
59 |
+
# try:
|
60 |
|
61 |
+
# subq1 = container.text[container.text.find("score")+len("score"):]
|
62 |
+
# score = float(subq1[:subq1.find(")")].strip())
|
63 |
+
# except ValueError:
|
64 |
+
# score = 0
|
65 |
|
66 |
|
67 |
|
68 |
+
# section = browser.find_element_by_id("DataTables_Table_0")
|
69 |
|
70 |
|
71 |
+
# headertable = [header.text for header in section.find_element_by_tag_name("thead").find_elements_by_tag_name("th")]
|
72 |
|
73 |
+
# oddrows = section.find_elements_by_class_name("odd")
|
74 |
+
# evenrows = section.find_elements_by_class_name("even")
|
75 |
|
76 |
+
# l1 = getinfofromtable(oddrows,score,headertable)
|
77 |
+
# l1.extend(getinfofromtable(evenrows,score,headertable))
|
78 |
|
79 |
+
# # browser.close()
|
80 |
+
# return l1
|
81 |
+
# ## Looking for cosmic id info
|
82 |
+
# cosl = []
|
83 |
+
# browser.get("https://cancer.sanger.ac.uk/cosmic")
|
84 |
+
# for cos in cosmicinfo.reset_index()["COSMIC_ID"].iloc[20:]:
|
85 |
+
# if cos.find(",")!=-1:
|
86 |
+
# cos = cos.split(",")[0]
|
87 |
+
|
88 |
+
# cosl.append(getinfocosmic(cos))
|
89 |
+
# browser.get("https://cancer.sanger.ac.uk/cosmic")
|
90 |
### Pieplots
|
91 |
def pieplot(merging,id=0):
|
92 |
genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
|